#!/usr/bin/env python
# -*- coding: utf-8 -*-

# base64_extraction.sh ver 0.2
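# Extracts the base64 parts contained in the file given as an argument, decodes them and saves them to files
# 1st argument: input file name
# 2nd argument: strings shorter than this number are not treated as Base64 (default 16)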
# 2016.08.06 kanata

import os
import sys
import base64
import commands
import traceback

def write_decorded_base64(decodeed_base64,out_file_base,file_offset):
    """Decode a base64 string and save the decoded bytes to a new file.

    The data is written to ``<out_file_base[0]>_<file_offset>`` (relative to
    the current directory) and the file is then passed to
    ``analysis_file_type``.

    Args:
        decodeed_base64: base64 text to decode.
        out_file_base: indexable whose first element is the output file name
            prefix (the caller passes the result of ``os.path.splitext``).
        file_offset: value appended to the prefix to make the name unique.

    Returns:
        1 when the text cannot be decoded as base64 (nothing is written),
        otherwise None.
    """
    try:
        file_content = base64.b64decode(decodeed_base64)
    except (TypeError, ValueError):
        # Malformed input is reported as TypeError by Python 2 and as
        # binascii.Error (a ValueError subclass) by Python 3.
        return 1
    file_name = out_file_base[0] + "_" + str(file_offset)
    # Binary mode is required: the decoded payload is arbitrary bytes, and
    # text mode would translate line endings on Windows and corrupt it.
    # The with-block closes the file even if the write fails.
    with open(file_name, 'wb') as out_fd:
        out_fd.write(file_content)
    analysis_file_type(file_name)

def analysis_file_type(file_name):
    """Rename *file_name* by appending an extension guessed from its content.

    On POSIX systems the ``file`` utility is asked for the MIME type and the
    extension is the last ``-``-separated word of the MIME subtype (for
    example ``application/x-gzip; charset=binary`` gives ``gzip``).  On other
    systems, or when ``file`` cannot be run or prints nothing usable, the
    extension is ``dat``.  An existing file with the new name is replaced.
    The new name is printed, prefixed with a tab.

    Args:
        file_name: path of the file to inspect and rename.

    Returns:
        0 after the file has been renamed.
    """
    # Local import so this function does not depend on the module header.
    import subprocess

    file_extention = "dat"
    if os.name == "posix":
        try:
            # An argument list (no shell) keeps spaces and shell
            # metacharacters in the file name from being interpreted, and
            # "--" stops a leading "-" from being read as an option.
            mime_info = subprocess.check_output(
                ["file", "--brief", "--mime", "--", file_name],
                universal_newlines=True)
        except (OSError, subprocess.CalledProcessError):
            # "file" is not installed or reported a failure.
            mime_info = ""
        lines = mime_info.splitlines()
        # Text after the last "/" up to the first blank, e.g. "x-gzip;".
        fields = lines[0].split("/")[-1].split() if lines else []
        if fields:
            guessed = fields[0].split("-")[-1].replace(";", "")
            if guessed:
                file_extention = guessed
    new_file_name = file_name + "." + file_extention
    if os.path.exists(new_file_name):
        os.remove(new_file_name)
    os.rename(file_name, new_file_name)
    print("\t"+new_file_name)
    return 0

# Characters of the standard base64 alphabet (padding "=" excluded).  A bytes
# literal keeps the membership test valid for the one-byte values returned by
# a file opened in binary mode.
BASE64_ALPHABET = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"


def find_base64_runs(in_fd, min_length=16, progress=None):
    """Yield ``(text, offset)`` for each base64-looking run read from *in_fd*.

    A run is a sequence of base64 alphabet characters, optionally followed by
    ``=`` padding.  CR/LF bytes and the two-character escape ``\\n`` inside a
    run are skipped, so wrapped base64 is joined.  Any other byte ends the
    run; a backslash followed by a byte other than ``n`` ends the run and
    that following byte is discarded.  An alphabet character that comes after
    ``=`` padding starts a new run, because padding can only end a value.

    Args:
        in_fd: binary file object; it is read one byte at a time.
        min_length: runs with fewer alphabet characters than this are dropped.
        progress: optional callable invoked with the offset of every byte
            processed and once more with the total size at end of input.

    Yields:
        Tuples ``(text, offset)``: ``text`` is the collected run (bytes) and
        ``offset`` is the position of the byte that ended it, or the input
        size when the run extends to the end of input.
    """
    parts = []           # pieces of the run being collected
    alphabet_count = 0   # alphabet characters in the run ("=" not counted)
    padded = False       # True once "=" has been seen in the current run
    offset = 0           # offset of the byte currently being processed

    while True:
        b = in_fd.read(1)
        if not b:
            # End of input.  This must be tested first: an empty value is
            # "contained" in every sequence and would match the alphabet.
            break
        consumed = 1
        ends_run = False
        starts_new_run = False
        if b in BASE64_ALPHABET:
            if padded:
                ends_run = True
                starts_new_run = True
            else:
                parts.append(b)
                alphabet_count += 1
        elif b == b"=":
            # Padding is only meaningful after at least one alphabet char.
            if alphabet_count:
                parts.append(b)
                padded = True
        elif b in b"\n\r":
            pass
        elif b == b"\\":
            following = in_fd.read(1)
            # Count the extra byte so reported offsets stay exact.
            consumed += len(following)
            if following and following != b"n":
                ends_run = True
        else:
            ends_run = True

        if ends_run:
            if alphabet_count and alphabet_count >= min_length:
                yield b"".join(parts), offset
            parts = [b] if starts_new_run else []
            alphabet_count = len(parts)
            padded = False
        if progress is not None:
            progress(offset)
        offset += consumed

    # A run that reaches the end of the input is reported as well.
    if alphabet_count and alphabet_count >= min_length:
        yield b"".join(parts), offset
    if progress is not None:
        progress(offset)


def main():
    """Command-line entry point: extract and decode base64 runs of a file.

    ``sys.argv[1]`` is the input file.  ``sys.argv[2]``, when it is a decimal
    number, is the minimum number of base64 characters a run needs to be
    decoded (default 16).  Each qualifying run is passed to
    ``write_decorded_base64``.  Exits with status 1 when no input file is
    given or the file cannot be opened.
    """
    if os.name != "posix":
        print("[WARNING] I wonder this is not work encode for base64 by my windows7 enviroment. Please compare encoded file content by other ways.")

    if len(sys.argv) < 2:
        print("ex)"+sys.argv[0]+" [filename] [minimum base64 length]")
        sys.exit(1)

    in_file = sys.argv[1]
    out_file_base = os.path.splitext(os.path.basename(in_file))

    # Runs shorter than this number of characters are not treated as base64.
    allow_decord_length = 16
    if len(sys.argv) >= 3 and sys.argv[2].isdigit():
        allow_decord_length = int(sys.argv[2])

    try:
        file_size = os.path.getsize(in_file)
        in_fd = open(in_file, 'rb')
    except (IOError, OSError):
        print("file error")
        sys.exit(1)

    def show_progress(offset):
        sys.stdout.write("\rchecking..."+str(offset)+"/"+str(file_size))
        sys.stdout.flush()

    try:
        for content, offset in find_base64_runs(in_fd, allow_decord_length, show_progress):
            write_decorded_base64(content, out_file_base, offset)
    finally:
        in_fd.close()
    sys.stdout.write("\r")
    sys.stdout.flush()


if __name__ == '__main__':
    main()

