base64_extraction.py
1 |
#!/usr/bin/env python
|
---|---|
2 |
# -*- coding: utf-8 -*-
|
3 |
|
4 |
# base64_extraction.py ver 0.2
|
5 |
# 引数のファイルに含まれるbase64部分を抽出・デコードしてファイルに保存する
|
6 |
# 第一引数… 入力ファイル名
|
7 |
# 第二引数… 指定した数値より短い文字列はBase64の変換対象にしない(default 16)
|
8 |
# 2016.08.06 kanata
|
9 |
|
10 |
import os |
11 |
import sys |
12 |
import base64 |
13 |
import commands |
14 |
import traceback |
15 |
|
16 |
def write_decorded_base64(decodeed_base64, out_file_base, file_offset):
    """Decode a Base64 candidate string and save the decoded bytes to a file.

    The output file is named ``out_file_base[0] + "_" + str(file_offset)``
    and is then handed to ``analysis_file_type`` which appends an extension.

    Args:
        decodeed_base64: Candidate Base64 text to decode.
        out_file_base: Sequence whose first item is the output name prefix
            (the caller passes the result of ``os.path.splitext``).
        file_offset: Value appended to the prefix to make the name unique.

    Returns:
        1 when the text cannot be decoded as Base64 (nothing is written);
        None otherwise.
    """
    try:
        file_content = base64.b64decode(decodeed_base64)
    except (TypeError, ValueError):
        # Python 2 reports bad input as TypeError; Python 3 raises
        # binascii.Error, which is a ValueError subclass.
        return 1

    file_name = out_file_base[0] + "_" + str(file_offset)
    # Binary mode: the decoded payload is raw bytes, and text mode would
    # rewrite newline bytes on Windows (and reject bytes on Python 3).
    with open(file_name, 'wb') as out_fd:
        out_fd.write(file_content)
    analysis_file_type(file_name)
28 |
|
29 |
def analysis_file_type(file_name):
    """Rename *file_name* by appending an extension guessed from its MIME type.

    On POSIX systems the external ``file -i`` command is asked for the MIME
    type and the extension is the MIME subtype with any ``prefix-`` removed
    (``application/x-gzip`` gives ``gzip``).  On other systems, or when the
    command is unavailable or yields nothing usable, the extension is
    ``dat``.  An existing file with the target name is replaced.  The new
    name is printed prefixed with a tab.

    Args:
        file_name: Path of the file to inspect and rename.

    Returns:
        0 after the file has been renamed.
    """
    # Local import: the file's import header does not provide subprocess.
    import subprocess

    file_extention = "dat"
    if os.name == "posix":
        try:
            # Argument list instead of a shell string, so names containing
            # spaces or shell metacharacters are passed through untouched.
            # "-b" omits the file name from the output; "--" protects names
            # that start with a dash.
            output = subprocess.check_output(["file", "-b", "-i", "--", file_name])
        except (OSError, subprocess.CalledProcessError):
            output = ""
        if not isinstance(output, str):
            output = output.decode("utf-8", "replace")
        # Output looks like "type/subtype; charset=...".
        mime_type = output.split(";", 1)[0].strip()
        if "/" in mime_type:
            subtype = mime_type.rsplit("/", 1)[1]
            guessed = subtype.rsplit("-", 1)[-1].strip()
            if guessed:
                file_extention = guessed

    new_file_name = file_name + "." + file_extention
    if os.path.exists(new_file_name):
        os.remove(new_file_name)
    os.rename(file_name, new_file_name)
    print("\t" + new_file_name)
    return 0
42 |
|
43 |
# Byte values of the characters that count toward a Base64 run's length.
_BASE64_ALPHABET = frozenset(bytearray(
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"))
_PAD = ord("=")
_NEWLINES = (ord("\n"), ord("\r"))
_BACKSLASH = ord("\\")
_LOWER_N = ord("n")


def find_base64_runs(data, min_length=16):
    """Yield ``(offset, text)`` for each Base64-looking run found in *data*.

    A run is a sequence of Base64 alphabet bytes.  ``=`` is kept in the text
    but not counted toward the length, raw CR/LF bytes and the two-character
    escape ``\\n`` are skipped, and any other byte (or the end of the data)
    ends the run.  A backslash followed by anything other than ``n`` ends
    the run and the escaped byte is consumed with it.

    Args:
        data: Bytes-like object to scan.
        min_length: A run is reported only when it holds strictly more than
            this many alphabet characters.

    Yields:
        Tuples of the byte offset at which the run ended (``len(data)`` for
        a run that reaches the end of the input) and the run as text.
    """
    raw = bytearray(data)
    size = len(raw)
    pending = bytearray()
    counted = 0
    pos = 0
    # Iterate one step past the last byte: the ``None`` sentinel flushes a
    # run that reaches the end of the input.
    while pos <= size:
        byte = raw[pos] if pos < size else None
        step = 1
        ends_run = False
        if byte in _BASE64_ALPHABET:
            pending.append(byte)
            counted += 1
        elif byte == _PAD:
            pending.append(byte)
        elif byte in _NEWLINES:
            pass
        elif byte == _BACKSLASH:
            step = 2
            escaped = raw[pos + 1] if pos + 1 < size else None
            ends_run = escaped != _LOWER_N
        else:
            ends_run = True

        if ends_run:
            if counted > 0 and counted > min_length:
                yield pos, str(pending.decode("ascii"))
            # Always start afresh after a terminator so that short,
            # unrelated words are never glued together into a false run.
            pending = bytearray()
            counted = 0
        pos += step


if __name__ == '__main__':
    # Runs with this many alphabet characters or fewer are not decoded.
    allow_decord_length = 16

    if os.name != "posix":
        print("[WARNING] I wonder this is not work encode for base64 by my windows7 enviroment. Please compare encoded file content by other ways.")

    if len(sys.argv) < 2:
        print("ex)" + sys.argv[0] + " [filename] [minimum base64 length]")
        sys.exit(1)

    in_file = sys.argv[1]
    out_file_base = os.path.splitext(os.path.basename(in_file))

    if len(sys.argv) == 3 and sys.argv[2].isdigit():
        allow_decord_length = int(sys.argv[2])

    try:
        with open(in_file, 'rb') as in_fd:
            data = in_fd.read()
    except (IOError, OSError):
        # Stop here: there is nothing to scan without the input.
        print("file error")
        sys.exit(1)
    file_size = len(data)

    for offset, text in find_base64_runs(data, allow_decord_length):
        # Progress is shown per extracted run rather than per input byte.
        sys.stdout.write("\rchecking..." + str(offset) + "/" + str(file_size))
        sys.stdout.flush()
        # Decode the run and write/rename the output file.
        write_decorded_base64(text, out_file_base, offset)

    sys.stdout.write("\r")
    sys.stdout.flush()
111 |
|