原始backup数据的提取
- 一直觉得微信聊天记录备份是一个麻烦事情,最近发现微信占用的空间越来越大,又舍不得删除,想着能够实现自动化备份微信聊天记录等信息。在互联网上看到有可以备份微信聊天记录的工具,但是是收费的。我觉得既然别人能备份,我们自己也应该可以试着做到。
- Google 了一下,发现可以通过 iTunes 来备份手机数据。备份后,数据会保存在 ~/Library/Application Support/MobileSync/Backup 目录下面。在 Finder 中按 shift+command+g,然后输入该地址,就可以直接跳转到该目录。该目录下有备份的文件夹,随便选一个进入之后,里面有:Info.plist、Manifest.mbdb、Manifest.plist、Status.plist 等文件。
- 通过对这些数据进行分析,觉得Manifest.mbdb应该是一个数据库一样的文件,存储着各种信息,所以查了下怎么解析这个文件。
- 解析 Manifest.mbdb 之后,可以看到很多文件名以及它们所在的目录,这些应该就是备份的文件列表。备份目录里的文件是用 ID 命名的(下面的脚本对 "domain-filename" 计算 SHA1 得到这个 ID),因此通过 Manifest.mbdb 可以得到这些 ID 与原始文件路径的对应关系,然后就可以根据这些信息把文件还原到原来的目录结构中。
提取数据的Python脚本
#!/usr/bin/env python
#~/Library/Application Support/MobileSync/Backup
import sys
import os
import hashlib
import shutil
# Filled by process_mbdb_file(): start offset of a record in Manifest.mbdb ->
# SHA-1 hex digest of "<domain>-<filename>" for that record.
mbdx = {}
# Filled by fileinfo_str(): fileID -> filename; read back by the main script
# to decide where each backup file is restored.
# NOTE(review): this name shadows the builtin `dict` for the whole module;
# renaming it requires changing every block that uses it at the same time.
dict = {}
def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    Works on any indexable byte sequence: a Python 2 ``str`` (indexing yields
    a 1-character string) as well as ``bytes``/``bytearray`` (indexing yields
    an ``int``).

    Args:
        data: The raw buffer to read from.
        offset: Index of the first (most significant) byte.
        intsize: Number of bytes to read; zero or negative reads nothing.

    Returns:
        A ``(value, new_offset)`` tuple where ``new_offset`` points just past
        the bytes consumed. With ``intsize <= 0`` this is ``(0, offset)``.

    Raises:
        IndexError: If ``data`` ends before ``intsize`` bytes could be read.
    """
    value = 0
    for index in range(offset, offset + intsize):
        item = data[index]
        # bytes/bytearray already hand back an int; only 1-char strings
        # need ord().
        value = (value << 8) + (item if isinstance(item, int) else ord(item))
    return value, offset + max(intsize, 0)
def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes. The length value 0xFFFF (both length bytes 0xFF) marks a blank
    string with no payload.

    The marker is detected through the decoded length rather than by
    comparing single elements against ``chr(0xFF)``, so detection does not
    depend on whether indexing ``data`` yields characters or integers.

    Args:
        data: The raw buffer to read from.
        offset: Index of the first length byte.

    Returns:
        A ``(value, new_offset)`` tuple. ``value`` is ``''`` for a blank
        string, otherwise the slice of ``data`` holding the payload (which is
        shorter than the declared length if ``data`` is truncated).

    Raises:
        IndexError: If ``data`` ends inside the 2-byte length field.
    """
    length, body_start = getint(data, offset, 2)  # 2-byte length
    if length == 0xFFFF:
        return '', body_start  # Blank string
    body_end = body_start + length
    return data[body_start:body_end], body_end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file into a mapping of record dictionaries.

    Each record is read as five length-prefixed strings, a run of fixed-width
    big-endian integers, and ``numprops`` name/value string pairs.

    Side effect: for every record the SHA-1 hex digest of
    ``"<domain>-<filename>"`` is stored in the module-level ``mbdx`` mapping
    under the record's start offset.

    Args:
        filename: Path of the Manifest.mbdb file.

    Returns:
        dict mapping the start offset of each record to its ``fileinfo``
        dictionary (keys: ``start_offset``, the string and integer fields
        listed below, and ``properties``).

    Raises:
        Exception: If the file does not start with the ``mbdb`` signature.
        IOError/OSError: If the file cannot be opened or read.
    """
    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    int_fields = (
        ('mode', 2),
        ('unknown2', 4),
        ('unknown3', 4),
        ('userid', 4),
        ('groupid', 4),
        ('mtime', 4),
        ('atime', 4),
        ('ctime', 4),
        ('filelen', 8),
        ('flag', 1),
        ('numprops', 1),
    )

    # Binary mode: the manifest is not text, and text mode may translate or
    # truncate its bytes on some platforms. `with` closes the handle.
    with open(filename, 'rb') as manifest:
        data = manifest.read()
    if data[0:4] != "mbdb":
        raise Exception("This does not look like an MBDB file")

    mbdb = {}  # Map offset of info in this file => file info
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    total = len(data)
    while offset < total:
        fileinfo = {'start_offset': offset}
        for field in string_fields:
            fileinfo[field], offset = getstring(data, offset)
        for field, size in int_fields:
            fileinfo[field], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        # The digest of "<domain>-<filename>" is used as the record's fileID.
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        mbdx[fileinfo['start_offset']] = hashlib.sha1(fullpath).hexdigest()
    return mbdb
def modestr(val):
    """Render the low nine permission bits of ``val`` as an ``rwxrwxrwx`` string.

    The three groups (bits 8-6, 5-3 and 2-0) are emitted in that order; a
    cleared bit is shown as ``-``. Bits above the low nine are ignored.
    """
    pieces = []
    for shift in (6, 3, 0):
        triplet = val >> shift
        pieces.append('r' if triplet & 0x4 else '-')
        pieces.append('w' if triplet & 0x2 else '-')
        pieces.append('x' if triplet & 0x1 else '-')
    return ''.join(pieces)
def fileinfo_str(f, verbose=False):
    """Format one manifest record as a single line of text.

    Side effect: always records ``f['fileID'] -> f['filename']`` in the
    module-level ``dict`` mapping, which the main script reads to decide
    where each backup file is restored.

    Args:
        f: A fileinfo dictionary with at least ``fileID``, ``filename`` and
            ``domain``; the verbose form also reads ``mode``, ``userid``,
            ``groupid``, ``filelen``, ``mtime``, ``atime``, ``ctime``,
            ``linktarget`` and ``properties``.
        verbose: When false, return the short ``fileID => filename (domain)``
            form; when true, return an ``ls -l``-like line.

    Returns:
        The formatted string.
    """
    dict[f['fileID']] = f['filename']
    if not verbose:
        return "%s => %s (%s)" % (f['fileID'], f['filename'], f['domain'])

    # The top three bits of the mode select the entry kind.
    kind_bits = f['mode'] & 0xE000
    if kind_bits == 0xA000:
        kind = 'l'  # symlink
    elif kind_bits == 0x8000:
        kind = '-'  # file
    elif kind_bits == 0x4000:
        kind = 'd'  # dir
    else:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], fileinfo_str(f, False)))
        kind = '?'  # unknown

    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
             f['filelen'], f['mtime'], f['atime'], f['ctime'],
             f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info
verbose = True

# (magic bytes, extension) pairs looked for in the first bytes of a backup
# file. Order matters: when several entries match, the last one wins.
_MAGIC_EXTENSIONS = (
    (b'bplist', '.plist'),
    (b'<?xml', '.plist'),
    (b'SQLite', '.sqlitedb'),
    (b'JFIF', '.jpeg'),
    (b'Exif', '.jpeg'),
    (b'PNG', '.png'),
    (b'cook', '.binarycookies'),
    (b'ftypqt', '.mov'),
    (b'ID3', '.mp3'),
)


def guess_extension(header):
    """Return the file extension suggested by ``header``, or ``''`` if unknown.

    Args:
        header: The leading bytes of a file, as read in binary mode.

    Returns:
        One of the extensions in ``_MAGIC_EXTENSIONS`` (the last matching
        entry), or an empty string when no magic value occurs in ``header``.
    """
    extension = ''
    for magic, suffix in _MAGIC_EXTENSIONS:
        if magic in header:
            extension = suffix
    return extension


def _restore_known_files(backuppath, outputpath):
    """Copy every backup file whose name is a known fileID to its original path."""
    for fname in os.listdir(backuppath):
        if fname not in dict:
            continue
        ffullname = backuppath + fname
        # rpartition gives an empty directory part for names without '/',
        # instead of chopping the last character off the name.
        odir, _, oname = dict[fname].rpartition('/')
        target_dir = outputpath + odir
        target = target_dir + '/' + oname
        try:
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
                print(">>makedirs " + target_dir)
            shutil.copy(ffullname, target)
            print(">>copy from " + ffullname + " to " + target)
        except EnvironmentError as err:
            # Best effort: report the failure and keep going.
            sys.stderr.write("Could not restore %s: %s\n" % (ffullname, err))


def _export_by_type(backuppath, outputpath):
    """Copy every backup file into ``other-data`` with a sniffed extension."""
    import random  # function-scope import: not among the file's top-level imports

    other_dir = outputpath + 'other-data'
    try:
        if not os.path.exists(other_dir):
            os.makedirs(other_dir)
    except EnvironmentError as err:
        sys.stderr.write("Could not create %s: %s\n" % (other_dir, err))
        return

    for fname in os.listdir(backuppath):
        ffullname = backuppath + fname
        print("handler " + ffullname)
        if not os.path.isfile(ffullname):
            continue  # directories cannot be opened or copied as files
        try:
            with open(ffullname, 'rb') as handle:
                header = handle.read(15)
            # Unrecognized files get no extension rather than inheriting the
            # extension of whichever file was processed before them.
            mtype = guess_extension(header)
            file_path = other_dir + '/' + fname + mtype
            if os.path.exists(file_path):
                # Name already taken: add a random numeric suffix.
                file_path = (other_dir + '/' + fname
                             + str(random.randrange(0, 1000)) + mtype)
            shutil.copy(ffullname, file_path)
            print(">>copy from " + ffullname + " to " + file_path)
        except EnvironmentError as err:
            # Best effort: report the failure and keep going.
            sys.stderr.write("Could not copy %s: %s\n" % (ffullname, err))


def main(argv):
    """Restore an iTunes backup directory into an output directory.

    Args:
        argv: ``[program, backup directory, output directory]``.

    Returns:
        Process exit status: 0 on completion, 1 on a usage error or when one
        of the two directories does not exist.
    """
    if len(argv) != 3:
        print("\nUsage: Python iOS-Corrupted-Backup-Reader.py [Full path to backup directory] [Full path to output directory]\n")
        # Backslashes are escaped so "\b" is not emitted as a backspace.
        print("Example: Python iOS-Corrupted-Backup-Reader.py c:\\backup c:\\output")
        return 1
    backuppath = argv[1]
    outputpath = argv[2]
    if not os.path.exists(backuppath):
        print("Backup directory not found.")
        return 1
    if not os.path.exists(outputpath):
        print("Output directory not found. Create the directory before running the script.")
        return 1
    # Look at the last character only, so an existing trailing slash is kept
    # as-is instead of being doubled.
    if not backuppath.endswith('/'):
        backuppath = backuppath + '/'
    if not outputpath.endswith('/'):
        outputpath = outputpath + '/'

    mbdb = process_mbdb_file(backuppath + "Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        # fileinfo_str also records fileID -> filename for the restore step.
        print(fileinfo_str(fileinfo))

    _restore_known_files(backuppath, outputpath)
    _export_by_type(backuppath, outputpath)
    print("Files successfully moved to" + outputpath)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
说明:
手机备份的文件所在路径:
~/Library/Application Support/MobileSync/Backup
执行样例:
python ReadiPhoneBackupData.py ~/Library/Application\ Support/MobileSync/Backup/xxx /Users/xxx/Desktop/xxx