How to parse Manifest.mbdb file in iOS 4.0 iTunes Backup

In iOS 4.0, Apple redesigned the backup process.

iTunes used to store the list of file names associated with the backup files in the Manifest.plist file, but in iOS 4.0 it transferred this information to Manifest.mbdb

You can see an example of this file by making a backup of your iOS 4.0 device and looking in the ~/Library/Application Support/MobileSync/Backup folder (look inside the subfolder with the most recent date).

Here is a screenshot of how the file looks in a text editor:

alt text
(source: supercrazyawesome.com )

How do I parse this in a Cocoa app so that I can update my (free) iPhone Backup Extractor app ( http://supercrazyawesome.com ) for iOS 4.0?

+82
iphone extract ios4 backup
Jun 21 '10 at 13:44
source share
8 answers

Thanks, user374559 and reneD - this code and description are very useful.

Here is my stab at some Python that parses the files and prints the information in Unix `ls -l` format:

#!/usr/bin/env python
"""Print the contents of an iOS 4 iTunes backup in a Unix `ls -l`-like format.

``Manifest.mbdb`` holds one metadata record per backed-up file and
``Manifest.mbdx`` is an index mapping each MBDB record offset to the file ID
under which the file is stored.  Both are read from the current directory
when the module is run as a script.

The code runs unchanged on Python 2.6+ and Python 3.
"""
import sys

# Length-prefixed strings that open every MBDB record, in file order.
_STRING_FIELDS = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')

# Big-endian integers that follow the strings, in file order: (key, bytes).
_INT_FIELDS = (
    ('mode', 2), ('unknown2', 4), ('unknown3', 4), ('userid', 4),
    ('groupid', 4), ('mtime', 4), ('atime', 4), ('ctime', 4),
    ('filelen', 8), ('flag', 1), ('numprops', 1),
)

# (mode & 0xE000) => type character used in the verbose listing.
_FILE_TYPES = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def _read_binary(filename):
    """Return the whole content of *filename* as bytes.

    Binary mode matters: in text mode some platforms translate line endings
    or stop at an EOF marker byte, so not all of the manifest is read.
    """
    with open(filename, 'rb') as handle:
        return handle.read()


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    *data* is a bytes-like object (``str`` on Python 2).  Raises IndexError
    when fewer than *intsize* bytes remain at *offset*.
    """
    end = offset + intsize
    chunk = bytearray(data[offset:end])  # yields ints on Python 2 and 3
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (intsize, offset))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the length 0xFFFF marks a blank string with no payload.  The
    result is always the interpreter's native ``str``: on Python 3 the bytes
    are decoded as latin-1, which maps every byte to one character and is
    therefore lossless for the binary fields too.
    """
    if bytearray(data[offset:offset + 2]) == b"\xff\xff":
        return '', offset + 2                   # Blank string
    length, offset = getint(data, offset, 2)    # 2-byte length
    end = offset + length
    raw = bytes(bytearray(data[offset:end]))
    value = raw if isinstance(raw, str) else raw.decode('latin-1')
    return value, end


def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping the offset of each record in the file to a dict
    of that record's fields (including ``start_offset`` and ``properties``).
    Raises ValueError (an Exception subclass) if the magic is not "mbdb".
    """
    data = _read_binary(filename)
    if data[0:4] != b"mbdb":
        raise ValueError("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    mbdb = {}  # Map offset of info in this file => file info
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in _STRING_FIELDS:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb


def process_mbdx_file(filename):
    """Parse a Manifest.mbdx file.

    Returns a dict mapping the offset of a record in the MBDB file to the
    40-character lowercase hex file ID.  Raises ValueError (an Exception
    subclass) if the magic is not "mbdx".
    """
    data = _read_binary(filename)
    if data[0:4] != b"mbdx":
        raise ValueError("This does not look like an MBDX file")
    offset = 4
    offset = offset + 2  # value 0x02 0x00, not sure what this is
    _, offset = getint(data, offset, 4)  # 4-byte count of records (unused)
    mbdx = {}  # Map offset of info in the MBDB file => fileID string
    while offset < len(data):
        # 26 byte record: 20-byte fileID, 4-byte offset, 2-byte mode.
        file_id = bytearray(data[offset:offset + 20])
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4)
        _, offset = getint(data, offset, 2)  # mode, also present in the MBDB
        # Stored offsets do not count the 6-byte MBDB prolog; add it back.
        mbdx[mbdb_offset + 6] = ''.join('%02x' % byte for byte in file_id)
    return mbdx


def modestr(val):
    """Return the nine-character ``rwxrwxrwx`` rendering of permission bits."""
    def mode(bits):
        return (('r' if bits & 0x4 else '-')
                + ('w' if bits & 0x2 else '-')
                + ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one file-info dict (which must carry a ``fileID`` key).

    The short form is ``(fileID)domain::filename``.  The verbose form
    prefixes type, permissions, uid, gid, size and the three timestamps, and
    appends the symlink target and any extra properties.  An unrecognised
    file type is reported on stderr and shown as ``?``.
    """
    brief = "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if not verbose:
        return brief
    kind = _FILE_TYPES.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], brief))
        kind = '?'  # unknown
    info = "%s%s %08x %08x %7d %10d %10d %10d %s" % (
        kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
        f['filelen'], f['mtime'], f['atime'], f['ctime'], brief)
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True


def main():
    """List every record of ./Manifest.mbdb, in file order."""
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    for offset in sorted(mbdb):
        fileinfo = mbdb[offset]
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))


if __name__ == '__main__':
    main()
+83
Jun 28 '10 at 8:05
source share

In iOS 5, the Manifest.mbdx file was eliminated. For the purposes of this script it was redundant anyway, since the domain and path are in Manifest.mbdb and the file ID hash can be generated using SHA1.

Here is my update of the halochrome code so that it works with backups of iOS 5 devices. The only changes are the removal of process_mbdx_file() and the addition of a few lines to process_mbdb_file().

Tested with backups of iPhone 4S and iPad 1, both with a large number of applications and with files.

 #!/usr/bin/env python
"""Print the contents of an iOS 5 iTunes backup in a Unix `ls -l`-like format.

iOS 5 backups have no Manifest.mbdx index, so the file ID of every record
is computed here as the SHA-1 of ``domain + '-' + filename``.

The code runs unchanged on Python 2.6+ and Python 3.
"""
import hashlib
import sys

# Map offset of a record in the MBDB file => fileID string.  Kept at module
# level for compatibility; process_mbdb_file() adds to it on every call.
mbdx = {}

# Length-prefixed strings that open every MBDB record, in file order.
_STRING_FIELDS = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')

# Big-endian integers that follow the strings, in file order: (key, bytes).
_INT_FIELDS = (
    ('mode', 2), ('unknown2', 4), ('unknown3', 4), ('userid', 4),
    ('groupid', 4), ('mtime', 4), ('atime', 4), ('ctime', 4),
    ('filelen', 8), ('flag', 1), ('numprops', 1),
)

# (mode & 0xE000) => type character used in the verbose listing.
_FILE_TYPES = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    *data* is a bytes-like object (``str`` on Python 2).  Raises IndexError
    when fewer than *intsize* bytes remain at *offset*.
    """
    end = offset + intsize
    chunk = bytearray(data[offset:end])  # yields ints on Python 2 and 3
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (intsize, offset))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the length 0xFFFF marks a blank string with no payload.  The
    result is always the interpreter's native ``str``: on Python 3 the bytes
    are decoded as latin-1, which maps every byte to one character and is
    therefore lossless for the binary fields too.
    """
    if bytearray(data[offset:offset + 2]) == b"\xff\xff":
        return '', offset + 2                   # Blank string
    length, offset = getint(data, offset, 2)    # 2-byte length
    end = offset + length
    raw = bytes(bytearray(data[offset:end]))
    value = raw if isinstance(raw, str) else raw.decode('latin-1')
    return value, end


def _file_id(domain, filename):
    """Return the SHA-1 hex digest of ``domain + '-' + filename``.

    The digest is taken over the bytes exactly as stored in the manifest, so
    a latin-1 decoded Python 3 string is encoded back with latin-1 first.
    """
    fullpath = domain + '-' + filename
    if not isinstance(fullpath, bytes):
        fullpath = fullpath.encode('latin-1')
    return hashlib.sha1(fullpath).hexdigest()


def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping the offset of each record in the file to a dict
    of that record's fields, and records each record's file ID in the
    module-level ``mbdx`` dict under the same offset.  Raises ValueError (an
    Exception subclass) if the magic is not "mbdb".
    """
    # Binary mode: text mode may translate or truncate the content.
    with open(filename, 'rb') as handle:
        data = handle.read()
    if data[0:4] != b"mbdb":
        raise ValueError("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    mbdb = {}  # Map offset of info in this file => file info
    while offset < len(data):
        start = offset
        fileinfo = {'start_offset': start}
        for key in _STRING_FIELDS:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[start] = fileinfo
        mbdx[start] = _file_id(fileinfo['domain'], fileinfo['filename'])
    return mbdb


def modestr(val):
    """Return the nine-character ``rwxrwxrwx`` rendering of permission bits."""
    def mode(bits):
        return (('r' if bits & 0x4 else '-')
                + ('w' if bits & 0x2 else '-')
                + ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one file-info dict (which must carry a ``fileID`` key).

    The short form is ``(fileID)domain::filename``.  The verbose form
    prefixes type, permissions, uid, gid, size and the three timestamps, and
    appends the symlink target and any extra properties.  An unrecognised
    file type is reported on stderr and shown as ``?``.
    """
    brief = "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if not verbose:
        return brief
    kind = _FILE_TYPES.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], brief))
        kind = '?'  # unknown
    info = "%s%s %08x %08x %7d %10d %10d %10d %s" % (
        kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
        f['filelen'], f['mtime'], f['atime'], f['ctime'], brief)
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True


def main():
    """List every record of ./Manifest.mbdb, in file order."""
    mbdb = process_mbdb_file("Manifest.mbdb")
    for offset in sorted(mbdb):
        fileinfo = mbdb[offset]
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))


if __name__ == '__main__':
    main()
+29
Nov 12 '11 at 1:52
source share

I finished my work on this material, that is, iOS 4 + iTunes 9.2, updated my backup decoder library for Python - http://www.iki.fi/fingon/iphonebackupdb.py

It does what I need. There is little documentation, but feel free to copy ideas from there ;-)

(It looks like it works fine with my backups).

+18
Jun 23 '10 at 19:01
source share

Here you can find information and a short description of the MBDB / MBDX format:

http://code.google.com/p/iphonebackupbrowser/

This is my application for viewing backup files. I tried to document the format of the new files that come with iTunes 9.2.

+10
Jun 30 '10 at 21:03
source share

This python script is awesome.

Here is my Ruby version (with slight improvement) and search capabilities. (for iOS 5)

 # encoding: utf-8 require 'fileutils' require 'digest/sha1' class ManifestParser def initialize(mbdb_filename, verbose = false) @verbose = verbose process_mbdb_file(mbdb_filename) end # Returns the numbers of records in the Manifest files. def record_number @mbdb.size end # Returns a huge string containing the parsing of the Manifest files. def to_s s = '' @mbdb.each do |v| s += "#{fileinfo_str(v)}\n" end s end def to_file(filename) File.open(filename, 'w') do |f| @mbdb.each do |v| f.puts fileinfo_str(v) end end end # Copy the backup files to their real path/name. # * domain_match Can be a regexp to restrict the files to copy. # * filename_match Can be a regexp to restrict the files to copy. def rename_files(domain_match = nil, filename_match = nil) @mbdb.each do |v| if v[:type] == '-' # Only rename files. if (domain_match.nil? or v[:domain] =~ domain_match) and (filename_match.nil? or v[:filename] =~ filename_match) dst = "#{v[:domain]}/#{v[:filename]}" puts "Creating: #{dst}" FileUtils.mkdir_p(File.dirname(dst)) FileUtils.cp(v[:fileID], dst) end end end end # Return the filename that math the given regexp. def search(regexp) result = Array.new @mbdb.each do |v| if "#{v[:domain]}::#{v[:filename]}" =~ regexp result << v end end result end private # Retrieve an integer (big-endian) and new offset from the current offset def getint(data, offset, intsize) value = 0 while intsize > 0 value = (value<<8) + data[offset].ord offset += 1 intsize -= 1 end return value, offset end # Retrieve a string and new offset from the current offset into the data def getstring(data, offset) return '', offset + 2 if data[offset] == 0xFF.chr and data[offset + 1] == 0xFF.chr # Blank string length, offset = getint(data, offset, 2) # 2-byte length value = data[offset...(offset + length)] return value, (offset + length) end def process_mbdb_file(filename) @mbdb = Array.new data = File.open(filename, 'rb') { |f| f.read } puts "MBDB file read. 
Size: #{data.size}" raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb' offset = 4 offset += 2 # value x05 x00, not sure what this is while offset < data.size fileinfo = Hash.new fileinfo[:start_offset] = offset fileinfo[:domain], offset = getstring(data, offset) fileinfo[:filename], offset = getstring(data, offset) fileinfo[:linktarget], offset = getstring(data, offset) fileinfo[:datahash], offset = getstring(data, offset) fileinfo[:unknown1], offset = getstring(data, offset) fileinfo[:mode], offset = getint(data, offset, 2) if (fileinfo[:mode] & 0xE000) == 0xA000 # Symlink fileinfo[:type] = 'l' elsif (fileinfo[:mode] & 0xE000) == 0x8000 # File fileinfo[:type] = '-' elsif (fileinfo[:mode] & 0xE000) == 0x4000 # Dir fileinfo[:type] = 'd' else # $stderr.puts "Unknown file type %04x for #{fileinfo_str(f, false)}" % f['mode'] fileinfo[:type] = '?' end fileinfo[:unknown2], offset = getint(data, offset, 4) fileinfo[:unknown3], offset = getint(data, offset, 4) fileinfo[:userid], offset = getint(data, offset, 4) fileinfo[:groupid], offset = getint(data, offset, 4) fileinfo[:mtime], offset = getint(data, offset, 4) fileinfo[:atime], offset = getint(data, offset, 4) fileinfo[:ctime], offset = getint(data, offset, 4) fileinfo[:filelen], offset = getint(data, offset, 8) fileinfo[:flag], offset = getint(data, offset, 1) fileinfo[:numprops], offset = getint(data, offset, 1) fileinfo[:properties] = Hash.new (0...(fileinfo[:numprops])).each do |ii| propname, offset = getstring(data, offset) propval, offset = getstring(data, offset) fileinfo[:properties][propname] = propval end # Compute the ID of the file. fullpath = fileinfo[:domain] + '-' + fileinfo[:filename] fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath) # We add the file to the list of files. @mbdb << fileinfo end @mbdb end def modestr(val) def mode(val) r = (val & 0x4) ? 'r' : '-' w = (val & 0x2) ? 'w' : '-' x = (val & 0x1) ? 
'x' : '-' r + w + x end mode(val >> 6) + mode(val >> 3) + mode(val) end def fileinfo_str(f) return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]] info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination f[:properties].each do |k, v| info += " #{k}=#{v.inspect}" end info end end if __FILE__ == $0 mp = ManifestParser.new 'Manifest.mbdb', true mp.to_file 'filenames.txt' end 
+7
Feb 12 2018-12-12T00:
source share

I liked the gravel code, and I changed the main function so that it showed a sorted list of total size by application:

 # Replacement for the script's main section: lists every file, then prints
# the total size of the regular files of each domain, smallest first.
# Written so that it runs on both Python 2 and Python 3.
verbose = True


def domain_sizes(mbdb):
    """Return a dict mapping each domain to the total size of its files.

    *mbdb* maps record offsets to file-info dicts with ``mode``, ``domain``
    and ``filelen`` keys.  Only regular files ((mode & 0xE000) == 0x8000)
    are counted; a domain with no regular file does not appear.
    """
    sizes = {}
    for fileinfo in mbdb.values():
        if (fileinfo['mode'] & 0xE000) == 0x8000:
            domain = fileinfo['domain']
            sizes[domain] = sizes.get(domain, 0) + fileinfo['filelen']
    return sizes


if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))
    sizes = domain_sizes(mbdb)
    for domain in sorted(sizes, key=sizes.get):
        # Floor division keeps the MB figure an integer on Python 3 as well.
        print("%-60s %11d (%dMB)"
              % (domain, sizes[domain], sizes[domain] // (1024 * 1024)))

That way, you can figure out which application uses all this space.

+4
Dec 06 '10 at 15:39
source share

For those looking for an implementation of a Java application to read MBDB files, there are several:

+2
Sep 17 '14 at 0:13
source share

Thanks for the answer, halo detector. The code works fine with Python 2.7. There is only one thing I want to point out: when you read Manifest.mbdb, you should use binary mode. Otherwise, not all of the content is read.

I also made some minor changes to make the code work with Python 3.4. Here is the code.

 #!/usr/bin/env python
"""Print the contents of an iOS 5 iTunes backup in a Unix `ls -l`-like format.

Python 3 version.  iOS 5 backups have no Manifest.mbdx index, so the file ID
of every record is computed as the SHA-1 of ``domain + '-' + filename``.
"""
import hashlib
import sys

# Map offset of a record in the MBDB file => fileID string.  Kept at module
# level for compatibility; process_mbdb_file() adds to it on every call.
mbdx = {}

# Every string is decoded with latin-1: it maps each byte to exactly one
# character, so binary fields survive and the original bytes can be
# recovered with .encode(_ENCODING).
_ENCODING = 'latin-1'

# Length-prefixed strings that open every MBDB record, in file order.
_STRING_FIELDS = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')

# Big-endian integers that follow the strings, in file order: (key, bytes).
_INT_FIELDS = (
    ('mode', 2), ('unknown2', 4), ('unknown3', 4), ('userid', 4),
    ('groupid', 4), ('mtime', 4), ('atime', 4), ('ctime', 4),
    ('filelen', 8), ('flag', 1), ('numprops', 1),
)

# (mode & 0xE000) => type character used in the verbose listing.
_FILE_TYPES = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    *data* is a bytes-like object.  Raises IndexError when fewer than
    *intsize* bytes remain at *offset*.
    """
    end = offset + intsize
    chunk = data[offset:end]
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (intsize, offset))
    return int.from_bytes(chunk, 'big'), end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the length 0xFFFF marks a blank string with no payload.  The
    bytes are returned decoded as latin-1.
    """
    if data[offset:offset + 2] == b'\xff\xff':
        return '', offset + 2                   # Blank string
    length, offset = getint(data, offset, 2)    # 2-byte length
    end = offset + length
    return bytes(data[offset:end]).decode(_ENCODING), end


def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping the offset of each record in the file to a dict
    of that record's fields, and records each record's file ID in the
    module-level ``mbdx`` dict under the same offset.  Raises ValueError (an
    Exception subclass) if the magic is not "mbdb".
    """
    # 'rb' is needed to read all of the content.
    with open(filename, 'rb') as handle:
        data = handle.read()
    # Compare bytes: decoding an arbitrary magic could itself raise.
    if data[0:4] != b"mbdb":
        raise ValueError("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    mbdb = {}  # Map offset of info in this file => file info
    while offset < len(data):
        start = offset
        fileinfo = {'start_offset': start}
        for key in _STRING_FIELDS:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[start] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        # Hash the bytes exactly as stored in the manifest: encoding the
        # latin-1 decoded text as UTF-8 would change non-ASCII names and
        # yield a file ID that matches no file in the backup.
        digest = hashlib.sha1(fullpath.encode(_ENCODING))
        mbdx[start] = digest.hexdigest()
    return mbdb


def modestr(val):
    """Return the nine-character ``rwxrwxrwx`` rendering of permission bits."""
    def mode(bits):
        return (('r' if bits & 0x4 else '-')
                + ('w' if bits & 0x2 else '-')
                + ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one file-info dict (which must carry a ``fileID`` key).

    The short form is ``(fileID)domain::filename``.  The verbose form
    prefixes type, permissions, uid, gid, size and the three timestamps, and
    appends the symlink target and any extra properties.  An unrecognised
    file type is reported on stderr and shown as ``?``.
    """
    brief = "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if not verbose:
        return brief
    kind = _FILE_TYPES.get(f['mode'] & 0xE000)
    if kind is None:
        print("Unknown file type %04x for %s" % (f['mode'], brief),
              file=sys.stderr)
        kind = '?'  # unknown
    info = "%s%s %08x %08x %7d %10d %10d %10d %s" % (
        kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
        f['filelen'], f['mtime'], f['atime'], f['ctime'], brief)
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True


def main():
    """List every record of ./Manifest.mbdb, in file order."""
    mbdb = process_mbdb_file(r"Manifest.mbdb")
    for offset in sorted(mbdb):
        fileinfo = mbdb[offset]
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            print("No fileID found for %s" % fileinfo_str(fileinfo),
                  file=sys.stderr)
        print(fileinfo_str(fileinfo, verbose))


if __name__ == '__main__':
    main()
0
Aug 21 '15 at 14:17
source share



All Articles