Reading a binary file into a struct in Python

I have a binary file with a known format / structure.

How do I read all of the binary data into an array of structures?

Something like (in pseudo-code)

bytes = read_file(filename) struct = {'int','int','float','byte[255]'} data = read_as_struct(bytes, struct) data[1] >>> 10,11,10.1,Arr[255] 

EDIT:

Solution:

 # Parse a byte string of fixed-size records into nested lists.
 # Each record is five ints followed by a 256-int array; the array is stored
 # as a nested list in the record's last position.
 # NOTE: `bytes` is the raw file content read beforehand. The name shadows the
 # builtin but is kept because the surrounding snippets use it.
 data = []
 fmt = '=iiiii256i'    # one complete record
 fmt_s = '=iiiii'      # the five leading ints
 fmt_spec = '=256i'    # the trailing array; '=' keeps sizes consistent with fmt
 struct_size = struct.calcsize(fmt)
 head_size = struct.calcsize(fmt_s)  # replaces the hard-coded 1024-byte split

 # Iterate over record start offsets. The upper bound admits the final
 # complete record (the earlier range skipped the last two) and stops before
 # any trailing partial record.
 for offset in range(0, len(bytes) - struct_size + 1, struct_size):
     dat1 = list(struct.unpack_from(fmt_s, bytes, offset))
     dat2 = list(struct.unpack_from(fmt_spec, bytes, offset + head_size))
     dat1.append(dat2)
     data.append(dat1)
+4
source share
3 answers

Use the struct module; you need to describe the types in a format string, as documented for that library:

 # '=' selects native byte order with standard sizes and no padding;
 # 'HHf255s' is two unsigned shorts, one float and a 255-byte string.
 struct.unpack('=HHf255s', bytes)

The above example expects native byte order, two unsigned shorts, a float and a string of 255 characters.

To loop over a bytes string that has already been read completely, I would use itertools; there is a handy grouper recipe, which I have adapted here:

 from struct import unpack, calcsize

 fmt_s = '=5i'        # five leading ints
 fmt_spec = '=256i'   # trailing 256-int array
 size_s = calcsize(fmt_s)
 size = size_s + calcsize(fmt_spec)


 def chunked(iterable, n, fillvalue=''):
     """Yield consecutive pieces of ``iterable``, each ``n`` items long.

     iterable: a sliceable sequence such as a byte string; any other iterable
         of strings is joined into one string first.
     n: number of items per piece.
     fillvalue: repeated once per missing item to pad a short final piece.
         The default (empty) leaves the final piece short.

     Slicing replaces the izip_longest/imap recipe, which exists only on
     Python 2 and cannot re-join the integers that Python 3 yields when
     iterating over bytes.
     """
     if not hasattr(iterable, '__getitem__'):
         iterable = ''.join(iterable)
     for start in range(0, len(iterable), n):
         piece = iterable[start:start + n]
         shortfall = n - len(piece)
         if shortfall and fillvalue:
             piece += fillvalue * shortfall
         yield piece


 # One tuple per record: the five ints, then the 256-int array as a nested
 # tuple. `bytes` is the raw file content read beforehand.
 data = [unpack(fmt_s, section[:size_s]) + (unpack(fmt_spec, section[size_s:]),)
         for section in chunked(bytes, size)]

This produces tuples rather than lists, but it is easy enough to adjust if you need lists:

 # List-based variant: each record is a list of the fmt_s values followed by
 # one nested list holding the fmt_spec values.
 data = [list(unpack(fmt_s, section[:size_s])) + [list(unpack(fmt_spec, section[size_s:]))] for section in chunked(bytes, size)]
+12
source

Actually, it looks like you are trying to read a list (or array) of structures from a file. The idiomatic way to do this in Python is to use the struct module and call struct.unpack() in a loop - either a fixed number of times, if you know the number of records in advance, or until the end of the file is reached - and store the results in a list. Here is an example of the latter:

 import struct

 # Record layout: int[5], float, byte[255]
 struct_fmt = '=5if255s'
 struct_len = struct.calcsize(struct_fmt)
 struct_unpack = struct.Struct(struct_fmt).unpack_from

 results = []
 with open(filename, "rb") as f:
     # Read one record-sized block at a time; an empty read means end of file.
     data = f.read(struct_len)
     while data:
         s = struct_unpack(data)
         results.append(s)
         data = f.read(struct_len)

The same results can be obtained a little more succinctly using a list comprehension together with a short generator helper function (i.e. read_chunks() below):

 def read_chunks(f, length):
     """Yield successive blocks of up to ``length`` bytes read from ``f``.

     Iteration ends at the first empty read, i.e. at end of file.
     """
     block = f.read(length)
     while block:
         yield block
         block = f.read(length)


 with open(filename, "rb") as f:
     # Unpack every block the generator produces into one result tuple.
     results = list(map(struct_unpack, read_chunks(f, struct_len)))
+14
source

Add comments

 import struct 

First, just read the binary file into a byte string:

 # Read the whole file as raw bytes. open() inside a `with` block works on
 # both Python 2 and 3 and closes the handle deterministically; the file()
 # builtin exists only on Python 2 and left the handle open.
 with open('mbrcontent', 'rb') as mbr_file:
     mbr = mbr_file.read()

Then you can simply slice out part of it:

 # Bytes 446-509 of the data read above: the 64-byte region that the later
 # example parses as four 16-byte partition entries.
 partition_table = mbr[446:510]

and then unpack bytes from it as an integer:

 # Interpret bytes 510-511 as one little-endian unsigned 16-bit integer;
 # unpack() returns a tuple, so take its only element.
 signature = struct.unpack('<H', mbr[510:512])[0]

A more complex example:

 # Decode and print the four partition-table entries held in `partition_table`,
 # using `signature` (read earlier with '<H') to pick the byte order.
 little_endian = (signature == 0xaa55)  # should be True
 print("Little endian: %s" % little_endian)

 # Layout of one partition-table entry.
 PART_FMT = ('<' if little_endian else '>') + (
     "B"  # status (0x80 = bootable (active), 0x00 = non-bootable)
     # CHS of first block
     "B"  # head
     "B"  # sector is in bits 5-0; bits 9-8 of cylinder are in bits 7-6
     "B"  # bits 7-0 of cylinder
     "B"  # partition type
     # CHS of last block
     "B"  # head
     "B"  # sector is in bits 5-0; bits 9-8 of cylinder are in bits 7-6
     "B"  # bits 7-0 of cylinder
     "L"  # LBA of first sector in the partition
     "L"  # number of blocks in partition, in little-endian format
 )

 PART_SIZE = 16
 fmt_size = struct.calcsize(PART_FMT)
 # sanity check expectations
 assert fmt_size == PART_SIZE, "Partition format string is %i bytes, not %i" % (fmt_size, PART_SIZE)


 def cyl_sector(sector_cyl, cylinder7_0):
     """Split the two packed CHS bytes into ``(sector, cylinder)``.

     sector_cyl: byte with the sector number in bits 5-0 and bits 9-8 of the
         cylinder in bits 7-6.
     cylinder7_0: byte with bits 7-0 of the cylinder.
     """
     sector = sector_cyl & 0x3F  # bits 5-0 (six bits, so the mask is 0x3F)
     # bits 7-6 of sector_cyl contain bits 9-8 of the cylinder
     cyl_high = (sector_cyl >> 6) & 0x03
     cyl = (cyl_high << 8) | cylinder7_0
     return sector, cyl


 for partition in range(4):
     offset = PART_SIZE * partition
     (status, start_head, start_sector_cyl, start_cyl7_0, part_type,
      end_head, end_sector_cyl, end_cyl7_0, lba, blocks) = struct.unpack(
         PART_FMT, partition_table[offset:offset + PART_SIZE])

     # Assemble the space-separated summary line and print it once, which
     # behaves the same on Python 2 and 3.
     summary = ["Partition #%i" % partition]
     if status == 0x80:
         summary.append("Bootable")
     elif status:
         summary.append("Unknown status [%s]" % hex(status))
     summary.append("Type=0x%x" % part_type)
     print(" ".join(summary))

     # cyl_sector() returns (sector, cylinder); reorder so the values line up
     # with the Heads / Cyl / Sect labels below.
     start_sector, start_cyl = cyl_sector(start_sector_cyl, start_cyl7_0)
     end_sector, end_cyl = cyl_sector(end_sector_cyl, end_cyl7_0)
     start = (start_head, start_cyl, start_sector)
     end = (end_head, end_cyl, end_sector)
     print(" (Start: Heads:%i\tCyl:%i\tSect:%i)" % start)
     print(" (End: Heads:%i\tCyl:%i\tSect:%i)" % end)
     print(" LBA: %s" % lba)
     print(" Blocks: %s" % blocks)
0
source

All Articles