Forming a dictionary from a list item

Hi, I have a list that contains metadata from images as follows:

['Component 1: Y component: Quantization table 0, Sampling factors 1 horiz/1 vert', 'Component 2: Cb component: Quantization table 1, Sampling factors 1 horiz/1 vert', 'Component 3: Cr component: Quantization table 1, Sampling factors 1 horiz/1 vert', 'Compression Type: Progressive, Huffman', 'Content-Length: 14312', 'Content-Type: image/jpeg'] 

I want to make a dictionary using the list splitting ":" in the following format:

 {Component 1: {Y component: [Quantization table 0, Sampling factors 1 horiz/1 vert]}, Component 2: {Cb component: [Quantization table 1, Sampling factors 1 horiz/1 vert]}, Component 3: {Cr component: [Quantization table 1, Sampling factors 1 horiz/1 vert]}, Compression Type: [Progressive, Huffman], Content-Length: 14312, Content-Type: image/jpeg} 

Currently, I have written code that does not work.

 def make_dict(seq):
     # Attempt to collect "key: value" strings from seq into a dict of lists.
     res = {}
     # NOTE: `is not` tests object identity, not string equality; `!=` is the
     # usual way to compare against an empty string.
     if seq[0] is not '':
         for elt in seq:
             # split(':') splits on every colon, so an item such as
             # 'Component 1: Y component: ...' yields three parts and this
             # two-name unpacking raises ValueError.
             k, v = elt.split(':')
             try:
                 res[k].append(v)
             except KeyError:
                 # First value seen for this key: start a new list.
                 res[k] = [v]
     # Python 2 print statement; the dict is printed rather than returned.
     print res

This code does not work. I also tried other approaches, but I can not get the format.

+6
source share
4 answers

You can use a list comprehension inside a generator expression passed to collections.OrderedDict:

 >>> li=['Component 1: Y component: Quantization table 0, Sampling factors 1 horiz/1 vert', 'Component 2: Cb component: Quantization table 1, Sampling factors 1 horiz/1 vert', 'Component 3: Cr component: Quantization table 1, Sampling factors 1 horiz/1 vert', 'Compression Type: Progressive, Huffman', 'Content-Length: 14312', 'Content-Type: image/jpeg'] >>> d=OrderedDict((sub[0],{sub[1]:sub[2:]}) if sub[2:] else (sub[0],sub[1]) for sub in [item.split(':') for item in li]) >>> d OrderedDict([('Component 1', {' Y component': [' Quantization table 0, Sampling factors 1 horiz/1 vert']}), ('Component 2', {' Cb component': [' Quantization table 1, Sampling factors 1 horiz/1 vert']}), ('Component 3', {' Cr component': [' Quantization table 1, Sampling factors 1 horiz/1 vert']}), ('Compression Type', ' Progressive, Huffman'), ('Content-Length', ' 14312'), ('Content-Type', ' image/jpeg')]) >>> 
+3
source
 # Sample image metadata: each item is "key: value" or "key: subkey: value".
 l = [
     'Component 1: Y component: Quantization table 0, Sampling factors 1 horiz/1 vert',
     'Component 2: Cb component: Quantization table 1, Sampling factors 1 horiz/1 vert',
     'Component 3: Cr component: Quantization table 1, Sampling factors 1 horiz/1 vert',
     'Compression Type: Progressive, Huffman',
     'Content-Length: 14312',
     'Content-Type: image/jpeg',
 ]

 # Build the nested dictionary; values keep their leading spaces in this version.
 d = {}
 for ele in l:
     # Split on at most the first two colons; any further colons stay in the value.
     spl = ele.split(":", 2)
     if len(spl) == 3:
         # Three parts: outer key, inner key, comma-separated values.
         k1, k2, v = spl
         d[k1] = {k2: v.split(",")}
     else:
         k, v = spl
         # Split on commas (not on whitespace) so a multi-valued entry becomes
         # a list of its values; single values are stored as plain strings.
         d[k] = v.split(",") if "," in v else v

Output:

 {'Component 1': {' Y component': [' Quantization table 0', ' Sampling factors 1 horiz/1 vert']}, 'Component 2': {' Cb component': [' Quantization table 1', ' Sampling factors 1 horiz/1 vert']}, 'Component 3': {' Cr component': [' Quantization table 1', ' Sampling factors 1 horiz/1 vert']}, 'Compression Type': [' Progressive', ' Huffman'], 'Content-Length': ' 14312', 'Content-Type': ' image/jpeg'} 

To remove the leading spaces, you can strip them with str.strip:

 # Build the nested dictionary with surrounding whitespace removed from
 # inner keys and from every value.
 d = {}
 for ele in l:
     # Split on at most the first two colons; any further colons stay in the value.
     spl = ele.split(":", 2)
     if len(spl) == 3:
         # Three parts: outer key, inner key, comma-separated values.
         k1, k2, v = spl
         d[k1] = {k2.strip(): [part.strip() for part in v.split(",")]}
     else:
         k, v = spl
         # Split on commas (not on whitespace) for multi-valued entries, and
         # call strip() so the stored value is a string, not a bound method.
         d[k] = [part.strip() for part in v.split(",")] if "," in v else v.strip()

Output:

 {'Component 1': {'Y component': ['Quantization table 0', 'Sampling factors 1 horiz/1 vert']}, 'Component 2': {'Cb component': ['Quantization table 1', 'Sampling factors 1 horiz/1 vert']}, 'Component 3': {'Cr component': ['Quantization table 1', 'Sampling factors 1 horiz/1 vert']}, 'Compression Type': ['Progressive', 'Huffman'], 'Content-Length': '14312', 'Content-Type': 'image/jpeg'} 

Both of them really correspond to the expected result.

+3
source

You can use a recursive algorithm like the one below if you want to handle any level of dictionary nesting. Example -

 def makedict(elem):
     """Turn a "key: value" string into a nested structure.

     A string containing ':' becomes a one-entry dict whose key is the
     stripped text before the first colon and whose value is the result of
     processing the stripped remainder in the same way. A string without
     ':' but with ',' becomes a list of its stripped comma-separated parts.
     Any other string is returned unchanged.
     """
     head, colon, tail = elem.partition(':')
     if colon:
         # Only the first colon separates key from value; recurse on the rest.
         return {head.strip(): makedict(tail.strip())}
     if ',' in elem:
         return [piece.strip() for piece in elem.split(',')]
     return elem

If you want to make a dictionary of dictionaries, you can do -

 # Merge the dict produced for each item of s into one combined dictionary;
 # a later item with the same top-level key replaces the earlier entry.
 d = {}
 for elem in s:
     d.update(makedict(elem))

Or, if you want a list of dictionaries, call the above function for each element of your list in a list comprehension. Example -

 # One parsed structure per input item, in the same order as yourlist.
 result = list(map(makedict, yourlist))

Demo for the dictionary of dictionaries -

 >>> d = {}
 >>> for elem in s:
 ...     d.update(makedict(elem))
 ...
 >>> d
 {'Component 2': {'Cb component': ['Quantization table 1', 'Sampling fac...   (output truncated)
 >>> import pprint
 >>> pprint.pprint(d)
 {'Component 1': {'Y component': ['Quantization table 0',
                                  'Sampling factors 1 horiz/1 vert']},
  'Component 2': {'Cb component': ['Quantization table 1',
                                   'Sampling factors 1 horiz/1 vert']},
  'Component 3': {'Cr component': ['Quantization table 1',
                                   'Sampling factors 1 horiz/1 vert']},
  'Compression Type': ['Progressive', 'Huffman'],
  'Content-Length': '14312',
  'Content-Type': 'image/jpeg'}
+2
source

You can elegantly solve the problem using recursion and a split limit (the second argument to split limits the number of splits performed):

 def make_dict(l):
     """Build a (possibly nested) dict from a list of "key:value" strings.

     Each item is split at its first colon. If the remainder still contains
     a colon it is parsed recursively into a nested dict; otherwise it is
     stored as-is. Whitespace around keys and values is not removed. An
     item without any colon raises ValueError from the unpacking.
     """
     parsed = {}
     for entry in l:
         name, rest = entry.split(':', 1)
         # Wrap the remainder in a list so the recursive call sees one item.
         parsed[name] = make_dict([rest]) if ':' in rest else rest
     return parsed

and the test seems to meet your expectations:

 >>> l = ['Component 1: Y component: Quantization table 0, Sampling factors 1 horiz/1 vert', 'Component 2: Cb component: Quantization table 1, Sampling factors 1 horiz/1 vert', 'Component 3: Cr component: Quantization table 1, Sampling factors 1 horiz/1 vert', 'Compression Type: Progressive, Huffman', 'Content-Length: 14312', 'Content-Type: image/jpeg'] >>> make_dict(l) {'Component 1': {' Y component': ' Quantization table 0, Sampling factors 1 horiz/1 vert'}, 'Component 2': {' Cb component': ' Quantization table 1, Sampling factors 1 horiz/1 vert'}, 'Component 3': {' Cr component': ' Quantization table 1, Sampling factors 1 horiz/1 vert'}, 'Compression Type': ' Progressive, Huffman', 'Content-Length': ' 14312', 'Content-Type': ' image/jpeg'} 
+1
source

All Articles