The best way to understand something like this is to simply write a bunch of tests and use timeit to find out which is faster. I did some tests below, but you should try this with your dict vocabulary, as your results may vary.
If you want the timings to be more stable (accurate), you can increase the `number` argument passed to timeit - it will just make the test take longer. Also note that the value returned by timeit is the total runtime of all `number` calls, not the average runtime of a single call.
testing with 10 keys... serialize flat: 2.97198390961 serialize eval: 4.60271120071 serialize defaultdict: 20.3057091236 serialize dict: 20.2011070251 serialize defaultdict new pickle: 14.5152060986 serialize dict new pickle: 14.7755970955 serialize json: 13.5039670467 serialize cjson: 4.0456969738 unserialize flat: 1.29577493668 unserialize eval: 25.6548647881 unserialize defaultdict: 10.2215960026 unserialize dict: 10.208122015 unserialize defaultdict new pickle: 5.70747089386 unserialize dict new pickle: 5.69750404358 unserialize json: 5.34811091423 unserialize cjson: 1.50241613388 testing with 100 keys... serialize flat: 2.91076397896 serialize eval: 4.72978711128 serialize defaultdict: 21.331786871 serialize dict: 21.3218340874 serialize defaultdict new pickle: 15.7140991688 serialize dict new pickle: 15.6440980434 serialize json: 14.3557379246 serialize cjson: 5.00576901436 unserialize flat: 1.6677339077 unserialize eval: 22.9142649174 unserialize defaultdict: 10.7773029804 unserialize dict: 10.7524499893 unserialize defaultdict new pickle: 6.13370203972 unserialize dict new pickle: 6.18057107925 unserialize json: 5.92281794548 unserialize cjson: 1.91151690483
code:
import cPickle import json try: import cjson # not Python standard library except ImportError: cjson = False from collections import defaultdict dd1 = defaultdict(list) dd2 = defaultdict(list) for i in xrange(1000000): dd1[str(i % 10)].append(str(i)) dd2[str(i % 100)].append(str(i)) dt1 = dict(dd1) dt2 = dict(dd2) from timeit import timeit def testdict(dd, dt): def serialize_defaultdict(): with open('defaultdict.pickle', 'w') as f: cPickle.dump(dd, f) def serialize_p2_defaultdict(): with open('defaultdict.pickle2', 'w') as f: cPickle.dump(dd, f, -1) def serialize_dict(): with open('dict.pickle', 'w') as f: cPickle.dump(dt, f) def serialize_p2_dict(): with open('dict.pickle2', 'w') as f: cPickle.dump(dt, f, -1) def serialize_json(): with open('dict.json', 'w') as f: json.dump(dt, f) if cjson: def serialize_cjson(): with open('dict.cjson', 'w') as f: f.write(cjson.encode(dt)) def serialize_flat(): with open('dict.flat', 'w') as f: f.write('\n'.join([' '.join([k] + v) for k, v in dt.iteritems()])) def serialize_eval(): with open('dict.eval', 'w') as f: f.write('\n'.join([k + '\t' + repr(v) for k, v in dt.iteritems()])) def unserialize_defaultdict(): with open('defaultdict.pickle') as f: assert cPickle.load(f) == dd def unserialize_p2_defaultdict(): with open('defaultdict.pickle2') as f: assert cPickle.load(f) == dd def unserialize_dict(): with open('dict.pickle') as f: assert cPickle.load(f) == dt def unserialize_p2_dict(): with open('dict.pickle2') as f: assert cPickle.load(f) == dt def unserialize_json(): with open('dict.json') as f: assert json.load(f) == dt if cjson: def unserialize_cjson(): with open('dict.cjson') as f: assert cjson.decode(f.read()) == dt def unserialize_flat(): with open('dict.flat') as f: dtx = {} for line in f: vals = line.split() dtx[vals[0]] = vals[1:] assert dtx == dt def unserialize_eval(): with open('dict.eval') as f: dtx = {} for line in f: vals = line.split('\t') dtx[vals[0]] = eval(vals[1]) assert dtx == dt print 'serialize flat:', 
timeit(serialize_flat, number=10) print 'serialize eval:', timeit(serialize_eval, number=10) print 'serialize defaultdict:', timeit(serialize_defaultdict, number=10) print 'serialize dict:', timeit(serialize_dict, number=10) print 'serialize defaultdict new pickle:', timeit(serialize_p2_defaultdict, number=10) print 'serialize dict new pickle:', timeit(serialize_p2_dict, number=10) print 'serialize json:', timeit(serialize_json, number=10) if cjson: print 'serialize cjson:', timeit(serialize_cjson, number=10) print 'unserialize flat:', timeit(unserialize_flat, number=10) print 'unserialize eval:', timeit(unserialize_eval, number=10) print 'unserialize defaultdict:', timeit(unserialize_defaultdict, number=10) print 'unserialize dict:', timeit(unserialize_dict, number=10) print 'unserialize defaultdict new pickle:', timeit(unserialize_p2_defaultdict, number=10) print 'unserialize dict new pickle:', timeit(unserialize_p2_dict, number=10) print 'unserialize json:', timeit(unserialize_json, number=10) if cjson: print 'unserialize cjson:', timeit(unserialize_cjson, number=10) print 'testing with 10 keys...' testdict(dd1, dt1) print 'testing with 100 keys...' testdict(dd2, dt2)