I know this is a year late, but the question is still open, and I am surprised that json.JSONEncoder.iterencode() has not been mentioned.
iterencode() lets you consume the JSON encoding chunk by chunk, so the entire encoded string never has to be built in memory.
The only catch is that the json module does not know how to serialize a generator, so the generator has to be wrapped in something list-like that implements __iter__.
The following class does exactly that.
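For context, handing a generator straight to the json module fails, which is why the wrapper is needed at all:

import json

gen = ({'hello_world': i} for i in range(5))
json.dumps(gen)  # raises TypeError: generators are not JSON serializable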
class StreamArray(list):
    """
    Wraps a generator in a list subclass so that it can be JSON
    serialised while still retaining the lazy nature of a generator.
    I.e. it looks like a list to the json module without having to
    exhaust the generator and keep its contents in memory.
    """

    def __init__(self, generator):
        self.generator = generator
        self._len = 1

    def __iter__(self):
        self._len = 0
        for item in self.generator:
            yield item
            self._len += 1

    def __len__(self):
        """
        The json encoder truth-tests the list before iterating, which
        calls this method; it must report a non-zero length up front,
        otherwise the encoder emits [] without consuming the generator.
        """
        return self._len
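Note that the initial _len = 1 is not cosmetic: the encoder short-circuits on objects that look empty, so a zero length at that point would produce [] without ever touching the generator. A quick, hypothetical demonstration, forcing _len to 0 by hand:

import json

faked = StreamArray(iter([1, 2, 3]))
faked._len = 0
print(''.join(json.JSONEncoder().iterencode(faked)))  # prints []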
That is all there is to it. Wrap the generator in StreamArray and pass the result to iterencode(); the json encoder then pulls items from the generator one at a time and yields each encoded chunk as it goes, instead of materialising the whole list first.
A complete example:
import json

def large_list_generator_func():
    for i in range(5):
        chunk = {'hello_world': i}
        print('Yielding chunk:', chunk)
        yield chunk

with open('/tmp/streamed_write.json', 'w') as outfile:
    large_generator_handle = large_list_generator_func()
    stream_array = StreamArray(large_generator_handle)
    for chunk in json.JSONEncoder().iterencode(stream_array):
        print('Writing chunk:', chunk)
        outfile.write(chunk)
Running this shows that yielding and writing are interleaved: each chunk is written out as soon as the generator produces it, so the full structure never sits in memory at once.
Yielding chunk: {'hello_world': 0}
Writing chunk: [
Writing chunk: {
Writing chunk: "hello_world"
Writing chunk: :
Writing chunk: 0
Writing chunk: }
Yielding chunk: {'hello_world': 1}
Writing chunk: ,
Writing chunk: {
Writing chunk: "hello_world"
Writing chunk: :
Writing chunk: 1
Writing chunk: }
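As a closing note, json.dump() uses iterencode() under the hood and writes each chunk to the file object as it is produced, so if you do not need to inspect the individual chunks, the write loop above can be collapsed into a single call (a sketch using the same StreamArray and generator as above):

import json

with open('/tmp/streamed_write.json', 'w') as outfile:
    json.dump(StreamArray(large_list_generator_func()), outfile)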