My claim: chains of Python generators are memory inefficient, which makes them unsuitable for certain types of applications. If possible, please prove me wrong.
First, a very simple example without generators:
import gc
def cocktail_objects():
    """Return a list of all Cocktail instances known to the garbage collector.

    Scans ``gc.get_objects()`` and keeps the objects that are instances of
    Cocktail. A list is built explicitly so the result can be printed
    directly on Python 2 and Python 3 alike (on Python 3, ``filter`` would
    return a lazy iterator instead of a list).
    """
    return [obj for obj in gc.get_objects() if isinstance(obj, Cocktail)]
class Cocktail(object):
    """A drink described by its ``ingredients`` value."""

    def __init__(self, ingredients):
        # Stored as given; __str__ hands it back unchanged.
        self.ingredients = ingredients

    def __repr__(self):
        # Wrap the str() form so a printed list shows e.g. Cocktail(rum).
        return 'Cocktail(%s)' % str(self)

    def __str__(self):
        return self.ingredients
def create(first_ingredient):
    """Start a new Cocktail that consists of ``first_ingredient`` only."""
    cocktail = Cocktail(first_ingredient)
    return cocktail
def with_ingredient(cocktail, ingredient):
    """Return a new Cocktail: the given one's ingredients plus ``ingredient``.

    The input cocktail is not modified; the two parts are joined with
    the separator ' and '.
    """
    combined = ' and '.join((cocktail.ingredients, ingredient))
    return Cocktail(combined)
# Demo 1: build each cocktail step by step in a plain loop.
first_ingredients = ['rum', 'vodka']
print 'using iterative style:'
for ingredient in first_ingredients:
    cocktail = create(ingredient)
    # Each step rebinds the same name, so the previous intermediate
    # Cocktail is no longer referenced through `cocktail`.
    cocktail = with_ingredient(cocktail, 'coke')
    cocktail = with_ingredient(cocktail, 'limes')
    print cocktail
    # Show which Cocktail instances the garbage collector still tracks.
    print cocktail_objects()
As expected, this prints:
rum and coke and limes
[Cocktail(rum and coke and limes)]
vodka and coke and limes
[Cocktail(vodka and coke and limes)]
Now let’s use iterator objects to make the transformation of cocktails more convenient:
class create_iter(object):
    """Iterator that turns each first ingredient into a new Cocktail.

    ``first_ingredients`` is walked by integer index, so it must support
    indexing and raise IndexError past its end (as a list does).
    """

    def __init__(self, first_ingredients):
        self.first_ingredients = first_ingredients
        self.i = 0  # index of the next ingredient to hand out

    def __iter__(self):
        return self

    def next(self):
        """Return the next Cocktail; raise StopIteration when exhausted."""
        try:
            ingredient = self.first_ingredients[self.i]
        except IndexError:
            raise StopIteration
        self.i += 1
        return create(ingredient)

    # Python 3 names the iterator protocol method __next__; the alias keeps
    # the Python 2 spelling working while making the class iterable on both.
    __next__ = next
class with_ingredient_iter(object):
    """Iterator that adds one ingredient to every cocktail of another iterator.

    ``cocktails_iter`` must itself be an iterator (it is advanced with the
    builtin ``next``); ``ingredient`` is passed to ``with_ingredient`` for
    each cocktail pulled from it.
    """

    def __init__(self, cocktails_iter, ingredient):
        self.cocktails_iter = cocktails_iter
        self.ingredient = ingredient

    def __iter__(self):
        return self

    def next(self):
        """Return the next extended Cocktail.

        StopIteration raised by the underlying iterator propagates
        unchanged and ends this iterator as well.
        """
        cocktail = next(self.cocktails_iter)
        return with_ingredient(cocktail, self.ingredient)

    # Python 3 names the iterator protocol method __next__; the alias keeps
    # the Python 2 spelling working while making the class iterable on both.
    __next__ = next
# Demo 2: the same pipeline built from chained iterator objects.
print 'using iterators:'
base = create_iter(first_ingredients)
# Each stage wraps the previous one and pulls from it on demand.
with_coke = with_ingredient_iter(base, 'coke')
with_coke_and_limes = with_ingredient_iter(with_coke, 'limes')
for cocktail in with_coke_and_limes:
    print cocktail
    # Show which Cocktail instances the garbage collector still tracks.
    print cocktail_objects()
The output is identical to the previous one.
Finally, let's replace the iterators with generators to get rid of the boilerplate:
def create_gen(first_ingredients):
    """Generator: yield create(ingredient) for each item of first_ingredients."""
    for ingredient in first_ingredients:
        # The new Cocktail is yielded directly; it is never bound to a
        # local name in this generator.
        yield create(ingredient)
def with_ingredient_gen(cocktails_gen, ingredient):
    """Generator: yield with_ingredient(cocktail, ingredient) per input cocktail."""
    for cocktail in cocktails_gen:
        # NOTE: while this generator is suspended at the yield below, the
        # local name `cocktail` still refers to the input object.
        yield with_ingredient(cocktail, ingredient)
# Demo 3: the same pipeline built from chained generators.
print 'using generators:'
base = create_gen(first_ingredients)
# Each stage wraps the previous generator and pulls from it on demand.
with_coke = with_ingredient_gen(base, 'coke')
with_coke_and_limes = with_ingredient_gen(with_coke, 'limes')
for cocktail in with_coke_and_limes:
    print cocktail
    # Show which Cocktail instances the garbage collector still tracks.
    print cocktail_objects()
This, however, prints:
rum and coke and limes
[Cocktail(rum), Cocktail(rum and coke), Cocktail(rum and coke and limes)]
vodka and coke and limes
[Cocktail(vodka), Cocktail(vodka and coke), Cocktail(vodka and coke and limes)]
This means that in a chain of generators, every object currently yielded within the chain stays in memory and is not freed, even when the objects at earlier positions in the chain are no longer needed. Result: more memory is used than necessary.
: , , ? , , .
, ( numpy ) . , , . Python 2.7. , Python 3, , . ? , - , , ?
1:
# Demo 4: the same pipeline built from chained itertools.imap objects
# (imap exists in Python 2 only).
print 'using imap:'
from itertools import imap
base = imap(lambda ingredient: create(ingredient), first_ingredients)
with_coke = imap(lambda cocktail: with_ingredient(cocktail, 'coke'), base)
with_coke_and_limes = imap(lambda cocktail: with_ingredient(cocktail, 'limes'), with_coke)
for cocktail in with_coke_and_limes:
    print cocktail
    # Force a full collection first (printing the value gc.collect() returns),
    # then list the Cocktail instances that are still tracked.
    print gc.collect()
    print cocktail_objects()
, , "" - . .
: , , . , Python , . itertools.imap, - .