Change preprocess so that it takes a threshold instead of a predicate function. Something like this:
def preprocess(data, threshold):
    """Run ``old_preprocess`` on ``data`` with a "less than ``threshold``" test.

    The predicate is built here, inside the function, from the plain
    ``threshold`` value, so callers only need to pass the threshold itself
    rather than a function object.

    Args:
        data: Forwarded unchanged to ``old_preprocess``.
        threshold: Upper bound; the predicate is true for ``x < threshold``.

    Returns:
        Whatever ``old_preprocess(data, predicate)`` returns.
    """
    def predicate(x):
        return x < threshold

    return old_preprocess(data, predicate)
Then preds becomes a list of plain thresholds, which is picklable:
# Plain integer thresholds can be pickled and sent to the worker processes.
preds = [1, 2, 3, 42]
pool = Pool(processes=4)
# starmap unpacks each (data, threshold) pair into preprocess's two
# parameters; plain map would pass the whole tuple as a single argument
# and fail with a TypeError for the missing second argument.
pool.starmap(preprocess, zip(data, preds))
Or, if you need different comparisons, pass the comparison along with the threshold using the operator module:
def preprocess(data, pred):
    """Run ``old_preprocess`` on ``data`` with a predicate built from ``pred``.

    Args:
        data: Forwarded unchanged to ``old_preprocess``.
        pred: A ``(threshold, op)`` pair, where ``op`` is a two-argument
            callable; the predicate evaluates ``op(x, threshold)``.

    Returns:
        Whatever ``old_preprocess(data, predicate)`` returns.
    """
    threshold, op = pred

    def predicate(x):
        return op(x, threshold)

    return old_preprocess(data, predicate)
import operator as op
# Each entry pairs a threshold with a comparison function from the operator
# module, matching the (threshold, op) pair that preprocess unpacks.
preds = [(1, op.lt), (2, op.gt), (3, op.ge), (42, op.lt)]
pool = Pool(processes=4)
# starmap unpacks each (data, pred) pair into preprocess's two parameters;
# plain map would pass the whole tuple as a single argument and fail with
# a TypeError for the missing second argument.
pool.starmap(preprocess, zip(data, preds))
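If you really need arbitrary predicates, you have to serialize them yourself, for example with something like marshal, which turns a function's code object into bytes.
Something like this: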
# Serialize just the code object of every predicate into bytes with marshal;
# the resulting byte strings are what gets handed to the pool.
real_preds = [
    marshal.dumps(fn.__code__)
    for fn in preds
]
And in preprocess, rebuild the function:
import types
def preprocess(data, pred):
    """Rebuild the predicate function from its marshalled code object.

    Args:
        data: Not used by the part of the function shown here.
        pred: Bytes produced by ``marshal.dumps`` on a function's
            ``__code__`` object.
    """
    # marshal.loads restores the code object; FunctionType wraps it in a
    # callable function that uses this module's globals.
    pred = types.FunctionType(marshal.loads(pred), globals())
    # NOTE(review): the snippet ends here, so the function returns None as
    # written; presumably the rebuilt ``pred`` is then used on ``data`` as in
    # the other variants. Confirm before relying on it.
Minimal working example (MWE):
>>> from multiprocessing import Pool
>>> import marshal
>>> import types
>>> def preprocess(pred):
...     pred = types.FunctionType(marshal.loads(pred), globals())
...     return pred(2)
...
>>> preds = [lambda x: x < 1,
...          lambda x: x < 2,
...          lambda x: x < 3,
...          lambda x: x < 42]
>>> real_preds = [marshal.dumps(pred.__code__) for pred in preds]
>>> pool = Pool(processes=4)
>>> pool.map(preprocess, real_preds)
[False, False, True, True]
As you can see, pool.map works here. However, passing a lambda directly to pool.map fails:
>>> pool.map(lambda x: preprocess(x), real_preds)
Exception in thread Thread-5:
Traceback (most recent call last):
  File "/usr/lib/python3.3/threading.py", line 639, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.3/threading.py", line 596, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.3/multiprocessing/pool.py", line 351, in _handle_tasks
    put(task)
  File "/usr/lib/python3.3/multiprocessing/connection.py", line 206, in send
    ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(obj)
_pickle.PicklingError: Can't pickle <class 'function'>: attribute lookup builtins.function failed
"is pool.map ? , . , " ", , , , " " :
# Word filter: truthy when the lower-cased word is not in
# stopwords.words('english') and contains at least one letter a-z.
# The result is either False or the re.search match object, not a strict bool.
# NOTE(review): stopwords.words('english') is called on every invocation and
# w.lower() is computed twice; presumably `stopwords` is NLTK's corpus, confirm.
lambda w: (w.lower() not in stopwords.words('english') and re.search("[a-z]", w.lower()))
, , pool.map. , w .
, , pool.map , w 35000 . w 1000, Pool 15 map ( 256 . 60000, Pool ).
, w , lambda def w.lower(). map pool.map.