, . , :
.
, -
, .
, ,
, Python.
. ,
start, .
, - ,
. ,
.
, :
import re
import random
import sys
import time
def get_patterns():
    """Return a list of unique numeric search strings for the benchmark.

    Draws 300 integers from 60000..80000 (both ends inclusive) using a
    generator seeded with 1, converts each to ``str`` and drops duplicates.
    The order of the returned list is whatever order the intermediate set
    yields, so callers must not rely on it.
    """
    rng = random.Random(1)
    n = 300
    # ``range`` rather than ``xrange``: ``xrange`` only exists on Python 2,
    # while ``range`` works on both major versions (300 items is trivial).
    return list(set(str(rng.randint(60000, 80000)) for _ in range(n)))
def original(rs, matches):
    """Baseline search: test every pattern against every line of every file.

    For each file path in ``matches`` the whole file is read into a list of
    lines; then, pattern by pattern, one "found ..." line is printed for
    every line of the file that contains that pattern as a substring.
    """
    for csv_path in matches:
        with open(csv_path, 'r') as handle:
            rows = handle.readlines()
        # Pattern-major order: all hits for the first pattern come first.
        hits = (needle for needle in rs for row in rows if needle in row)
        for needle in hits:
            print("found %s in %s" % (needle, csv_path))
def mine(rs, matches):
    """Report every occurrence of any pattern from ``rs`` in the given files.

    All literal patterns are merged into a single compiled alternation (see
    ``build_regex``), so each file is read once and scanned in one pass
    instead of once per pattern. One "found ..." line is printed per
    non-overlapping occurrence.

    Args:
        rs: Collection of literal strings to search for.
        matches: Iterable of paths of the files to search.
    """
    # With no patterns the alternation is the empty regex, which matches the
    # empty string at every position and would print one bogus hit per
    # character of every file.
    if not rs:
        return
    my_rx = re.compile(build_regex(rs))
    for eachcsv in matches:
        with open(eachcsv, 'r') as f:
            body = f.read()
        # Results get their own name so the ``matches`` parameter (the file
        # list being iterated) is never rebound inside the loop.
        for hit in my_rx.findall(body):
            # Parenthesised form is valid on both Python 2 and Python 3 and
            # matches the style used by ``original``.
            print("found %s in %s" % (hit, eachcsv))
def build_regex(literal_patterns):
    """Build an alternation regex that matches any of the given literals.

    Each pattern is escaped so regex metacharacters in it are taken
    literally, then the escaped patterns are joined with ``|`` in the order
    they were given.
    """
    escaped = map(re.escape, literal_patterns)
    return "|".join(escaped)
def print_elapsed_time(label, callable, args):
    """Call ``callable(*args)`` and print how long the call took.

    Args:
        label: Name shown in the report line (right-aligned in 8 columns).
        callable: Function to time. The parameter name shadows the builtin
            but is kept because it is part of the existing signature.
        args: Sequence of positional arguments unpacked into the call.

    The return value of ``callable`` is discarded. If it raises, the
    exception propagates and no timing line is printed.
    """
    t1 = time.time()
    callable(*args)
    t2 = time.time()
    elapsed_ms = (t2 - t1) * 1000
    # Parenthesised single-argument form: a SyntaxError-free print on
    # Python 3 that still prints the same text on Python 2.
    print("%8s: %9.1f milliseconds" % (label, elapsed_ms))
def main(args):
    """Time both search implementations over the files named in ``args``.

    ``args`` is an argv-style sequence: element 0 is skipped and the rest
    are treated as file paths. The same pattern list and file list are
    passed first to ``original`` and then to ``mine``, each wrapped in
    ``print_elapsed_time``. Always returns 0.
    """
    patterns = get_patterns()
    csv_paths = args[1:]
    contenders = (('original', original), ('mine', mine))
    for label, search in contenders:
        print_elapsed_time(label, search, [patterns, csv_paths])
    return 0
# Script entry point: run the benchmark with the command-line arguments and
# use the value returned by main() as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
original, - mine.
300 400 . 30- . . , 3% ( , , ).
: . , - , :
~/source/stackoverflow/36923237$ python search.py example.csv
found green fox in example.csv
original: 9218.0 milliseconds
found green fox in example.csv
mine: 600.4 milliseconds
: , .
, foobar umspquux. - foobar, umspquux. , .
. , . "f" "u", , , "o" "m". . , , .
- . . "foobar" "foobar" "foobar - ". . , -, , . , '|'. , foobar | umspquux "foobar" , "umspquux". '|' '|' '\'.
build_regex. ['foobar', 'umspquux'] foobar | umspquux. - try-out, , .
, , , - - . , , , .
re.escape build_regex , ( '|') ( '\ |'), .
findall . (.. ).
Python Python . , Google Developers Python , Jeffrey Friedl - , , Python .