, , ? :
# Build the text to search: get_string (defined outside this excerpt) is
# called with 4**12 and a "$" character is appended to its result.
# NOTE(review): "$" presumably acts as an end-of-text marker - confirm.
word=get_string(4**12)+"$"
def matcher(word, match_string):
    """Return the start index of every occurrence of match_string in word.

    Occurrences may overlap: after each hit the search resumes one
    character past the hit's start, not past its end.

    Args:
        word: The text to search.
        match_string: The substring to look for.

    Returns:
        A list of start indices in ascending order; empty when there is
        no occurrence.
    """
    positions = []
    start = word.find(match_string)
    while start != -1:
        positions.append(start)
        # Advance by one (not by len(match_string)) so overlapping hits count.
        start = word.find(match_string, start + 1)
    return positions
# Example calls to matcher. The bare list under each print looks like the
# output the author recorded for that call (the first one depends on the
# contents of word) - TODO confirm.
print matcher(word,'AAAAAAAAAAAA')
[13331731, 13331732, 13331733]
print matcher('AACTATAAATTTACCA','AT')
[4, 8]
, 30 , 4 ^ 12. 12- , . - -.
- , , :
import suffixtree
# Build one suffix tree over the whole text, then query it for a pattern.
# NOTE(review): find_substring presumably returns a match index, or -1 when
# the pattern is absent - confirm against the suffixtree module.
stree = suffixtree.SuffixTree(word)
print stree.find_substring("AAAAAAAAAAAA")
, , . , -, , , , . find_substring ( , , , ).
: ,
, 10 4 ^ 12, 9,5 ( , , ...). , ( , ), . ( , ) , 10 , . , . ( , , word , max_length, , , ):
def split_find(word, search_words, max_length):
    """Find one occurrence of each search word using per-chunk suffix trees.

    ``word`` is cut into consecutive chunks of ``max_length`` characters
    (the last chunk may be shorter). A suffix tree is built for each chunk
    and queried for every search word that has not been located yet. A
    word that is not found inside the chunk is additionally looked for in
    a small window around the chunk's end, so occurrences that straddle
    two chunks are not missed.

    Args:
        word: The full text to search.
        search_words: Iterable of substrings to look for.
        max_length: Number of characters per chunk; must be at least 1.

    Returns:
        A dict mapping each located search word to a tuple
        ``(position, chunk_index)``: the absolute start index in ``word``
        and the index of the chunk in which the match starts. Words that
        were not found are absent from the dict.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    # Ceiling division, so a trailing chunk shorter than max_length is still
    # searched. ``//`` and ``range`` behave the same on Python 2 and 3.
    number_sub_trees = (len(word) + max_length - 1) // max_length
    matches = {}
    for i in range(number_sub_trees):
        chunk_start = max_length * i
        chunk_end = chunk_start + max_length
        stree = suffixtree.SuffixTree(word[chunk_start:chunk_end])
        for search in search_words:
            if search in matches:
                continue

            # NOTE(review): find_substring is assumed to return the match
            # index within the chunk, or -1 when absent - confirm.
            match = stree.find_substring(search)
            if match > -1:
                matches[search] = match + chunk_start, i
                continue

            # Only a word of two or more characters can straddle a boundary,
            # and the last chunk has no following chunk to straddle into.
            if i + 1 < number_sub_trees and len(search) > 1:
                # Smallest window holding every occurrence that crosses
                # chunk_end; clamp at 0 so a long search word cannot turn
                # the start into a negative (from-the-end) slice index.
                window_start = max(0, chunk_end - len(search) + 1)
                window_end = chunk_end + len(search) - 1
                match = word[window_start:window_end].find(search)
                if match > -1:
                    # Offset by the window start, not the chunk start.
                    matches[search] = match + window_start, i
    return matches
# Driver for split_find: search the text returned by get_string(4**12)
# (get_string is defined outside this excerpt) for a single run of 'A's,
# building one suffix tree per chunk of 4**10 characters.
word=get_string(4**12)
search_words = ['AAAAAAAAAAAAAAAA']
max_length = 4**10
print split_find(word,search_words,max_length)
4 ^ 10, 700 .
, 4 ^ 12, 10 13 ( , , , , ). 100 , 1.00 * 41sec = 1 .
, 14 , ... 9,5 .
, 1,6 1 , , !