. github . .
ben@nixbox:~/bin$ time python kmers.py ../E-coli.txt 9 500 3
(500, 3)-clumps of 9-mers found in that file: 1904
real 0m15.510s
user 0m14.241s
sys 0m0.956s
( ) /. , , . . ( , : -)).

(20,3) - 3-: "CAT". ( "AAA" ), , k, L t.
, . , , : (5,3) - 3-.

5 . , 3-mers ATA, TAA AAA. , ATA , AAA. , TAA , AAA - (5,3) AAA s.
, , , - , ; k-mer . , - ( python, dict s) k-mers . , , k-mer.
, - - - , , list - , deque - , , dict - , Counter - deque. , OrderedDict , ; , 1.
, , , - .
The code:
def get_clumps(genome, k, L, t):
    """Return the set of k-mers that form an (L, t)-clump in ``genome``.

    A k-mer is reported once its count inside the sliding window reaches
    ``t``.

    genome -- sequence to scan; handed to ``sliding_window`` together with
              ``k`` (presumably yielding each length-k substring in order;
              that helper is defined elsewhere).
    k      -- length of the k-mers.
    L      -- window length, in characters.
    t      -- number of occurrences inside one window that makes a clump.

    Returns a set of k-mers; empty when no k-mer can fit in the window.
    """
    if L < k:
        # No k-mer fits into a window shorter than k.  Without this guard the
        # negative limit below makes KmerSequence.add() pop from an empty
        # deque and raise IndexError on the very first k-mer.
        return set()

    # KmerSequence evicts only once it holds more than ``limit`` items, so a
    # limit of L - k keeps L - k + 1 k-mers: exactly those starting inside a
    # window of L characters.
    kmers = KmerSequence(L - k, t)
    for kmer in sliding_window(genome, k):
        kmers.add(kmer)
    return kmers.clumps
class KmerSequence(object):
    """Bounded sliding window over a stream of k-mers that records clumps.

    The window keeps the most recent ``limit + 1`` k-mers.  Whenever the
    number of copies of a k-mer inside the window reaches ``threshold``,
    that k-mer is added to ``clumps`` (and stays there for good).

    A negative ``limit`` is not supported: ``add`` would try to evict from
    an empty window and raise IndexError.
    """

    __slots__ = ['order', 'counts', 'limit', 'clumps', 't']

    def __init__(self, limit, threshold):
        # k-mers currently inside the window, oldest on the left.
        self.order = deque()
        # Occurrences of each k-mer inside the window; holds only k-mers
        # that are present in the window right now.
        self.counts = Counter()
        self.limit = limit
        # Every k-mer whose in-window count has reached the threshold.
        self.clumps = set()
        self.t = threshold

    def add(self, kmer):
        """Slide the window forward by one k-mer."""
        # Evict first, so the window never holds more than limit + 1 items.
        if len(self.order) > self.limit:
            self._remove_oldest()
        self._add_one(kmer)

    def _add_one(self, kmer):
        """Append ``kmer`` to the window and record it if it became a clump."""
        self.order.append(kmer)
        new_count = self.counts[kmer] + 1
        self.counts[kmer] = new_count
        # Equality is enough: the count grows by one at a time, so it has to
        # pass through the threshold, and ``clumps`` is a set anyway.
        if new_count == self.t:
            self.clumps.add(kmer)

    def _remove_oldest(self):
        """Drop the oldest k-mer from the window and from the counts."""
        oldest = self.order.popleft()
        remaining = self.counts[oldest] - 1
        if remaining > 0:
            self.counts[oldest] = remaining
        else:
            # Delete instead of storing 0; otherwise ``counts`` keeps an
            # entry for every distinct k-mer ever seen and memory is no
            # longer bounded by the window size.
            del self.counts[oldest]
Usage:
# Load the genome and look for (L, t)-clumps of k-mers in it.
# ``genomefile`` and ``get_clumps`` are expected to be defined earlier.
with open(genomefile) as f:
    # Strip line breaks and any other whitespace: a trailing newline or a
    # line-wrapped file would otherwise put "\n" inside k-mers and break up
    # the real k-mers that span a line boundary.
    genome = "".join(f.read().split())
k = 9    # k-mer length
L = 500  # window length
t = 3    # occurrences within one window that make a clump
clumps = get_clumps(genome, k, L, t)
, , , script __main__ - github . ..