Try the following:
import collections
import itertools


def runLenEncode(s):
    """Run-length encode a sequence.

    Args:
        s: A string (or any iterable) whose consecutive equal items are
            collapsed into runs.

    Returns:
        A list of ``(item, run_length)`` tuples in order of appearance.
        Empty input yields an empty list.
    """
    # groupby yields one group per run of consecutive equal items.
    return [(char, sum(1 for _ in run)) for char, run in itertools.groupby(s)]


def runLenFilter(encoding, thresholds, repLens):
    """Shorten (or lengthen) every run that reaches a per-character threshold.

    Args:
        encoding: Iterable of ``(char, count)`` pairs, e.g. the output of
            ``runLenEncode``.
        thresholds: Mapping ``char -> minimum run length`` that triggers a
            replacement.
        repLens: Mapping ``char -> replacement run length``.

    Returns:
        The decoded string, where each run of a character in ``thresholds``
        whose length is at least the threshold is emitted with the length
        given by ``repLens``. All other runs are emitted unchanged.
    """
    answer = []
    for char, count in encoding:
        if char in thresholds and count >= thresholds[char]:
            # A character with a threshold but no replacement length keeps
            # its original run instead of raising KeyError.
            count = repLens.get(char, count)
        answer.append(char * count)
    return ''.join(answer)


def maxFilter(encoding, repLens):
    """Replace only the longest run(s) of each character listed in repLens.

    Args:
        encoding: Iterable of ``(char, count)`` pairs. It is traversed
            twice, so it must be re-iterable (e.g. a list).
        repLens: Mapping ``char -> replacement run length``.

    Returns:
        The decoded string, where every run whose length equals the maximum
        run length seen for that character (and whose character is in
        ``repLens``) is emitted with the replacement length. All other runs
        are emitted unchanged.
    """
    # First pass: longest run per character of interest.
    maxCounts = collections.defaultdict(int)
    for char, count in encoding:
        if char in repLens:
            maxCounts[char] = max(maxCounts[char], count)

    # Second pass: rewrite the runs that match the recorded maximum.
    answer = []
    for char, count in encoding:
        if char in repLens and count == maxCounts[char]:
            count = repLens[char]
        answer.append(char * count)
    return ''.join(answer)


def main():
    """Demonstrate the encoder and both filters on a sample string."""
    print('starting')

    s = "daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaaaaaaaaaaafnnasm"
    encoding = runLenEncode(s)
    print("encoding:", encoding)

    thresholds = {'a': 3}
    repLens = {'a': 2}
    decoded = runLenFilter(encoding, thresholds, repLens)
    print('lenFilter:', decoded)

    filtered = maxFilter(encoding, repLens)
    print("maxFilter:", filtered)

    print('done')


if __name__ == "__main__":
    main()
And the output:
$ python3 myFile.py starting encoding: [('d', 1), ('a', 1), ('s', 1), ('l', 1), ('a', 1), ('k', 1), ('n', 1), ('d', 1), ('l', 1), ('a', 5), ('j', 1), ('n', 1), ('j', 1), ('n', 1), ('d', 1), ('i', 1), ('b', 1), ('n', 1), ('i', 1), ('a', 3), ('f', 1), ('i', 1), ('j', 1), ('d', 1), ('n', 1), ('f', 1), ('i', 1), ('j', 1), ('d', 1), ('n', 1), ('s', 1), ('i', 1), ('j', 1), ('f', 1), ('n', 1), ('s', 1), ('d', 1), ('i', 1), ('n', 1), ('i', 1), ('f', 1), ('a', 11), ('f', 1), ('n', 2), ('a', 1), ('s', 1), ('m', 1)] lenFilter: daslakndlaajnjndibniaafijdnfijdnsijfnsdinifaafnnasm maxFilter: daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaafnnasm done