Python line coverage optimization

I have this starting line.

'bananaappleorangestrawberryapplepear'

And also have a tuple with the lines:

('apple', 'plepe', 'leoran', 'lemon')

I need a function, so from the source string and the tuple with the strings, I get the following:

'bananaxxxxxxxxxgestrawberryxxxxxxxar'

I know how to really do this by finding a word in the source line for each word, and then encode character by character in the entire source line with the replaced words.

But it is not very efficient and ugly. I suspect there must be some way to do this more elegantly, functionally, using itertools or something else. If you know a Python library that can do this efficiently, let me know.

UPDATE : Justin Peel pointed to a case that I did not describe in my original question. If the words "aaa" and "aaaaaa" are on the start line, the result should look like "xxxxxx".

+5
source share
6 answers
import re

words = ('apple', 'plepe', 'leoran', 'lemon')
s = 'bananaappleorangestrawberryapplepear'

x = set()

for w in words:
    for m in re.finditer(w, s):
        i = m.start()
        for j in range(i, i+len(w)):
            x.add(j)

result = ''.join(('x' if i in x else s[i]) for i in range(len(s)))
print result

gives:

bananaxxxxxxxxxgestrawberryxxxxxxxar
+3
source

Here is another answer. There may be a faster way to replace the letters x, but I do not think it is necessary, because it is already pretty fast.

import re

def do_xs(s,pats):
    pat = re.compile('('+'|'.join(pats)+')')

    sout = list(s)
    i = 0
    match = pat.search(s)
    while match:
        span = match.span()
        sout[span[0]:span[1]] = ['x']*(span[1]-span[0])
        i = span[0]+1
        match = pat.search(s,i)
    return ''.join(sout)

txt = 'bananaappleorangestrawberryapplepear'
pats = ('apple', 'plepe', 'leoran', 'lemon')
print do_xs(txt,pats)

, , . , 1 . , , , .

+1

, stdlib :

s1 = 'bananaappleorangestrawberryapplepear'
t = ('apple', 'plepe', 'leoran', 'lemon')
s2 = s1

solution = 'bananaxxxxxxxxxgestrawberryxxxxxxxar'

for word in t:
    if word not in s1: continue
    index = -1 # Start at -1 so our index search starts at 0
    for iteration in range(s1.count(word)):
        index = s1.find(word, index+1)
        length = len(word)
        before = s2[:index]
        after = s2[index+length:]
        s2 = before + 'x'*length + after

print s2 == solution
+1
>>> string_ = 'bananaappleorangestrawberryapplepear'
>>> words = ('apple', 'plepe', 'leoran', 'lemon')
>>> xes = [(string_.find(w), len(w)) for w in words]
>>> xes
[(6, 5), (29, 5), (9, 6), (-1, 5)]
>>> for index, len_ in xes:
...   if index == -1: continue
...   string_ = string_.replace(string_[index:index+len_], 'x'*len_)
...
>>> string_
'bananaxxxxxxxxxgestrawberryxxxxxxxar'
>>>

, , , - .

+1
a = ('apple', 'plepe', 'leoran', 'lemon')
b = 'bananaappleorangestrawberryapplepear'

for fruit in a:
    if a in b:
        b = b.replace(fruit, numberofx's)

, , , X .

0
def mask_words(s, words):
    mask = [False] * len(s)
    for word in words:
        pos = 0
        while True:
            idx = s.find(word, pos)
            if idx == -1:
                break

            length = len(word)
            for i in xrange(idx, idx+length):
                mask[i] = True
            pos = idx+length

    # Sanity check:
    assert len(mask) == len(s)

    result = []
    for masked, c in zip(mask, s):
        result.append('x' if masked else c)

    return "".join(result)
0

All Articles