How to replace the N-th occurrence of a needle in a haystack? (Python)

I am trying to replace the Nth occurrence of a needle in a haystack. I would like to do this with a single re.sub() call, but I cannot come up with a suitable regular expression. I am trying to adapt http://docstore.mik.ua/orelly/perl/cookbook/ch06_06.htm, but I can't seem to make it work across multiple lines.

My current method is an iterative approach that, after each mutation, searches again from the very beginning to find the position of each occurrence. This is pretty inefficient, and I would appreciate some input. Thanks!

+2
source share
6 answers

Here is an approach that I think is fairly pythonic:

def nth_matcher(n, replacement):
    """Build a ``re.sub`` callback that replaces every n-th match.

    The returned function is meant to be passed as the ``repl`` argument of
    ``re.sub``. It counts the matches it is called with: every n-th match is
    replaced by ``replacement``; all other matches are returned unchanged.

    The callback keeps its counter between calls, so create a fresh one with
    ``nth_matcher`` for every ``re.sub`` call. ``n`` must be non-zero (the
    counter is reduced modulo ``n``).

    >>> re.sub("([0-9])", nth_matcher(3, "X"), "1234567890")
    '12X45X78X0'
    """
    def alternate(n):
        # Endless stream of flags: True on every n-th step, False otherwise.
        i = 0
        while True:
            i += 1
            yield i % n == 0

    gen = alternate(n)

    def match(m):
        # Use the next() builtin instead of gen.next(): the method form only
        # exists on Python 2, the builtin works on Python 2.6+ and Python 3.
        if next(gen):
            return replacement
        return m.group(0)

    return match

EDIT: :

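  • alternate(n). This is a generator that yields an endless sequence of True/False values in which every n-th value is True. For example, the first values produced by alternate(3) are [False, False, True, False, False, True, False, ...] (the generator never ends, so do not call list() on it directly).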

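  • match(m). This is the function passed to re.sub: for each match it takes the next value from alternate(n) (gen.next()) and, if that value is True, returns the replacement; otherwise it returns the matched text unchanged (so nothing is replaced).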

, . , , .

+1

Yes, this can be done with re.sub. Pass a function as the replacement argument, for example:

def replaceNthWith(n, replacement):
    """Build a ``re.sub`` callback that replaces only the n-th match.

    The returned function counts the matches it is called with (starting at
    1). The match whose count equals ``n`` is replaced by ``replacement``;
    every other match is returned unchanged. If there are fewer than ``n``
    matches, nothing is replaced.

    The counter lives in the returned callback, so build a new callback for
    each ``re.sub`` call.
    """
    # Match counter held in the closure rather than in a mutable default
    # argument, so the callback exposes only the ``match`` parameter.
    # A one-element list is used instead of ``nonlocal`` so the code also
    # runs on Python 2.
    seen = [0]

    def replace(match):
        seen[0] += 1
        return replacement if seen[0] == n else match.group(0)

    return replace

:

# Usage sketch: the callback built by replaceNthWith is passed as re.sub's
# replacement argument. pattern, n, replacement and str are placeholders for
# the caller's own values (str here is the input text, not the builtin type).
re.sub(pattern, replaceNthWith(n, replacement), str)

, , .

DEMO

+3

- . , :

# Replace only the N-th occurrence, here N = 3: the {2} quantifier is N - 1.
# Group 1 captures everything up to the N-th occurrence (including the earlier
# occurrences) so that it can be written back unchanged.
re.sub(
  r'^((?:.*?mytexttoreplace){2}.*?)mytexttoreplace',
  # Must be a raw string: in a plain literal '\1' is the octal escape for
  # chr(1), not a backreference to group 1.
  r'\1yourreplacementtext.',
  'mystring',
  flags=re.DOTALL  # let '.' match newlines so multi-line input works
)

Note the DOTALL flag: it lets "." match newline characters, so the pattern also works on multi-line input.

+2

Could you do it using re.findall together with MatchObject.start() and MatchObject.end()?

.findall, N- .start/.end, , ?

+1

( "" ) , . "nth_occurrence_sub" - , :

def nth_match_end(pattern, string, n, flags):
    """Return the end offset of the n-th match of ``pattern`` in ``string``.

    Matches are counted from 1 in the order ``re.finditer`` yields them.
    Returns ``None`` when no match has ordinal ``n`` (fewer than ``n``
    matches, or ``n`` smaller than 1).
    """
    ordinal = 0
    for found in re.finditer(pattern, string, flags):
        ordinal += 1
        if ordinal == n:
            return found.end()
    return None


def nth_occurrence_sub(pattern, repl, string, n=0, flags=0):
    """Replace only the n-th match of ``pattern`` in ``string``.

    Args:
        pattern: Regular expression (string or compiled pattern).
        repl: Replacement, with the same meaning as in ``re.sub``: either a
            template string (backreferences such as ``\\1`` are expanded) or
            a callable taking the match object and returning the text.
        string: Text to search.
        n: 1-based ordinal of the match to replace. Negative values count
            from the last match (``-1`` is the last one).
        flags: Regular-expression flags, as for ``re.finditer``.

    Returns:
        ``string`` with the selected match replaced, or ``string`` unchanged
        when ``n`` is 0 or its magnitude exceeds the number of matches.
    """
    if n == 0:
        return string

    # A single scan collects every match; the target is then spliced in
    # directly. This avoids slicing with a length that can be zero (which
    # breaks when an earlier match ends at the end of the string) and avoids
    # repeated re.sub passes over the same text.
    matches = list(re.finditer(pattern, string, flags))
    if abs(n) > len(matches):
        return string

    # Positive n is 1-based; negative n already indexes from the end.
    target = matches[n - 1] if n > 0 else matches[n]

    # Mirror re.sub: call a function replacement, expand a template string.
    replacement = repl(target) if callable(repl) else target.expand(repl)
    return string[:target.start()] + replacement + string[target.end():]
0

I needed something similar, so I wrote a function modelled on SQL's REGEXP_REPLACE(). Here it is:

def sql_regexp_replace(txt, pattern, replacement='', position=1, occurrence=0, regexp_modifier='c'):
    """Replace regex matches in ``txt`` in the style of SQL ``REGEXP_REPLACE()``.

    Args:
        txt: Text to process.
        pattern: Regular expression, compiled with the flags derived from
            ``regexp_modifier``.
        replacement: Replacement template; it is expanded with
            ``match.expand()``, so backreferences such as ``\\1`` work.
        position: 1-based index at which searching starts. The text before
            it (``txt[:position - 1]``) is copied through untouched. Values
            below 1 are not validated.
        occurrence: Which match to replace, counted from 1 within the
            searched part. 0 replaces every match; negative values are
            treated as 0.
        regexp_modifier: Passed to ``regexp_flags()`` to obtain the ``re``
            flags. NOTE(review): ``regexp_flags`` is defined outside this
            snippet; the accepted modifier values should be confirmed there.

    Returns:
        The text with the selected match(es) replaced.
    """
    class ReplWrapper(object):
        """Stateful ``re.sub`` callback that counts the matches it sees."""

        def __init__(self, replacement, occurrence):
            self.count = 0
            self.replacement = replacement
            self.occurrence = occurrence

        def repl(self, match):
            self.count += 1
            if self.occurrence == 0 or self.occurrence == self.count:
                return match.expand(self.replacement)
            # Not the requested occurrence: keep the matched text as-is.
            # group(0) always exists on a match object, so no IndexError
            # handling is needed here.
            return match.group(0)

    # Negative occurrences mean "replace all", same as 0.
    occurrence = 0 if occurrence < 0 else occurrence
    flags = regexp_flags(regexp_modifier)
    rx = re.compile(pattern, flags)
    replw = ReplWrapper(replacement, occurrence)
    # Only the part from `position` onward is searched; the prefix is kept.
    return txt[:position - 1] + rx.sub(replw.repl, txt[position - 1:])

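Note that the replacement is applied with match.expand(), so backreferences such as \1 in the replacement string work as expected.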

If you want this to work, you will need to handle the flags differently (or take the helper from my GitHub; it is easy to implement, and for a quick test you can stub it out by setting flags to 0 and ignoring my call to regexp_flags()).

0
source

All Articles