How can I handle multiple regexp cases neatly in Python?

So, I get some input in python that I need to parse with regular expressions.

I'm currently using something like this:

matchOK = re.compile(r'^OK\s+(\w+)\s+(\w+)$')
matchFailed = re.compile(r'^FAILED\s(\w+)$')
#.... a bunch more regexps

for l in big_input:
  match = matchOK.search(l)
  if match:
     #do something with match
     continue
  match = matchFailed.search(l)
  if match:
     #do something with match
     continue
  #.... a bunch more of these 
  # Then some error handling if nothing matches

Now, as a rule, I love Python because it is beautiful and concise. But this looks verbose. I expected that I could do something like this:

for l in big_input:      
  if match = matchOK.search(l):
     #do something with match     
  elif match = matchFailed.search(l):
     #do something with match 
  #.... a bunch more of these
  else
    # error handling

Am I missing something, or is the first form as neat as I am going to get?

+5
source share
4 answers
class helper:
    """Callable that remembers the last value it was asked to test.

    Calling an instance with a match result (a match object, or ``None``)
    saves that result on ``self.match`` and reports its truthiness, so the
    call can sit in an ``if``/``elif`` condition while the result stays
    reachable afterwards.
    """

    def __call__(self, match):
        # Store first: the caller reads ``.match`` inside the branch it enters.
        self.match = match
        return True if match else False

h= helper()
for l in big_input:      
    if h(matchOK.search(l)):
        # do something with h.match     
    elif h(matchFailed.search(l)):
        # do something with h.match 
    ... # a bunch more of these
    else:
        # error handling

Or combine the patterns and the matching into methods of a class:

class matcher:
    """Try one of several compiled patterns against a line and keep the result.

    Each ``matchXxx`` method runs its pattern's ``search`` on the line, stores
    the outcome in ``self.match`` (a match object, or ``None`` when the
    pattern did not match) and returns ``True``/``False`` so it can be used
    directly in an ``if``/``elif`` chain.
    """

    def __init__(self):
        # Imported locally so the snippet is self-contained.
        import re

        # compile matchers
        self.ok = re.compile(r'^OK\s+(\w+)\s+(\w+)$')
        self.failed = re.compile(r'^FAILED\s(\w+)$')
        # ... one compiled pattern attribute per additional case

        # Result of the most recent matchXxx() call.
        self.match = None

    def matchOK(self, l):
        """Return True if *l* matches the OK pattern; the match is kept in ``self.match``."""
        # A compiled pattern is not callable; the search goes through .search().
        self.match = self.ok.search(l)
        return bool(self.match)

    def matchFailed(self, l):
        """Return True if *l* matches the FAILED pattern; the match is kept in ``self.match``."""
        self.match = self.failed.search(l)
        return bool(self.match)

    # ... one matchXxx(self, l) method per additional pattern, same shape

m= matcher()
for l in big_input:      
    if m.matchOK(l):
        # do something with m.match     
    elif m.matchFailed(l):
        # do something with m.match 
    ... # a bunch more of these
    else:
        # error handling
+3
source

How about something like:

for l in big_input:
    for p in (matchOK, matchFailed): # other patterns go in here
        match = p.search(l)
        if match: break
    if (not match): p = None # no patterns matched
    if (p is matchOK):
        # do something with match
    elif (p is matchFailed):
        # do something with match
    #.... a bunch more of these 
    else:
        assert p is None
        # Then some error handling if nothing matches
0
source

How about dispatching on a single combined pattern?

import re


def f_OK(ch):
    """Print the two words that follow ``OK`` at the start of *ch*.

    ch: text of the form ``OK <word> <word>``.

    Raises AttributeError if *ch* does not start with that shape, because
    ``re.match`` then returns ``None``.
    """
    # Single-argument print(...) gives the same output on Python 2 and 3.
    # The raw string keeps \s and \w as regex escapes, not string escapes.
    print('BINGO ! : %s , %s' % re.match(r'OK\s+(\w+)\s+(\w+)', ch).groups())

def f_FAIL(ch):
    """Print the last whitespace-separated word of *ch*.

    ch: text such as ``FAILED <word>``.

    Raises IndexError if *ch* is empty or contains only whitespace.
    """
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('only one : ' + ch.split()[-1])

# Handlers, looked up by position: the dispatch loop calls
# several_func[mat.lastindex-1], so the order here matters.
several_func = (f_OK, f_FAIL)


# One alternative per handler, in the same order as ``several_func``.
# Raw strings keep the backslashes intact for the regex engine.
several_REs = (r'OK\s+\w+\s+\w+',
               r'FAILED\s+\w+')

# Each alternative gets its own capturing group, so ``lastindex`` of a match
# tells which alternative fired. The alternation is wrapped in a
# non-capturing group so that ^ and $ apply to every alternative: in
# ``^(A)|(B)$`` the ^ binds only to A and the $ only to B.
globpat = re.compile('^(?:(' + ')|('.join(several_REs) + '))$')




# Read the input one line at a time and hand each matching line to the
# handler that corresponds to the alternative of globpat that matched.
with open('big_input.txt') as handle:
    for i,line in enumerate(handle):
        # Trailing comma (Python 2 print statement): no newline yet, so the
        # handler's own output continues on the same line as this prefix.
        print 'line '+str(i)+' - ',
        mat = globpat.search(line)
        if mat:
            # lastindex is the 1-based number of the group that matched;
            # subtract 1 to index the 0-based several_func tuple.
            several_func[mat.lastindex-1](mat.group())
        else:
            # repr() makes the trailing newline and other whitespace visible.
            print '## no match ## '+repr(line)

For example, with the following content in big_input.txt, the output is shown below it:

OK tiramisu sunny   
FAILED overclocking   
FAILED nuclear    
E = mcXc    
OK the end  

line 0 -  BINGO ! : tiramisu , sunny
line 1 -  only one : overclocking
line 2 -  only one : nuclear
line 3 -  ## no match ## 'E = mcXc\n'
line 4 -  BINGO ! : the , end

RE , , ..

0

Even better, what about a slightly simpler version of the helper code above, using a nested function:

import re

matchOK = re.compile("ok")
matchFailed = re.compile("failed")
big_input = ["ok to begin with", "failed later", "then gave up"]

for l in big_input:
    # Reset so a result from the previous line cannot leak into this one.
    match = None

    def matches(pattern):
        """Search the current line ``l`` with *pattern*.

        Stores the result (a match object or ``None``) in the module-level
        name ``match`` and returns it, so the call can be used as an
        ``if``/``elif`` condition while the result stays available.
        """
        # ``global`` makes the assignment visible to the branches below;
        # this only works while the loop lives at module level.
        global match
        match = pattern.search(l)
        return match

    # Single-argument print(...) with %-formatting produces the same text
    # on Python 2 and Python 3.
    if matches(matchOK):
        print("matched ok: %s %s" % (l, match.start()))
    elif matches(matchFailed):
        print("failed: %s %s" % (l, match.start()))
    else:
        print("ignored: %s" % l)

Note that this works when the loop is at the top level of a module, but it is not easily converted into a function: the variable `match` must remain a true global at module level.

-1
source

All Articles