I use them a lot when I implement scanners (tokenizers) or when I iterate over data containers.
: -, ++:
whitespace = ' \t\r\n'
operators = '~!%^&*()-+=[]{};:\'"/?.,<>\\|'
def scan(s):
"returns a token and a state/token id"
words = {0:'', 1:'', 2:''}
state = 2
for c in s:
if c in operators:
if state != 1:
yield (words[state], state)
words[state] = ''
state = 1
words[state] += c
elif c in whitespace:
if state != 2:
yield (words[state], state)
words[state] = ''
state = 2
words[state] += c
else:
if state != 0:
yield (words[state], state)
words[state] = ''
state = 0
words[state] += c
yield (words[state], state)
:
>>> it = scan('foo(); i++')
>>> it.next()
('', 2)
>>> it.next()
('foo', 0)
>>> it.next()
('();', 1)
>>> it.next()
(' ', 2)
>>> it.next()
('i', 0)
>>> it.next()
('++', 1)
>>>