You can create a regular trie and add border frames. then your complexity will be O (n), where n is the length of the pattern. You first need to replace the runs **with *in in the template (also operation O (n)).
If the list of words was , I’m a bull , then a trie would look something like this:
(I ($ [I])
a (m ($ [am])
n ($ [an])
? ($ [am an])
* ($ [am an]))
o (x ($ [ox])
? ($ [ox])
* ($ [ox]))
? ($ [I]
m ($ [am])
n ($ [an])
x ($ [ox])
? ($ [am an ox])
* ($ [I am an ox]
m ($ [am]) ...)
* ($ [I am an ox]
I ...
...
python:
import sys
def addWord(root, word):
add(root, word, word, '')
def add(root, word, tail, prev):
if tail == '':
addLeaf(root, word)
else:
head = tail[0]
tail2 = tail[1:]
add(addEdge(root, head), word, tail2, head)
add(addEdge(root, '?'), word, tail2, head)
if prev != '*':
for l in range(len(tail)+1):
add(addEdge(root, '*'), word, tail[l:], '*')
def addEdge(root, char):
if not root.has_key(char):
root[char] = {}
return root[char]
def addLeaf(root, word):
if not root.has_key('$'):
root['$'] = []
leaf = root['$']
if word not in leaf:
leaf.append(word)
def findWord(root, pattern):
prev = ''
for p in pattern:
if p == '*' and prev == '*':
continue
prev = p
if not root.has_key(p):
return []
root = root[p]
if not root.has_key('$'):
return []
return root['$']
def run():
print("Enter words, one per line terminate with a . on a line")
root = {}
while 1:
line = sys.stdin.readline()[:-1]
if line == '.': break
addWord(root, line)
print(repr(root))
print("Now enter search patterns. Do not use multiple sequential '*'s")
while 1:
line = sys.stdin.readline()[:-1]
if line == '.': break
print(findWord(root, line))
run()