I am trying to write a function that will look for str for substr, given the various possibilities of writing strange letters, such as æ, ø, å in Danish. For example, you can search for “Ålborg,” and the function will return true if there is, say, “Aalborg” on the page.
The function below works, but the performance is unbearable. What would you recommend to increase productivity?
def danish_tolerating_search(substr, str): '''Figure out if substr is in str, taking into account possible deviations in writing letters æ, ø, å. æ <-> ae a ea ø <-> oe o å <-> aa ao ''' # normalize input substr = substr.lower().replace('aa',u'å') str = str.lower() # normalized recursive search # TODO fix perfomance def s(substr, str): if str.find(substr) >= 0: return True if substr.find(u'æ') >= 0: if s(substr.replace(u'æ','ae', 1), str): return True elif s(substr.replace(u'æ', 'a', 1), str): return True elif s(substr.replace(u'æ','ea', 1), str): return True if str.find(u'æ') >= 0: if s(substr, str.replace(u'æ','ae', 1)): return True elif s(substr, str.replace(u'æ', 'a', 1)): return True elif s(substr, str.replace(u'æ','ea', 1)): return True if substr.find(u'ø') >= 0: if s(substr.replace(u'ø','oe', 1), str): return True elif s(substr.replace(u'ø', 'o', 1), str): return True if str.find(u'ø') >= 0: if s(substr, str.replace(u'ø','oe', 1)): return True elif s(substr, str.replace(u'ø', 'o', 1)): return True if substr.find(u'å') >= 0: if s(substr.replace(u'å','aa', 1), str): return True elif s(substr.replace(u'å', 'a', 1), str): return True elif s(substr.replace(u'å', 'o', 1), str): return True if str.find(u'å') >= 0: if s(substr, str.replace(u'å','aa', 1)): return True elif s(substr, str.replace(u'å', 'a', 1)): return True elif s(substr, str.replace(u'å', 'o', 1)): return True return False return s(substr, str)