Python compare strings ignore special characters

I want to compare two strings so that the comparison ignores differences in special characters, i.e.

Hai, this is a test

Must match

Hai ! this is a test "or" Hai this is a test

Is there any way to do this without modifying the original strings?

+2
source share
7 answers

This removes punctuation and spaces before doing the comparison:

In [32]: import string

def compare(s1, s2):
    """Return True if s1 and s2 are equal once punctuation and whitespace are ignored.

    Neither input string is modified; the comparison is made on filtered
    copies.
    """
    remove = frozenset(string.punctuation + string.whitespace)

    def strip_ignored(text):
        # The two-argument ``text.translate(None, chars)`` call is rejected
        # by Python 3 ``str`` (and Python 2 ``unicode``), so filter the
        # characters explicitly; this form works on every version.
        return ''.join(ch for ch in text if ch not in remove)

    return strip_ignored(s1) == strip_ignored(s2)

In [34]: compare('Hai, this is a test', 'Hai ! this is a test')
Out[34]: True
+13
source
def cmp(a, b):
    """Return True if a and b hold the same letters and digits in the same order.

    Punctuation, whitespace and other special characters are ignored.
    Digits are significant, so 'test 1' and 'test 2' are reported as
    different. Two temporary lists are built; neither input is modified.
    """
    # NOTE: this name shadows the Python 2 builtin ``cmp``; it is kept so
    # that existing callers of this helper continue to work.
    return [c for c in a if c.isalnum()] == [c for c in b if c.isalnum()]
>>> cmp('Hai, this is a test', 'Hai ! this is a test')
True
>>> cmp('Hai, this is a test', 'Hai this is a test')
True
>>> cmp('Hai, this is a test', 'other string')
False

This creates two temporary lists, but does not modify the original strings in any way.

+6
source

In general, you can strip out the characters you want to ignore and then compare the results:

import re
def equal(a, b):
    """Return True if a and b are identical after special characters are dropped.

    Every character that is neither whitespace nor a word character is
    removed from a copy of each string; whitespace itself is kept and
    therefore still has to match.
    """
    stripped_a, stripped_b = (re.sub(r'[^\s\w]', '', text) for text in (a, b))
    return stripped_a == stripped_b

>>> equal('Hai, this is a test', 'Hai this is a test')
True
>>> equal('Hai, this is a test', 'Hai this@#)($! i@#($()@#s a test!!!')
True
0

, , .

For example, remove the characters ',', '!' and the space from copies of both strings:

a='Hai, this is a test'
b='Hai ! this is a test'
# Build copies with the comma, exclamation mark and space removed; a and b
# themselves are left untouched. The filter is used instead of
# ``translate(None, ',! ')`` because that two-argument call is rejected by
# Python 3 ``str``; this form runs on both Python 2 and Python 3.
tempa=''.join(ch for ch in a if ch not in ',! ')
tempb=''.join(ch for ch in b if ch not in ',! ')

Then compare tempa and tempb.

0

Levenshtein . . n .

0

, , .

>>> import string
>>> first = "Hai, this is a test"
>>> second = "Hai ! this is a test"
>>> third = "Hai this is a test"
def my_match(left, right):
    """Return True if left and right match when punctuation and whitespace are ignored.

    Both strings are walked in parallel with two indices, so no filtered
    copies are created and neither input is modified.
    """
    ignored = frozenset(string.punctuation + string.whitespace)
    left_len, right_len = len(left), len(right)
    i, j = 0, 0
    while True:
        # Skip ignorable runs on BOTH sides before looking at the ends, so
        # that trailing punctuation or whitespace present in only one of the
        # strings (e.g. "test" vs "test!") does not cause a false mismatch.
        while i < left_len and left[i] in ignored:
            i += 1
        while j < right_len and right[j] in ignored:
            j += 1
        if i == left_len or j == right_len:
            # A match requires both strings to be exhausted together.
            return i == left_len and j == right_len
        if left[i] != right[j]:
            return False
        i += 1
        j += 1

>>> my_match(first, second)
True
>>> my_match(first, third)
True
>>> my_match("test", "testing")
False
0

,

def samealphabetic(*args):
    """Return True if every argument has the same alphabetic characters in the same order.

    Non-alphabetic characters (punctuation, whitespace, digits) are ignored.
    With zero or one argument there is nothing to disagree, so the result
    is True; that is why the check is ``<= 1`` rather than ``== 1``.
    """
    # Build real strings with ''.join: on Python 3 ``filter()`` returns a
    # lazy iterator, and a set of such iterators would contain one distinct
    # object per argument regardless of the text being compared.
    return len(set(''.join(c for c in arg if c.isalpha()) for arg in args)) <= 1

# Parenthesised call form: prints the same single value on Python 2 and is
# also valid syntax on Python 3, where the bare print statement is not.
print(samealphabetic('Hai, this is a test',
                     'Hai ! this is a test',
                     'Hai this is a test'))

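This prints True. The comparison uses <= rather than == so that calling the function with no arguments also returns True.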

0

All Articles