Regex for comparing strings with Umlaut and non-Umlaut variants

Can someone help me with a JavaScript regex that I can use to compare strings that should be treated as the same once their umlauts are replaced by the non-umlaut spellings?

For example, in German the word Grüße can also be spelled Gruesse. These two strings should be considered identical. The mappings (ignoring letter case for the moment) are:

  • ä = ae
  • ü = ue
  • ö = oe
  • ß = ss

Since there are not many variants to consider, I could do a separate replacement for each one, but I wonder if there is a more elegant way, especially since this use case may need to be expanded in the future to include, for example, Scandinavian characters ...

+5
source share
7 answers

sort of

/**
 * Transliteration table: each key is a German special character and each
 * value is its ASCII spelling. Add further entries here to support more
 * characters.
 */
var tr = {"ä":"ae", "ü":"ue", "ö":"oe", "ß":"ss" };

// Built once from the keys of `tr`, so a new table entry is picked up
// without also having to edit a hard-coded character list. Keys are escaped
// in case one of them is a regex metacharacter.
var trPattern = new RegExp(
    Object.keys(tr).map(function(key) {
        return key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    }).join("|"),
    "g"
);

/**
 * Replaces every character listed in `tr` with its ASCII spelling.
 *
 * @param {string} s - Text that may contain umlauts or "ß".
 * @returns {string} The text with all listed characters transliterated.
 */
function replaceUmlauts(s) {
    return s.replace(trPattern, function(match) { return tr[match]; });
}

/**
 * Tells whether two strings are equal once both have been passed through
 * replaceUmlauts.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {boolean} true when the transliterated strings are identical.
 */
function compare(a, b) {
    // Both sides are strings, so strict equality is the intended comparison.
    return replaceUmlauts(a) === replaceUmlauts(b);
}

// Shows "true": both arguments reduce to "gruesse" after transliteration.
alert(compare("grüße", "gruesse"))

you can easily expand this by adding more entries to the "tr"

not very elegant but works

+14
source

In addition to stereo's answer:

/**
 * Transliteration table written with Unicode escapes, so the source file
 * stays pure ASCII: ä, ü, ö and ß mapped to their ASCII spellings.
 */
var tr = {"\u00e4":"ae", "\u00fc":"ue", "\u00f6":"oe", "\u00df":"ss" };

/**
 * Replaces ä, ü, ö and ß with "ae", "ue", "oe" and "ss".
 *
 * @param {string} s - Text that may contain umlauts or "ß".
 * @returns {string} The transliterated text.
 */
function ersetzeUmlauts(s) {
    // No "|" between the members: inside [...] a pipe is a literal character,
    // not alternation, and would be replaced by tr["|"], i.e. "undefined".
    return s.replace(/[\u00e4\u00fc\u00f6\u00df]/g, function($0) { return tr[$0]; });
}

I was dealing with umlauts in an Aptana/Eclipse script, and the literal characters ('ä', etc.) did not work for me, so I used Unicode escapes instead.

+5
source

, , .

, , Unicode Normalization. , . "javascript", Javascript , . . , Normalizer Class PHP. , Python Perl , Microsoft.

wikipedia Unicode Equivalence.

+4

: (: )

/**
 * Transliterates accented Latin characters to plain ASCII.
 *
 * German umlauts are expanded to two letters (Ä -> AE, ä -> ae, Ö -> OE,
 * ö -> oe, Ü -> UE, ü -> ue, ß -> ss); the other listed characters are
 * reduced to their base letter.
 *
 * @param {string} str - Text to transliterate.
 * @returns {string} The text with every listed character replaced.
 */
function umlaut(str) {
 return str
  .replace(/Â|À|Å|Ã/g, "A")
  .replace(/â|à|å|ã/g, "a")
  .replace(/Ä/g, "AE")
  .replace(/ä/g, "ae")
  .replace(/Ç/g, "C")
  .replace(/ç/g, "c")
  .replace(/É|Ê|È|Ë/g, "E")
  .replace(/é|ê|è|ë/g, "e")
  .replace(/Ó|Ô|Ò|Õ|Ø/g, "O")
  // "ø" added so the lowercase row mirrors the uppercase one ("Ø" -> "O").
  .replace(/ó|ô|ò|õ|ø/g, "o")
  .replace(/Ö/g, "OE")
  .replace(/ö/g, "oe")
  .replace(/Š/g, "S")
  .replace(/š/g, "s")
  .replace(/ß/g, "ss")
  .replace(/Ú|Û|Ù/g, "U")
  .replace(/ú|û|ù/g, "u")
  .replace(/Ü/g, "UE")
  .replace(/ü/g, "ue")
  .replace(/Ý|Ÿ/g, "Y")
  .replace(/ý|ÿ/g, "y")
  .replace(/Ž/g, "Z")
  // Needs the g flag like every other rule; without it only the first
  // "ž" in the string was converted.
  .replace(/ž/g, "z");
}
+3

Regex .

Umlaut -Umlaut ; , , .

+1

, , (ä|ae).

+1

- "regexp" , , , "ä" (ae | ä) ", . javascript (, document.write(), ), ;

regexp_match("Grüße|Gruesse",somestring)

- :

# Pseudo-code: each pair is (special character, ASCII spelling).
mappings = (("ä","ae"),("ö","oe"),("ü","ue"),("ß","ss"))
def my_regexp_match(regexp,input) {
    # Start from the caller's pattern and widen it one mapping at a time;
    # rebuilding from `regexp` on every pass would keep only the last mapping.
    new_regexp = regexp
    for key,value in mappings {
         # replace() is assumed to substitute every occurrence of key
         new_regexp = new_regexp.replace(key,"("+key+"|"+value+")")
    }
    return regexp_match(new_regexp,input)
}
my_regexp_match("Grüße",somestring)

, "pythonic" - , , re.compile() javascript, - for -loop , my_regexp_match ( )

+1

All Articles