I get a UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-51: ordinal not in range(128) attempting to use string.maketrans in Python . I seem to be discouraged by such an error in the following code ( gist ):
# -*- coding: utf-8 -*- import string def translit1(string): """ This function works just fine """ capital_letters = { u'': u'A', u'': u'B', u'': u'V', u'': u'G', u'': u'D', u'': u'E', u'': u'E', u'': u'Zh', u'': u'Z', u'': u'I', u'': u'Y', u'': u'K', u'': u'L', u'': u'M', u'': u'N', u'': u'O', u'': u'P', u'': u'R', u'': u'S', u'': u'T', u'': u'U', u'': u'F', u'': u'H', u'': u'Ts', u'': u'Ch', u'': u'Sh', u'': u'Sch', u'': u'', u'': u'Y', u'': u'', u'': u'E', u'': u'Yu', u'': u'Ya' } lower_case_letters = { u'': u'a', u'': u'b', u'': u'v', u'': u'g', u'': u'd', u'': u'e', u'': u'e', u'': u'zh', u'': u'z', u'': u'i', u'': u'y', u'': u'k', u'': u'l', u'': u'm', u'': u'n', u'': u'o', u'': u'p', u'': u'r', u'': u's', u'': u't', u'': u'u', u'': u'f', u'': u'h', u'': u'ts', u'': u'ch', u'': u'sh', u'': u'sch', u'': u'', u'': u'y', u'': u'', u'': u'e', u'': u'yu', u'': u'ya' } translit_string = "" for index, char in enumerate(string): if char in lower_case_letters.keys(): char = lower_case_letters[char] elif char in capital_letters.keys(): char = capital_letters[char] if len(string) > index+1: if string[index+1] not in lower_case_letters.keys(): char = char.upper() else: char = char.upper() translit_string += char return translit_string def translit2(text): """ This method should be more easy to grasp, but throws exception: UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-51: ordinal not in range(128) """ symbols = string.maketrans(u"", u"abvgdeezijklmnoprstufh'y'eABVGDEEZIJKLMNOPRSTUFH'Y'E") sequence = { u'':'zh', u'':'ts', u'':'ch', u'':'sh', u'':'sch', u'':'ju', u'':'ja', u'':'Zh', u'':'Ts', u'':'Ch' } for char in sequence.keys(): text = text.replace(char, sequence[char]) return text.translate(symbols) if __name__ == "__main__": print translit1(u"")
Source trace:
Traceback (most recent call last): File "translit_error.py", line 124, in <module> print translit2(u"")
I mean, why is Python string.maketrans trying to use ascii table anyway? And how do the letters of the English alphabet go out of the range 0-128?
$ python -c "print ord(u'A')" 65 $ python -c "print ord(u'z')" 122 $ python -c "print ord(u\"'\")" 39
After a few hours, I feel completely tired to solve this problem.
Can someone say what is happening and how to fix it?