Python render unicode in html

Question

Python render unicode in html

I am writing a script to export my links and their headers from chrome to html.
Chrome bookmarks are stored as JSON in UTF-8 encoding.
Some names are in Russian, so they are stored like this:
"name": "\u0425\u0430\u0431\u0440..."

import codecs
# Read the whole bookmarks file as a list of lines, decoded as UTF-8.
# NOTE(review): neither `f` nor `fw` below is ever closed explicitly.
f = codecs.open("chrome.json","r", "utf-8")
data = f.readlines()

urls = [] # for links
names = [] # for link titles

# Index of the line currently being examined, kept in step with the loop.
ind = 0

for i in data:
    # Only lines containing a "url" key are of interest.
    if i.find('"url":') != -1:
        # Splitting on double quotes puts the value at index 3
        # (pieces: prefix, key, separator, value, ...).
        urls.append(i.split('"')[3])
        # Assumes the matching "name" line sits exactly two lines above the
        # "url" line, and that the name itself contains no double quote.
        names.append(data[ind-2].split('"')[3])
    ind += 1

# Write one name per <br>-terminated entry into a minimal HTML page.
fw = codecs.open("chrome.html","w","utf-8")
fw.write("<html><body>\n")
for n in names:
    # The \uXXXX sequences in the JSON text are plain characters at this
    # point, so they are written to the page unchanged.
    fw.write(n + '<br>')
    # print type(n) # this will return <type 'unicode'> for each url!
fw.write("</body></html>")

Now, in chrome.html, the names are displayed as \u0425\u0430\u0431...
How can I convert them back to Russian?
I am using Python 2.5.

Edit: Solved!

s = '\u041f\u0440\u0438\u0432\u0435\u0442 world!'
type(s)
<type 'str'>

print s.decode('raw-unicode-escape').encode('utf-8')
Привет world!

What I needed was to convert a str containing \u041f... escapes to unicode.

# Read the bookmarks file as raw byte strings (no decoding on read).
f = open("chrome.json", "r")
data = f.readlines()
f.close()

urls = [] # for links
names = [] # for link titles

# Index of the line currently being examined, kept in step with the loop.
ind = 0

for i in data:
    # Only lines containing a "url" key are of interest.
    if i.find('"url":') != -1:
        # Splitting on double quotes puts the value at index 3
        # (pieces: prefix, key, separator, value, ...).
        urls.append(i.split('"')[3])
        # Assumes the matching "name" line sits exactly two lines above the
        # "url" line, and that the name itself contains no double quote.
        names.append(data[ind-2].split('"')[3])
    ind += 1

# NOTE(review): `fw` is never closed explicitly.
fw = open("chrome.html","w")
fw.write("<html><body>\n")
for n in names:
    # Turn the literal \uXXXX escapes into unicode characters, then encode
    # the result as UTF-8 bytes for the plain (non-codecs) file object.
    fw.write(n.decode('raw-unicode-escape').encode('utf-8') + '<br>')
fw.write("</body></html>")

+5

json python unicode

vladimirze Feb 26 '11 at 15:46

source share

4 answers

JSON, JSON. Unicode , . ( ), JSON Python.

( , \u, , , JSON, .)

import json, cgi, codecs

# Parse the bookmarks with a real JSON parser: it turns the \uXXXX escapes
# into unicode characters and copes with quotes inside names.
with open('chrome.json') as fp:
    bookmarks = json.load(fp)

with codecs.open('chrome.html', 'w', 'utf-8') as fp:
    fp.write(u'<html><body>\n')
    # Walk the bookmark tree depth-first with an explicit stack. Folders can
    # nest, and a folder node has 'children' but no 'url', so indexing every
    # child with ['url'] would raise KeyError on the first sub-folder.
    # Items are pushed reversed so they are emitted in file order.
    stack = list(reversed(list(bookmarks[u'roots'].values())))
    while stack:
        node = stack.pop()
        if not isinstance(node, dict):
            # Not a bookmark/folder node; nothing to emit or descend into.
            continue
        if u'url' in node:
            fp.write(u'<a href="%s">%s</a>' % (
                # quote=True: the URL lands inside a double-quoted attribute,
                # so a '"' in it must be escaped as well as '<' and '&'.
                cgi.escape(node[u'url'], True),
                cgi.escape(node.get(u'name', u'')),
            ))
        stack.extend(reversed(node.get(u'children', [])))
    fp.write(u'</body></html>')

cgi.escape HTML-encodes the < and & characters.

+1

bobince Feb 27 '11 at 15:09

, , , :

# Python 2: in a plain (byte) str literal, \uXXXX is NOT an escape, so `s`
# holds the literal backslash-u sequences exactly as they appear in the file.
s = '\u0425\u0430\u0431'
# Decode every escape in one pass. Unlike splitting on '\u' and printing the
# pieces one by one, this keeps the characters adjacent (no space between
# them) and leaves any ordinary text mixed in with the escapes untouched.
print(s.decode('raw-unicode-escape'))

:

html, '\u0425...', .

, .

0

mmoore Feb 26 '11 at 16:29

utf-8, utf-8, ascii:

# codecs.open returns a writer that encodes unicode text to UTF-8 on write.
fw = codecs.open("chrome.html","w","utf-8")
try:
    # Decoding the BOM bytes yields U+FEFF; writing it first puts the UTF-8
    # signature at the start of the file.
    fw.write(codecs.BOM_UTF8.decode('utf-8'))
    fw.write(u'你好')
finally:
    # Close explicitly so buffered output is flushed to disk even if a
    # write fails.
    fw.close()

, fw python, 'utf-8-sig', .

You may need to encode unicode in utf-8, but I think the codecs are already doing it right:

0

wisty Feb 26 '11 at 16:42

source share

John Machin · Accepted Answer · 2011-02-26T22:59:38+0000

, ; , ASCII, . :

name=u'Python Programming Language \u2013 Official Website'
url=u'http://www.python.org/'

,

urls.append(i.split('"')[3])
names.append(data[ind-2].split('"')[3])
# (1) relies on name being 2 lines before url
# (2) fails if there is a `"` in the name
# example: "name": "The \"Fubar\" website",

Use the json module. On Python 2.5, use simplejson instead.

script, :

try:
    import json
except ImportError: 
    import simplejson as json
import sys

def convert_file(infname, outfname):
    """Export the names of all Chrome bookmarks to a simple HTML page.

    infname  -- path of the Chrome bookmarks file (JSON).
    outfname -- path of the HTML file to create; it is written as UTF-8.

    Every entry of type 'url' found under any root, at any folder depth,
    is written as its HTML-escaped name followed by '<br>'. Entries whose
    type is neither 'url' nor 'folder' are reported on stdout and skipped.
    """
    import codecs
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    def explore(folder_info):
        # Recursively emit every 'url' entry below one folder node.
        for child_dict in folder_info['children']:
            ctype = child_dict.get('type')
            # A missing or null name becomes an empty entry instead of
            # crashing on None.
            name = child_dict.get('name') or u''
            if ctype == 'url':
                # Names come from page titles, so '<' and '&' must be escaped
                # before they are placed into the HTML body.
                fw.write(escape(name, quote=False) + u'<br>\n')
            elif ctype == 'folder':
                explore(child_dict)
            else:
                print("*** Unexpected ctype=%r ***" % ctype)

    f = open(infname, 'rb')
    try:
        bmarks = json.load(f)
    finally:
        f.close()

    # The codecs writer encodes unicode to UTF-8 on write, so names are
    # handled as text throughout and encoded only at the file boundary.
    fw = codecs.open(outfname, 'w', 'utf-8')
    try:
        fw.write(u"<html><body>\n")
        for folder_info in bmarks['roots'].values():
            explore(folder_info)
        fw.write(u"</body></html>")
    finally:
        # Always release the handle, even if the tree walk fails part-way.
        fw.close()

if __name__ == "__main__":
    # Both paths are required; exit with a usage message (printed to stderr)
    # instead of an IndexError traceback when one is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s BOOKMARKS_JSON OUTPUT_HTML" % sys.argv[0])
    convert_file(sys.argv[1], sys.argv[2])

Tested with Python 2.5.4 on Windows 7 Pro.

Python render unicode in html

** Edit: Solved! **

More articles:

Edit: Solved!