Python render unicode in html

I am writing a script to export my bookmarks (the links and their titles) from Chrome to HTML.
Chrome stores its bookmarks as JSON in UTF-8 encoding.
Some names are in Russian, so they are stored like this:
"name": "\u0425\u0430\u0431\u0440..."

import codecs
# Open the bookmarks file as text, decoding its bytes as UTF-8.
# NOTE: the \uXXXX sequences shown in the question are ordinary ASCII
# characters in the file, so this decoding step leaves them as literal text.
f = codecs.open("chrome.json","r", "utf-8")
data = f.readlines()

urls = [] # for links
names = [] # for link titles

# Index of the line currently being examined by the loop below.
ind = 0

for i in data:
    # A line containing '"url":' holds a link; its value is the fourth piece
    # obtained when the line is split on double quotes.
    if i.find('"url":') != -1:
        urls.append(i.split('"')[3])
        # The title is taken from the line two positions above the "url" line.
        names.append(data[ind-2].split('"')[3])
    ind += 1

# Write the collected titles to an HTML file encoded as UTF-8.
fw = codecs.open("chrome.html","w","utf-8")
fw.write("<html><body>\n")
for n in names:
    fw.write(n + '<br>')
    # print type(n) # this will return <type 'unicode'> for each url!
fw.write("</body></html>")

Now, in chrome.html, the Russian names are displayed as \u0425\u0430\u0431...
How can I turn them back into Russian text?
I am using Python 2.5.

** Edit: Solved! **

s = '\u041f\u0440\u0438\u0432\u0435\u0442 world!'
type(s)
<type 'str'>

print s.decode('raw-unicode-escape').encode('utf-8')
Привет world!

What I needed was to convert the str containing \u041f... escapes to unicode.

# Python 2 script: copy the bookmark titles found in chrome.json into
# chrome.html, turning the literal \uXXXX escapes into real characters.

# Read every line of the bookmarks file as byte strings; the handle is closed
# even if reading fails.
f = open("chrome.json", "r")
try:
    data = f.readlines()
finally:
    f.close()

urls = [] # for links
names = [] # for link titles

for ind, line in enumerate(data):
    # A line containing '"url":' holds a link; its value is the fourth piece
    # obtained when the line is split on double quotes.
    if '"url":' in line:
        urls.append(line.split('"')[3])
        # The title is taken from the line two positions above the "url" line.
        names.append(data[ind - 2].split('"')[3])

fw = open("chrome.html", "w")
try:
    fw.write("<html><body>\n")
    for n in names:
        # n is a byte str with literal \uXXXX escapes: decode them into a
        # unicode object, then encode that as UTF-8 bytes for the file.
        fw.write(n.decode('raw-unicode-escape').encode('utf-8') + '<br>')
    fw.write("</body></html>")
finally:
    # Closing flushes the buffered output; the original never closed the file.
    fw.close()
+5
source share
4 answers

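No, the file is not raw UTF-8 text; being JSON, it is plain ASCII, with every non-ASCII character written as a \uXXXX escape. Once decoded, an entry looks like this: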

name=u'Python Programming Language \u2013 Official Website'
url=u'http://www.python.org/'

,

urls.append(i.split('"')[3])
names.append(data[ind-2].split('"')[3])
# (1) relies on name being 2 lines before url
# (2) fails if there is a `"` in the name
# example: "name": "The \"Fubar\" website",

Use the json module instead. Python 2.5 does not ship it, so install and use simplejson there.

Here is a script that does the job:

try:
    import json
except ImportError: 
    import simplejson as json
import sys

def convert_file(infname, outfname):
    """Export the bookmark titles of a Chrome bookmarks file to an HTML page.

    infname  -- path of the bookmarks file (JSON) to read.
    outfname -- path of the HTML file to write, encoded as UTF-8.

    Each child of type 'url' becomes one line holding its HTML-escaped title
    followed by '<br>'; children of type 'folder' are walked recursively.
    A child of any other type is reported on stdout and skipped.

    Raises whatever open() or json.load() raise for an unreadable or
    malformed input file, and KeyError if the top-level 'roots' key is absent.
    """
    # Pick the HTML-escaping helper that exists on the running interpreter.
    try:
        from html import escape  # Python 3.2+
    except ImportError:
        from cgi import escape  # Python 2

    # The page is assembled in memory and written once at the end, so a
    # failure while walking the tree leaves no half-written output file.
    lines = [u"<html><body>\n"]

    def explore(folder_info):
        # Append one line per bookmark found under this folder dict.
        # A folder without a 'children' key is treated as empty.
        for child_dict in folder_info.get('children', ()):
            ctype = child_dict.get('type')
            # A missing name becomes an empty title rather than a crash.
            name = child_dict.get('name') or u''
            if ctype == 'url':
                # Escape &, < and > so a title cannot inject markup; quotes
                # are left alone because the title is not inside an attribute.
                lines.append(escape(name, False) + u'<br>\n')
            elif ctype == 'folder':
                explore(child_dict)
            else:
                sys.stdout.write("*** Unexpected ctype=%r ***\n" % ctype)

    f = open(infname, 'rb')
    try:
        bmarks = json.load(f)
    finally:
        f.close()

    for folder_info in bmarks['roots'].values():
        # Skip any entry under 'roots' that is not a folder dict.
        if isinstance(folder_info, dict):
            explore(folder_info)
    lines.append(u"</body></html>")

    # Binary mode: the bytes written are exactly the UTF-8 encoding.
    fw = open(outfname, 'wb')
    try:
        fw.write(u"".join(lines).encode('utf-8'))
    finally:
        fw.close()

if __name__ == "__main__":
    # Fail with a usage message instead of an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit("usage: %s BOOKMARKS_JSON OUTPUT_HTML" % sys.argv[0])
    convert_file(sys.argv[1], sys.argv[2])

Python 2.5.4 Windows 7 Pro.

+1

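This is JSON, so parse it as JSON. The parser turns the \uXXXX escapes into Unicode strings for you, so no manual decoding is needed. (From version 2.6 on), a json module is included with Python.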

( , \u, , , JSON, .)

import json, cgi, codecs

def write_links(fp, folder):
    """Write an <a> element for each bookmark under *folder*.

    fp     -- writable text stream that accepts unicode strings.
    folder -- dict that may hold a u'children' list; a child with a u'url'
              key is written as a link, any other child is walked as a
              sub-folder.
    """
    for child in folder.get(u'children', ()):
        if u'url' in child:
            fp.write(u'<a href="%s">%s</a>' % (
                # quote=True also escapes '"', which is required because the
                # URL sits inside a double-quoted attribute value.
                cgi.escape(child[u'url'], True),
                cgi.escape(child[u'name'])
            ))
        else:
            # No URL: treat the child as a folder and recurse into it.
            write_links(fp, child)

with open('chrome.json') as fp:
    bookmarks= json.load(fp)

with codecs.open('chrome.html', 'w', 'utf-8') as fp:
    fp.write(u'<html><body>\n')
    for root in bookmarks[u'roots'].values():
        # Skip any entry under u'roots' that is not a folder dict.
        if isinstance(root, dict):
            write_links(fp, root)
    fp.write(u'</body></html>')

cgi.escape HTML-escapes the < and & characters.

+1

, , , :

# Python 2 only: in a byte string, '\u' is a literal backslash followed by 'u'.
s = '\u0425\u0430\u0431'
hex_codes = s.split('\u')
# Drop the empty piece that precedes the first '\u'.
hex_codes.remove('')
for hex_code in hex_codes:
    # Convert each hexadecimal code point to its character; the trailing
    # comma keeps the output on one line.
    print(unichr(int(hex_code, 16))),

Output:

Х а б

html, '\u0425...', .

, .

0

utf-8, utf-8, ascii:

# Write a UTF-8 byte order mark followed by some non-ASCII text.
fw = codecs.open("chrome.html","w","utf-8")
try:
    # BOM_UTF8 is a byte string; decode it so the UTF-8 writer can encode it.
    fw.write(codecs.BOM_UTF8.decode('utf-8'))
    fw.write(u'你好')
finally:
    # Closing flushes the buffered output; the original never closed the file.
    fw.close()

, fw python, 'utf-8-sig', .

You may need to encode unicode in utf-8, but I think the codecs are already doing it right:

0
source

All Articles