Here is one approach to your problem, using pyparsing. You will need to write your own code to convert the http src attribute; the example below simply upper-cases it as a placeholder.
# Fetch a web page and rewrite the src attribute of every <img> tag in it.
from contextlib import closing

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

# Result names that pyparsing attaches to a parsed start tag but that are not
# HTML attributes, so they must not be written back into the rebuilt tag.
# NOTE(review): "tag" appears only in newer pyparsing releases -- confirm
# against the installed version; skipping it is harmless when it is absent.
_NON_ATTRIBUTE_KEYS = ("startImg", "empty", "tag")


def modifySrcRef(tokens):
    """Rebuild an <img> tag from its parsed attributes, transforming ``src``.

    Used as a pyparsing parse action: the returned string replaces the
    matched tag in the output of ``transformString``.

    Args:
        tokens: mapping-like object whose ``items()`` yields
            (attribute name, attribute value) pairs; pyparsing passes its
            parse results here, but a plain dict works as well.

    Returns:
        The tag as a string, e.g. ``<img src="..." alt="..." />``.
    """
    parts = ["<img"]
    for name, value in tokens.items():
        if name in _NON_ATTRIBUTE_KEYS:
            continue
        if name.lower() == "src":
            # Placeholder transformation -- put the real URL conversion here.
            value = value.upper()
        # The parser strips the original quotes, so a literal double quote in
        # the value has to be escaped before the value is re-quoted.
        parts.append(' %s="%s"' % (name, value.replace('"', "&quot;")))
    parts.append(" />")
    return "".join(parts)


def main():
    """Download the page and print it with every <img> tag rewritten."""
    # Imported here so modifySrcRef stays importable without pyparsing.
    from pyparsing import makeHTMLTags

    imgtag = makeHTMLTags("img")[0]

    # closing() guarantees the connection is released even if read() fails.
    with closing(urlopen("http://www.yahoo.com")) as page:
        html = page.read()
    if not isinstance(html, str):
        # Python 3 returns bytes; Python 2 already returns str and is untouched.
        html = html.decode("utf-8", "replace")

    imgtag.setParseAction(modifySrcRef)
    print(imgtag.transformString(html))


if __name__ == "__main__":
    main()
The img tags are converted to the following (note the upper-cased src values):
<img src="HTTP://L.YIMG.COM/A/I/WW/BETA/Y3.GIF" title="Yahoo" height="44" width="232" alt="Yahoo!" /> <a href="r/xy"><img src="HTTP://L.YIMG.COM/A/I/WW/TBL/ALLYS.GIF" height="20" width="138" alt="All Yahoo! Services" border="0" /></a>
source share