This is what I use, and it has worked so far. You can get urlnorm from pip.
Note that I sort the query-string parameters before re-encoding them. I found this to be significant.
try:  # Python 2 module layout
    from urlparse import urlsplit, urlunsplit, parse_qsl
    from urllib import urlencode
except ImportError:  # Python 3 moved these functions into urllib.parse
    from urllib.parse import urlsplit, urlunsplit, parse_qsl, urlencode

import urlnorm


def canonizeurl(url):
    """Return a canonical form of *url* for comparing or de-duplicating URLs.

    The URL is first passed through ``urlnorm.norm`` and then cleaned up
    further:

    * the path is cut at the first literal space;
    * leading ``/..`` segments are removed from the path;
    * trailing ``%20`` sequences are removed from the path;
    * query parameters are parsed, sorted and re-encoded, so parameter
      order no longer matters;
    * the fragment is dropped.

    Args:
        url: The URL string to canonicalize.

    Returns:
        The canonicalized URL string, without a fragment.

    Note:
        Any exception raised by ``urlnorm.norm`` propagates to the caller.
        ``parse_qsl`` is called with its defaults, so parameters with blank
        values (``a=``) are not kept in the result.
    """
    split = urlsplit(urlnorm.norm(url))

    # Keep only the part of the path that precedes the first literal space.
    path = split.path.split(' ')[0]

    # Strip leading parent-directory segments. Match only a complete "/.."
    # segment: a bare startswith('/..') would also eat the first characters
    # of ordinary segments such as "/..hidden" or "/...".
    while path == '/..' or path.startswith('/../'):
        path = path[3:]

    # Strip trailing percent-encoded spaces.
    while path.endswith('%20'):
        path = path[:-3]

    # Sorting makes URLs that differ only in parameter order compare equal.
    qs = urlencode(sorted(parse_qsl(split.query)))

    # The empty last component discards the fragment.
    return urlunsplit((split.scheme, split.netloc, path, qs, ''))
stuckintheshuck Mar 26 '13 at 4:56 2013-03-26 04:56
source share