Get the URL when catching urllib2.URLError

This is specific to urllib2, but it is really a question about exception handling in general. How do I pass additional information to the calling function in another module through a raised exception? I guess I would re-raise using a custom exception class, but I'm not sure about the technical details.

Instead of polluting the sample code with what I tried and failed, I simply present it as a clean slate. My ultimate goal is for the last line in the sample to work.

#mymod.py
import urllib2


def openurl():
    """Open a hard-coded URL; any urllib2.URLError raised by urlopen propagates."""
    request = urllib2.Request("http://duznotexist.com/")
    response = urllib2.urlopen(request)


#main.py
import urllib2
import mymod

try:
    mymod.openurl()
except urllib2.URLError as e:
    #how do I do this?
    # The goal of the question: make e.url available here alongside e.reason.
    message = "Website (%s) could not be reached due to %s" % (e.url, e.reason)
    print(message)
+7
source share
2 answers

You can add the information to the exception object and then re-raise the exception.

 #mymod.py
import urllib2


def openurl(url="http://duznotexist.com/"):
    """Open *url* and return the response object.

    If urllib2 fails, the original urllib2.URLError is re-raised with an
    extra ``url`` attribute so that callers in other modules can report
    which address could not be reached.

    Raises:
        urllib2.URLError: the exception raised by urlopen, annotated with
            ``url``; its own ``reason`` is left untouched.
    """
    req = urllib2.Request(url)
    try:
        return urllib2.urlopen(req)
    except urllib2.URLError as e:
        # Attach the requested URL unless the exception already carries one
        # (HTTPError may already expose a url attribute -- verify for your
        # Python version).
        if getattr(e, "url", None) is None:
            e.url = req.get_full_url()
        # Do not assign e.reason: URLError already provides the real cause,
        # and on HTTPError it can be a read-only property.
        # A bare raise re-raises the same exception and keeps its traceback.
        raise


#main.py
import urllib2
import mymod

try:
    mymod.openurl()
except urllib2.URLError as e:
    print("Website (%s) could not be reached due to %s" % (e.url, e.reason))
+8
source

I do not think that re-raising the exception is an appropriate way to solve this problem.

As Jonathan Vanasco said,

If you open a.com and it 301-redirects to b.com, urlopen will automatically follow the redirect, because an HTTPError carrying the redirect was raised internally. If b.com then raises a URLError, the code above marks a.com as non-existent.

My solution is to override redirect_request from urllib2.HTTPRedirectHandler

 # Redirect handler that rewrites the caller's Request object in place.
import urllib2


class NewHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
    """Follow redirects by re-initialising the original Request.

    Because the same Request object is reused, the caller can call
    req.get_full_url() after a failure and see the URL that was being
    fetched when the error occurred, not the URL originally requested.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return *req* re-pointed at *newurl*, or raise urllib2.HTTPError.

        The redirect is followed for 301/302/303/307 on GET or HEAD and for
        301/302/303 on POST; any other combination raises HTTPError.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
                or code in (301, 302, 303) and m == "POST"):
            newurl = newurl.replace(' ', '%20')
            # Drop body-related headers; the re-initialised request has no data.
            newheaders = dict((k, v) for k, v in req.headers.items()
                              if k.lower() not in ("content-length", "content-type"))
            # reuse the req object
            # mind that req will be changed if redirection happens
            req.__init__(newurl,
                         headers=newheaders,
                         origin_req_host=req.get_origin_req_host(),
                         unverifiable=True)
            return req
        else:
            # HTTPError must be qualified: only the urllib2 module is imported.
            raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)


opener = urllib2.build_opener(NewHTTPRedirectHandler)
urllib2.install_opener(opener)

# mind that req will be changed if redirection happens
#req = urllib2.Request('http://127.0.0.1:5000')
req = urllib2.Request('http://www.google.com/')
try:
    response = urllib2.urlopen(req)
except urllib2.URLError as e:
    print('error')
    # req now holds the last URL that was attempted.
    print(req.get_full_url())
else:
    print('normal')
    print(response.geturl())

To test it, run a local server that redirects the URL to an unreachable one:

 # Test server: requests to "/" are answered with a 302 redirect.
import os

from flask import Flask, redirect

app = Flask(__name__)

REDIRECT_TARGET = "http://a.com"


@app.route('/')
def hello():
    """Redirect the client to REDIRECT_TARGET with status 302."""
    # return 'hello world'
    return redirect(REDIRECT_TARGET, code=302)


if __name__ == '__main__':
    # Listen on the port given by the PORT environment variable, else 5000.
    listen_port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=listen_port)

And the result:

 error http://a.com/ normal http://www.google.com/ 
0
source

All Articles