Python: how to download a zip file

I am trying to download a zip file using this code:

o = urllib2.build_opener( urllib2.HTTPCookieProcessor() ) #login p = urllib.urlencode( { usernameField: usernameVal, passField: passVal } ) f = o.open(authUrl, p ) data = f.read() print data f.close() #download file f = o.open(remoteFileUrl) localFile = open(localFile, "wb") localFile.write(f.read()) f.close() 

I get some binary data, but the file I downloaded is too small and is not a valid zip file. Am I not retrieving the zip file correctly? The HTTP response headers for f = o.open(remoteFileUrl) are shown below. I don't know whether special handling is needed for this response:

HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
Pragma: private
Cache-Control: must-revalidate
Expires: Tue, 31 Dec 1997 23:59:59 GMT
Content-Disposition: inline; filename="files.zip";
Content-Type: application/zip
Transfer-Encoding: chunked

+6
python
source share
4 answers

f.read() does not necessarily read the entire file, but just a packet of it (which may be the whole file if it is small, but will not be for a large file).

You need to loop over the packets like this:

 # Copy the response `f` into the already-open output file `localFile`,
 # one bounded packet at a time.
 CHUNK_SIZE = 64 * 1024  # bytes requested per read

 try:
     while 1:
         # Request a bounded amount so only one packet is held in memory.
         packet = f.read(CHUNK_SIZE)
         if not packet:
             # An empty result means the end of the response was reached.
             break
         localFile.write(packet)
 finally:
     f.close()
     # Closing the output flushes buffered bytes; if it is left open the
     # file on disk can end up shorter than the data that was written.
     localFile.close()

When f.read() returns an empty packet, it means that you have read the entire file.

+10
source share

If you do not mind reading the entire zip file into memory, the fastest way to read and write it is:

 # Read the whole response into memory and write it out in one call.
 # The payload is binary, so it is handled as a single byte string rather
 # than being split into "lines" at arbitrary newline bytes.
 data = f.read()
 with open(localFile, 'wb') as output:
     output.write(data)

Otherwise, to read and write in pieces as they are received over the network, do:

 # Stream the response `f` to the path `localFile` in bounded pieces.
 CHUNK_SIZE = 64 * 1024  # bytes requested per read

 with open(localFile, "wb") as output:
     # read() without a size reads until the end of the response, which
     # would hold the whole file in memory; an explicit size keeps each
     # piece bounded.
     chunk = f.read(CHUNK_SIZE)
     while chunk:
         output.write(chunk)
         chunk = f.read(CHUNK_SIZE)

This is a little less neat, but it avoids holding the entire file in memory at once. Hope this helps.

+1
source share

Here is a more robust solution that uses urllib2 to download a file in chunks and print the download status:

 import math
 import os
 import urllib2
 from contextlib import closing


 def downloadChunks(url):
     """Download *url* into /tmp in fixed-size chunks, printing progress.

     The file is saved under ``/tmp/`` using the last path component of
     *url* as its name. After each chunk is written, progress is printed:
     a percentage when the server sent a usable Content-Length, otherwise
     the number of bytes received so far.

     Returns the path of the saved file, or False when urllib2 raises an
     HTTPError or URLError (the error is printed).
     """
     baseFile = os.path.basename(url)
     # NOTE: os.umask changes the mask for the whole process, not only for
     # the file created below.
     os.umask(0o002)
     temp_path = "/tmp/"
     target = os.path.join(temp_path, baseFile)
     CHUNK = 256 * 10240
     try:
         # closing() guarantees the response is released even if a read or
         # write fails part-way through.
         with closing(urllib2.urlopen(url)) as req:
             # Responses sent with "Transfer-Encoding: chunked" carry no
             # Content-Length, so the header may be absent.
             length_header = req.info().getheader('Content-Length')
             total_size = int(length_header.strip()) if length_header else 0
             downloaded = 0
             with open(target, 'wb') as fp:
                 while True:
                     chunk = req.read(CHUNK)
                     if not chunk:
                         break
                     fp.write(chunk)
                     downloaded += len(chunk)
                     if total_size > 0:
                         # Multiply by 100.0 so the division is not
                         # truncated to 0 by integer arithmetic.
                         print(math.floor(downloaded * 100.0 / total_size))
                     else:
                         print("%d bytes downloaded" % downloaded)
     except urllib2.HTTPError as e:
         print("HTTP Error: %s %s" % (e.code, url))
         return False
     except urllib2.URLError as e:
         print("URL Error: %s %s" % (e.reason, url))
         return False
     return target
+1
source share

Try the following:

 #download file
 # Read the whole response into memory, then write it to `localFile`.
 CHUNK_SIZE = 64 * 1024  # bytes requested per read

 f = o.open(remoteFileUrl)
 try:
     # Collect the pieces in a list and join once at the end; repeated
     # `+=` on a string copies the accumulated data on every iteration.
     pieces = []
     while 1:
         data = f.read(CHUNK_SIZE)
         if not data:
             break
         pieces.append(data)
 finally:
     f.close()
 response = b"".join(pieces)
 with open(localFile, "wb") as local_file:
     local_file.write(response)
0
source share

All Articles