All you need to do is use the python ftplib module. Since os.walk() based on a search algorithm in Breadth mode, you need to find directories and file names at each iteration, and then continue recursive movements from the first directory. I implemented this algorithm about 2 years ago for use as FTPwalker , which is the optimal package for moving extremely large directory trees through FTP.
from os import path as ospath class FTPWalk: """ This class is contain corresponding functions for traversing the FTP servers using BFS algorithm. """ def __init__(self, connection): self.connection = connection def listdir(self, _path): """ return files and directory names within a path (directory) """ file_list, dirs, nondirs = [], [], [] try: self.connection.cwd(_path) except Exception as exp: print ("the current path is : ", self.connection.pwd(), exp.__str__(),_path) return [], [] else: self.connection.retrlines('LIST', lambda x: file_list.append(x.split())) for info in file_list: ls_type, name = info[0], info[-1] if ls_type.startswith('d'): dirs.append(name) else: nondirs.append(name) return dirs, nondirs def walk(self, path='/'): """ Walk through FTP server directory tree, based on a BFS algorithm. """ dirs, nondirs = self.listdir(path) yield path, dirs, nondirs for name in dirs: path = ospath.join(path, name) yield from self.walk(path)
Now to use this class, you can simply create a connection object using the ftplib module and pass the object to the FTPWalk object and just loop over the walk() function:
In [2]: from test import FTPWalk In [3]: import ftplib In [4]: connection = ftplib.FTP("ftp.uniprot.org") In [5]: connection.login() Out[5]: '230 Login successful.' In [6]: ftpwalk = FTPWalk(connection) In [7]: for i in ftpwalk.walk(): print(i) ...: ('/', ['pub'], []) ('/pub', ['databases'], ['robots.txt']) ('/pub/databases', ['uniprot'], []) ('/pub/databases/uniprot', ['current_release', 'previous_releases'], ['LICENSE', 'current_release/README', 'current_release/knowledgebase/complete', 'previous_releases/', 'current_release/relnotes.txt', 'current_release/uniref']) ('/pub/databases/uniprot/current_release', ['decoy', 'knowledgebase', 'rdf', 'uniparc', 'uniref'], ['README', 'RELEASE.metalink', 'changes.html', 'news.html', 'relnotes.txt']) ... ... ...