Use `hashlib` to compute the MD5 digest of each file, then compare the two digests.
import hashlib
def filemd5(filename, block_size=2**20):
    """Return the raw MD5 digest of a file's contents.

    The file is read in binary mode, in chunks of ``block_size`` bytes, so
    arbitrarily large files can be hashed without loading them into memory.

    Args:
        filename: Path of the file to hash.
        block_size: Maximum number of bytes to read per chunk
            (defaults to 1 MiB).

    Returns:
        The 16-byte MD5 digest (``hashlib.md5().digest()``) of the file.

    Raises:
        IOError/OSError: If the file cannot be opened or read.

    Note:
        MD5 is adequate for spotting accidental differences between files,
        but it is not collision-resistant; do not rely on it when the files
        may have been crafted by an adversary.
    """
    md5 = hashlib.md5()
    # Binary mode is essential: text mode would decode the bytes (and may
    # translate newlines), so the hash would not reflect the on-disk content,
    # and on Python 3 md5.update() rejects str outright.
    # The with-statement guarantees the handle is closed even if a read fails.
    with open(filename, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns b''
        # (end of file).
        for chunk in iter(lambda: f.read(block_size), b''):
            md5.update(chunk)
    return md5.digest()
if __name__ == "__main__":
    # Demo: hash two files and report whether their digests are identical.
    first_digest, second_digest = (
        filemd5(path) for path in ('/home/neo/todo', '/home/neo/todo2')
    )
    print(first_digest == second_digest)
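Update: As of Python 2.1, there is a `filecmp` module that does exactly what you want, and it has tools for comparing directories too. I never knew about this module; I am still learning Python myself :-) Note that by default `filecmp.cmp` treats files with identical `os.stat()` signatures (type, size, modification time) as equal without reading them; pass `shallow=False` to always compare the contents.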
>>> import filecmp
>>> filecmp.cmp('undoc.rst', 'undoc.rst')
True
>>> filecmp.cmp('undoc.rst', 'index.rst')
False
source
share