#! /usr/bin/python
"""Compare the contents of two directory trees.

Reports which relative paths exist in only one of the two trees and, for
paths present in both, which tree holds the more recently modified copy.

The code avoids constructs that were removed in Python 3 (print statements,
dict.has_key, os.path.walk) while remaining runnable on Python 2: every
print call takes exactly one string argument, so it behaves the same under
both interpreters.
"""

import os
import string  # not used below; kept because it was imported originally
import sys


class DirComparer(object):
    """Index of the relative paths found under several directory trees.

    Contains an entry for each relative filename encountered in multiple
    directory structures, with a list of directories in which it occurs.
    """

    def __init__(self):
        self.dirs = []      # roots registered so far, in registration order
        self.dirnum = -1    # index into self.dirs of the root being scanned
        self.contents = {}  # relative path -> File

    def startNewDirectory(self, path):
        """Register `path` as the root that later addPath() calls belong to."""
        self.dirs.append(path)
        self.dirnum = self.dirnum + 1

    def addPath(self, fullpath):
        """Record that `fullpath` exists under the current root.

        The current root is stripped from the front of `fullpath` by length
        (no check is made that it really is a prefix), together with one
        leading path separator, and the remainder is used as the key.

        A path that reduces to the empty string (the root itself) is
        ignored; indexing it used to raise IndexError.

        Raises IndexError if no root has been registered yet.
        """
        dirname = self.dirs[self.dirnum]
        relpath = fullpath[len(dirname):]
        if relpath.startswith(os.sep):
            relpath = relpath[1:]
        if not relpath:
            return

        entry = self.contents.get(relpath)
        if entry is None:
            entry = File(relpath)
            self.contents[relpath] = entry
        entry.setInDir(dirname)

    def getFiles(self):
        """Return the dict mapping each relative path to its File entry."""
        return self.contents


class File(object):
    """A relative path plus the list of roots under which it was seen."""

    def __init__(self, relpath):
        self.relpath = relpath
        self.dirs = []

    def getPath(self):
        """Return the relative path this entry describes."""
        return self.relpath

    def setInDir(self, dirname):
        """Note that this path exists under the root `dirname`."""
        self.dirs.append(dirname)

    def isInDir(self, dirname):
        """Return 1 if this path was seen under `dirname`, otherwise 0."""
        if dirname in self.dirs:
            return 1
        return 0


def addPath(comparer, dirname, names):
    """Feed every entry of one directory listing into `comparer`.

    The (arg, dirname, names) signature is the visitor shape that
    os.path.walk used to call; it is kept so existing callers still work.
    """
    for name in names:
        comparer.addPath(os.path.join(dirname, name))


def scanDirectory(comparer, top):
    """Register `top` as a new root and index everything beneath it.

    os.walk replaces os.path.walk, which no longer exists in Python 3.
    Sub-directory names are passed along with file names, so directories
    are indexed as entries too, just as os.path.walk's listing did.
    """
    comparer.startNewDirectory(top)
    for dirpath, dirnames, filenames in os.walk(top):
        addPath(comparer, dirpath, dirnames + filenames)


def _printListing(heading, sections):
    """Print one headed list of paths per (directory, entries) pair."""
    for dirabs, entries in sections:
        print(heading % (dirabs,))
        if not entries:
            print(" (none)")
        for fileobj in entries:
            print(" " + fileobj.getPath())
        print("")


def displayComparison(dir1, dir2, comparer):
    """Print a report comparing the two trees indexed in `comparer`.

    dir1 and dir2 must be the same strings that were registered with the
    comparer, because membership is tested against those exact values.
    Absolute forms of both are used only for display.

    Paths present in both trees are classified by os.path.getmtime, which
    raises OSError if either copy cannot be stat'ed.
    """
    dir1abs = os.path.abspath(dir1)
    dir2abs = os.path.abspath(dir2)

    dir1only = []
    dir1newer = []
    dir2only = []
    dir2newer = []
    inbothdirs = []
    bothdirssame = []

    files = comparer.getFiles()
    for key in sorted(files):
        fileobj = files[key]
        if fileobj.isInDir(dir1):
            if fileobj.isInDir(dir2):
                inbothdirs.append(fileobj)
            else:
                dir1only.append(fileobj)
        elif fileobj.isInDir(dir2):
            dir2only.append(fileobj)

    # print("") rather than print(): under Python 2 a bare print() would
    # output "()" instead of an empty line.
    print("")
    print("***************************************************")
    print("Comparing: ")
    print(dir1abs)
    print(dir2abs)
    print("***************************************************")
    print("")

    uniques = [(dir1abs, dir1only), (dir2abs, dir2only)]
    _printListing("Files in \"%s\" only: \n", uniques)

    for fileobj in inbothdirs:
        relpath = fileobj.getPath()
        modtime1 = os.path.getmtime(os.path.join(dir1, relpath))
        modtime2 = os.path.getmtime(os.path.join(dir2, relpath))
        if modtime1 > modtime2:
            dir1newer.append(fileobj)
        elif modtime2 > modtime1:
            dir2newer.append(fileobj)
        else:
            bothdirssame.append(fileobj)

    newers = [(dir1abs, dir1newer), (dir2abs, dir2newer)]
    _printListing("File in \"%s\" is newer: \n", newers)

    print("Summary:\n")
    for dirabs, entries in uniques:
        print(" \"%s\" contains %d unique files." % (dirabs, len(entries)))
    for dirabs, entries in newers:
        print(" \"%s\" contains %d newer files." % (dirabs, len(entries)))
    print(" Files with same modification time in both directories numbered %d."
          % (len(bothdirssame)))
    print("")


def main(argv):
    """Run the comparison for the two directories named in `argv`.

    Exits with status 1 after printing usage when fewer than two
    directories are given, or an error when either is not a directory.
    """
    comparer = DirComparer()

    try:
        dirname1 = argv[1]
        dirname2 = argv[2]
    except IndexError:
        # Only a missing argument is expected here; anything else should
        # surface instead of being reported as a usage error.
        print("\n Usage:\n compare-dirs.py dir1 dir2\n")
        print("")
        print("""\
Compares the contents of two different directories, telling you which
files are uniquely in only one of the two directories and which files
newer in one of the two directories.\n""")
        sys.exit(1)

    for dirname in (dirname1, dirname2):
        if (not os.path.exists(dirname)) or (not os.path.isdir(dirname)):
            print("\n ERROR: Path \"%s\" is not a directory or does not exist.\n"
                  % (dirname))
            sys.exit(1)

    # Walk both trees into the same comparer, then report.
    scanDirectory(comparer, dirname1)
    scanDirectory(comparer, dirname2)

    displayComparison(dirname1, dirname2, comparer)


if __name__ == '__main__':
    main(sys.argv)