Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# This script lists a large number of files (~600). The files are compressed
# PostgreSQL dump files from 2 databases. The file name patterns are:
#   20140510-0200_calipso_teste.sql.xz
#   20140608-0300_calipso_sistema.sql.xz
# where the prefix 20140608-0300 means 08/06/2014 at 3:00 am; both databases
# use the same suffix, .sql.xz.
# The purpose is to find which files are identical (this requires a binary
# comparison, which filecmp.cmp can do) and keep only the newest of them,
# because sometimes these databases do not change for a whole day.
# It has to create a list of files to delete.
from os import listdir
from os.path import isfile, join, getctime, getsize
import filecmp
from pprint import pprint

mypath = '.'
sufix = '.sql.xz'
sufix_len = len(sufix)
# Number of leading characters that hold the timestamp, e.g. "20140608-0300_".
prefix_len = 14


def find_duplicates(names, directory=mypath, suffix=sufix,
                    stamp_len=prefix_len, verbose=True):
    """Group byte-identical dump files that belong to the same database.

    Only names ending in ``suffix`` are considered. Two files are candidates
    for comparison only when the part of their names after the first
    ``stamp_len`` characters (the database name plus suffix) is the same.

    names     -- iterable of file names (not paths) found in ``directory``
    directory -- directory the names are relative to
    suffix    -- file-name ending a dump file must have
    stamp_len -- length of the timestamp prefix to ignore when grouping
    verbose   -- when true, print a line for every comparison performed

    Returns a dict mapping the first file (in sorted name order) of each set
    of identical files to the set of the other files with the same content.
    Files without any duplicate do not appear in the result.
    """
    # Bucket the candidates by database so files of different databases are
    # never compared with each other.
    by_database = {}
    for name in sorted(names):
        if name.endswith(suffix):
            by_database.setdefault(name[stamp_len:], []).append(name)

    duplicates = {}
    for database in sorted(by_database):
        # One representative per distinct content seen so far. Comparing only
        # against representatives keeps each set of equal files under a
        # single key and avoids re-comparing files already known to be equal.
        representatives = []
        for name in by_database[database]:
            for rep in representatives:
                if verbose:
                    print("compare %s with %s" % (rep, name))
                # shallow=False forces a byte-by-byte comparison; the default
                # would trust matching size and mtime without reading data.
                if filecmp.cmp(join(directory, rep), join(directory, name),
                               shallow=False):
                    duplicates.setdefault(rep, set()).add(name)
                    break
            else:
                representatives.append(name)
    return duplicates


onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
compares = find_duplicates(onlyfiles, mypath)

print("all:")
pprint(compares)
Advertisement
Add Comment
Please, Sign In to add comment