Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Entry:
    """A group of duplicate-report lines describing the same file.

    Every added line is split on single spaces and kept as a list of
    fields. The name and size of the group are read from the first line
    that was added: field 0 is used as the name and field 2 as the size.
    """

    def __init__(self):
        # One list of fields per added line, in insertion order.
        self.dupes = []

    def add(self, l):
        """Split ``l`` on single spaces and store the resulting fields."""
        self.dupes.append(l.split(" "))

    def _parse(self, s):
        """Convert a size string such as ``'12,5 MB'`` into a float.

        Only the text before the first space is kept, and a decimal comma
        is turned into a decimal point. Returns ``0.0`` when the remaining
        text is not a valid number.
        """
        number = s.split(' ')[0]  # remove the ' MB' suffix
        number = number.replace(',', '.')  # change , into . for float(...) parsing
        try:
            return float(number)
        except ValueError:
            # Only a malformed number is expected here; anything else
            # (e.g. KeyboardInterrupt) must not be silently swallowed.
            return 0.0

    def size(self):
        """Return the parsed size taken from the first stored line.

        Raises ``IndexError`` when no line has been added yet or the first
        line has fewer than three fields.
        """
        raw = self.dupes[0][2]  # 3rd field of the first 'line'
        return self._parse(raw)

    def name(self):
        """Return the first field of the first stored line."""
        return self.dupes[0][0]

    def count(self):
        """Return how many lines have been added to this entry."""
        return len(self.dupes)

    def __repr__(self):
        # '%d' truncates the float size to a whole number for display.
        return '%s : %d * %d MB' % (self.name(), self.count(), self.size())
class Entry2(Entry):
    """Entry variant that parses each line once, when it is added.

    The name (field 0) and the parsed size (field 2) are cached on the
    instance and overwritten by every added line; only the path (field 1)
    and the date (field 3) are kept per line, as a ``(path, date)`` tuple.
    """

    def add(self, l):
        """Parse ``l`` and record its ``(path, date)`` pair.

        Raises ``IndexError`` when the line has fewer than four
        space-separated fields.
        """
        fields = l.split(" ")
        self.n = fields[0]
        self.s = self._parse(fields[2])
        self.dupes.append((fields[1], fields[3]))

    def name(self):
        """Return the name cached by the most recent ``add`` call."""
        return self.n

    def size(self):
        """Return the size cached by the most recent ``add`` call."""
        return self.s
def remap(lines, kind=Entry2):
    """Group report lines into entries delimited by dashed separator lines.

    A line starting with ``'--------------'`` closes the group collected so
    far. Every other line is passed to the ``add`` method of the current
    entry, which is created on demand by calling ``kind()`` with no
    arguments.

    Only entries that received at least one line are yielded. This means:

    * no ``None`` placeholder is produced for a leading separator, and no
      empty entry for consecutive separators (callers that filter with
      ``if e`` keep working unchanged);
    * lines that appear before the first separator form a group of their
      own instead of failing on a missing entry;
    * the final group is yielded even when the input does not end with a
      separator line.
    """
    current = None
    for line in lines:
        if line.startswith('--------------'):
            # Separator: emit the finished group, if there is one.
            if current is not None:
                yield current
                current = None
            continue
        if current is None:
            # Create the entry lazily so that empty groups never exist.
            current = kind()
        current.add(line)
    # Flush the last group when the input has no trailing separator.
    if current is not None:
        yield current
def main(f):
    """Yield the entries parsed from the report file at path ``f``.

    The file is read completely and closed before parsing starts. Because
    this is a generator function, nothing is opened until the result is
    first iterated.
    """
    with open(f, "r") as report:
        content = list(report)
    yield from remap(content)
# Path of the duplicate report to analyse.
F = "c:/Users/noob/Desktop/duplicate_marion.txt"
# Generator of parsed entries; the file is only read once it is iterated.
es = main(F)
# Sum the size of every entry, skipping any falsy placeholder in the stream.
total = sum(entry.size() for entry in es if entry)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement