SHOW:
|
|
- or go back to the newest paste.
#!/usr/bin/env python3

# dedup.py
# Removes duplicate login entries from a Bitwarden .csv export
# 2019-02-09 5erif
6 | ||
7 | import sys | |
8 | import hashlib | |
9 | from urllib.parse import urlparse | |
10 | ||
# Zero-based position of each field within one row of the Bitwarden CSV.
# The names are bound in column order, so FOLDER is 0 and TOTP is 9.
(
    FOLDER,
    FAVORITE,
    TYPE,
    NAME,
    NOTES,
    FIELDS,
    URI,
    USERNAME,
    PASSWORD,
    TOTP,
) = range(10)
22 | ||
def _dedup_columns(header):
    """Return the (uri, username, password) column indexes for *header*.

    The columns are located by their header names (``login_uri``,
    ``login_username``, ``login_password``) so that an export with extra or
    reordered columns is still compared on the right fields.  When the
    header does not carry all three names, the module-level ordinals
    ``URI``, ``USERNAME`` and ``PASSWORD`` are used instead.
    """
    by_name = {name.strip().lower(): pos for pos, name in enumerate(header)}
    try:
        return (
            by_name['login_uri'],
            by_name['login_username'],
            by_name['login_password'],
        )
    except KeyError:
        return URI, USERNAME, PASSWORD


def _site_key(uri):
    """Return the network location of *uri*, or *uri* itself if it has none."""
    try:
        domain = urlparse(uri).netloc
    except ValueError:
        # urlparse rejects some malformed URLs; compare those verbatim
        # rather than aborting the whole run.
        return uri
    return domain or uri


def main(argv):
    """Split a Bitwarden CSV export into unique and duplicate entries.

    ``argv[0]`` is the path of the export.  Two files are written next to
    it, named after the input with its extension removed:

    * ``<stem>_out.csv`` - the header row plus the first occurrence of every
      (site, username, password) combination;
    * ``<stem>_rem.csv`` - every later occurrence of a combination already
      seen, plus rows that have too few fields to be compared.

    The site is the network location of the URI when it has one (so
    different pages of one host count as the same site), otherwise the raw
    URI text.  The first row of the input is always treated as the header.
    Rows are parsed with the ``csv`` module, so quoted fields containing
    commas or line breaks are handled as single fields and written back
    intact.

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: with status 1 when no input path is given.
        OSError: when the input cannot be read or an output cannot be
            written.
    """
    # Local imports keep this function self-contained; the rest of the file
    # does not need these modules.
    import csv
    import os

    if len(argv) < 1:
        print('Missing input file path')
        sys.exit(1)

    in_file_path = argv[0]
    # splitext copes with any extension (or none) instead of assuming that
    # the path ends in a four-character '.csv'.
    stem = os.path.splitext(in_file_path)[0]
    out_file_path = stem + '_out.csv'
    rem_file_path = stem + '_rem.csv'

    seen = set()
    unique_count = 0
    dup_count = 0
    malformed_count = 0

    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields survive the round trip.  The input is opened first so a
    # missing input file does not leave empty output files behind.
    with open(in_file_path, 'r', encoding='utf8', newline='') as in_file, \
            open(out_file_path, 'w', encoding='utf8', newline='') as out_file, \
            open(rem_file_path, 'w', encoding='utf8', newline='') as rem_file:
        reader = csv.reader(in_file)
        out_writer = csv.writer(out_file, lineterminator='\n')
        rem_writer = csv.writer(rem_file, lineterminator='\n')

        header = next(reader, None)
        if header is not None:
            out_writer.writerow(header)
            uri_col, user_col, pass_col = _dedup_columns(header)
            # A usable row is at least as wide as the header and wide enough
            # to contain every compared column.
            min_fields = max(len(header), uri_col + 1, user_col + 1, pass_col + 1)

            for row in reader:
                if not row:
                    # Blank line: nothing to keep or to report.
                    continue
                if len(row) < min_fields:
                    print(f'Missing fields in line {reader.line_num}:\n{",".join(row)}')
                    rem_writer.writerow(row)
                    malformed_count += 1
                    continue

                # A tuple keeps the three fields separate, so ("ab", "c") and
                # ("a", "bc") can never be mistaken for one another.
                key = (_site_key(row[uri_col]), row[user_col], row[pass_col])
                if key in seen:
                    rem_writer.writerow(row)
                    dup_count += 1
                else:
                    seen.add(key)
                    out_writer.writerow(row)
                    unique_count += 1

    print(f'\nOutput file: {out_file_path}\n{unique_count} unique entries saved')
    print(f'\n{dup_count} duplicates saved to {rem_file_path}')
    if malformed_count:
        print(f'{malformed_count} malformed lines saved to {rem_file_path}')
76 | ||
# Run the de-duplication only when this file is executed as a script,
# handing main() the command-line arguments without the program name.
if __name__ == '__main__':
    main(sys.argv[1:])