Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- None_None
- ConfigHandler_56663624
- ConfigHandler_56663624
- ConfigHandler_56663624
- ConfigHandler_56663624
- None_None
- ColumnConverter_56963312
- ColumnConverter_56963312
- PredicatesFactory_56963424
- PredicatesFactory_56963424
- PredicateConverter_56963648
- PredicateConverter_56963648
- ConfigHandler_80134888
- ConfigHandler_80134888
- ConfigHandler_80134888
- ConfigHandler_80134888
- None_None
- ConfigHandler_56663624
- ColumnConverter_56963312
- PredicatesFactory_56963424
- PredicateConverter_56963648
- ConfigHandler_80134888
from collections import OrderedDict

def unique_nonempty_lines(lines):
    """Return the distinct non-blank lines of *lines* in first-seen order.

    Trailing whitespace (including the newline) is stripped from each
    line before comparison.  ``OrderedDict.fromkeys`` preserves
    insertion order, so the first occurrence of each line wins.
    """
    stripped = (line.rstrip() for line in lines)
    return list(OrderedDict.fromkeys(line for line in stripped if line))

if __name__ == '__main__':
    with open('/home/jon/testdata.txt') as fin:
        # print() and an explicit list: Py2 `print d.keys()` relied on
        # keys() returning a list, which is no longer true in Python 3.
        print(unique_nonempty_lines(fin))
    # ['None_None', 'ConfigHandler_56663624', 'ColumnConverter_56963312',
    #  'PredicatesFactory_56963424', 'PredicateConverter_56963648',
    #  'ConfigHandler_80134888']
def remove_duplicate_lines(path):
    """Rewrite *path* in place, keeping one copy of each line.

    As in the original ``set()``-based version, the surviving order is
    arbitrary.  Both files are managed with ``with`` blocks so the read
    handle is released before the truncating reopen and the write is
    flushed and closed even on error (the original leaked both handles).
    """
    with open(path) as src:
        unique = set(src)
    with open(path, 'w') as dst:
        dst.writelines(unique)

if __name__ == '__main__':
    remove_duplicate_lines('workfile.txt')
from pprint import pprint

def unique_lines(path):
    """Return the set of distinct lines (newlines included) in *path*."""
    with open(path) as f:
        return set(f)

if __name__ == '__main__':
    # pprint() writes to stdout itself and returns None; the original
    # `print pprint(...)` therefore also printed a stray "None".
    pprint(unique_lines('input.txt'))
- hvn@lappy: /tmp () $ sort -nr dup | uniq
- PredicatesFactory_56963424
- PredicateConverter_56963648
- None_None
- ConfigHandler_80134888
- ConfigHandler_56663624
- ColumnConverter_56963312
- In [2]: with open("dup", 'rt') as f:
- lines = f.readlines()
- ...:
- In [3]: lines
- Out[3]:
- ['None_None\n',
- '\n',
- 'ConfigHandler_56663624\n',
- 'ConfigHandler_56663624\n',
- 'ConfigHandler_56663624\n',
- 'ConfigHandler_56663624\n',
- '\n',
- 'None_None\n',
- '\n',
- 'ColumnConverter_56963312\n',
- 'ColumnConverter_56963312\n',
- '\n',
- 'PredicatesFactory_56963424\n',
- 'PredicatesFactory_56963424\n',
- '\n',
- 'PredicateConverter_56963648\n',
- 'PredicateConverter_56963648\n',
- '\n',
- 'ConfigHandler_80134888\n',
- 'ConfigHandler_80134888\n',
- 'ConfigHandler_80134888\n',
- 'ConfigHandler_80134888\n']
- In [4]: set(lines)
- Out[4]:
- set(['ColumnConverter_56963312\n',
- '\n',
- 'PredicatesFactory_56963424\n',
- 'ConfigHandler_56663624\n',
- 'PredicateConverter_56963648\n',
- 'ConfigHandler_80134888\n',
- 'None_None\n'])
import json

def report_duplicates(items):
    """Print each duplicated item and return the set of unique items.

    Note: the original's ``del p`` only unbound the loop variable -- it
    never removed anything from the underlying data -- so it is dropped.
    """
    uniq = set()
    for p in items:
        if p in uniq:
            # Runtime message kept verbatim from the original snippet.
            print("duplicate : " + p)
        else:
            uniq.add(p)
    return uniq

if __name__ == '__main__':
    # `with` closes the handle; the original left the file open.
    with open('yourfile') as f:
        myfile = json.load(f)
    print(report_duplicates(myfile))
- import uuid
- def _remove_duplicates(filePath):
- lines=open(filePath, 'r').readlines()
- lines_set = set(lines)
- tmp_file=str(uuid.uuid4())
- out=open(tmp_file, 'w')
- for line in lines_set:
- out.write(line)
- os.rename(tmp_file,filePath)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement