Advertisement
Guest User

Untitled

a guest
Jul 28th, 2016
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.52 KB | None | 0 0
  1. None_None
  2.  
  3. ConfigHandler_56663624
  4. ConfigHandler_56663624
  5. ConfigHandler_56663624
  6. ConfigHandler_56663624
  7.  
  8. None_None
  9.  
  10. ColumnConverter_56963312
  11. ColumnConverter_56963312
  12.  
  13. PredicatesFactory_56963424
  14. PredicatesFactory_56963424
  15.  
  16. PredicateConverter_56963648
  17. PredicateConverter_56963648
  18.  
  19. ConfigHandler_80134888
  20. ConfigHandler_80134888
  21. ConfigHandler_80134888
  22. ConfigHandler_80134888
  23.  
  24. None_None
  25.  
  26. ConfigHandler_56663624
  27.  
  28. ColumnConverter_56963312
  29.  
  30. PredicatesFactory_56963424
  31.  
  32. PredicateConverter_56963648
  33.  
  34. ConfigHandler_80134888
  35.  
  36. from collections import OrderedDict
  37.  
  # Collapse repeated lines of the input file, keeping the order in which each
  # distinct line first appears. Blank lines are dropped entirely.
  with open('/home/jon/testdata.txt') as fin:
      # Lazily strip trailing whitespace (including the newline) from each line.
      lines = (line.rstrip() for line in fin)
      # The generator reads from `fin`, so it must be consumed while the file
      # is still open. OrderedDict keys are unique and keep insertion order.
      unique_lines = OrderedDict.fromkeys(line for line in lines if line)

  # list(...) plus the call form of print gives the same output on Python 2
  # and Python 3.
  print(list(unique_lines))
  # ['None_None', 'ConfigHandler_56663624', 'ColumnConverter_56963312', 'PredicatesFactory_56963424', 'PredicateConverter_56963648', 'ConfigHandler_80134888']
  44.  
  # Rewrite workfile.txt in place with duplicate lines removed.
  # Read everything first and close the handle before reopening for writing.
  with open('workfile.txt', 'r') as src:
      lines = src.readlines()

  # NOTE: a set removes duplicates but does not keep the original line order.
  lines_set = set(lines)

  # The context manager guarantees the output is flushed and closed.
  with open('workfile.txt', 'w') as out:
      out.writelines(lines_set)
  53.  
  54. from pprint import pprint
  55.  
  # Pretty-print the distinct lines of input.txt (set order is arbitrary).
  with open('input.txt', 'r') as f:
      # pprint() writes to stdout itself and returns None, so wrapping it in
      # print would emit an extra "None" line after the set.
      pprint(set(f.readlines()))
  58.  
  59. hvn@lappy: /tmp () $ sort -nr dup | uniq
  60. PredicatesFactory_56963424
  61. PredicateConverter_56963648
  62. None_None
  63. ConfigHandler_80134888
  64. ConfigHandler_56663624
  65. ColumnConverter_56963312
  66.  
  67. In [2]: with open("dup", 'rt') as f:
  68.    ...:     lines = f.readlines()
  69.    ...:
  70.  
  71. In [3]: lines
  72. Out[3]:
  73. ['None_None\n',
  74. '\n',
  75. 'ConfigHandler_56663624\n',
  76. 'ConfigHandler_56663624\n',
  77. 'ConfigHandler_56663624\n',
  78. 'ConfigHandler_56663624\n',
  79. '\n',
  80. 'None_None\n',
  81. '\n',
  82. 'ColumnConverter_56963312\n',
  83. 'ColumnConverter_56963312\n',
  84. '\n',
  85. 'PredicatesFactory_56963424\n',
  86. 'PredicatesFactory_56963424\n',
  87. '\n',
  88. 'PredicateConverter_56963648\n',
  89. 'PredicateConverter_56963648\n',
  90. '\n',
  91. 'ConfigHandler_80134888\n',
  92. 'ConfigHandler_80134888\n',
  93. 'ConfigHandler_80134888\n',
  94. 'ConfigHandler_80134888\n']
  95.  
  96. In [4]: set(lines)
  97. Out[4]:
  98. set(['ColumnConverter_56963312\n',
  99. '\n',
  100. 'PredicatesFactory_56963424\n',
  101. 'ConfigHandler_56663624\n',
  102. 'PredicateConverter_56963648\n',
  103. 'ConfigHandler_80134888\n',
  104. 'None_None\n'])
  105.  
  106. import json
  # Report repeated entries of the JSON document in 'yourfile' and collect the
  # distinct ones.
  # Assumes the entries are strings (they are concatenated into the message
  # and must be hashable) -- TODO confirm against the actual file.
  with open('yourfile', 'r') as fh:
      myfile = json.load(fh)

  uniq = set()
  for p in myfile:
      if p in uniq:
          # Already seen: report it. Nothing needs deleting, because the entry
          # is simply not added to `uniq` again.
          print("duplicate : " + p)
      else:
          uniq.add(p)
  print(uniq)
  116.  
  117. import uuid
  118.  
  def _remove_duplicates(filePath):
      """Rewrite the file at *filePath* with duplicate lines removed.

      Lines are compared exactly as read, including their trailing newline.
      The first occurrence of each line is kept and the relative order of the
      surviving lines is preserved. The result is written to a uniquely named
      temporary file next to the target and then renamed over it.

      Args:
          filePath: Path of the text file to de-duplicate in place.

      Raises:
          IOError/OSError: If the file cannot be read, the temporary file
              cannot be written, or the final rename fails.
      """
      # Local import: the surrounding snippet only imports `uuid`.
      import os

      with open(filePath, 'r') as src:
          lines = src.readlines()

      # Track seen lines in a set for O(1) membership tests, and build the
      # output list separately so first-occurrence order is kept.
      seen = set()
      unique_lines = []
      for line in lines:
          if line not in seen:
              seen.add(line)
              unique_lines.append(line)

      # Create the temporary file in the target's own directory so the rename
      # never has to cross a filesystem boundary.
      target_dir = os.path.dirname(os.path.abspath(filePath))
      tmp_file = os.path.join(target_dir, str(uuid.uuid4()))
      with open(tmp_file, 'w') as out:
          out.writelines(unique_lines)

      # Rename only after the temporary file has been flushed and closed.
      os.rename(tmp_file, filePath)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement