Advertisement
Typhoon

E-Mail - Check 2 Files for duplicates

Apr 3rd, 2015
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.07 KB | None | 0 0
  #!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Check two e-mail list files for duplicates.

Every line of the second file whose address also appears in the first file
is reported as a duplicate; all other lines are copied unchanged to a new
file.  A sorted list of the remaining addresses and the two counts are
printed at the end.
"""
import sys
import re


def _strip_eol(line):
    """Return *line* with every carriage-return and line-feed removed."""
    return line.replace('\r', '').replace('\n', '')


def main(known_path="Unregistered.txt", candidates_path="Firmy.txt",
         output_path="newfile.txt"):
    """Copy the lines of *candidates_path* not listed in *known_path*.

    Args:
        known_path: file with one already-known address per line.
        candidates_path: file with one address per line to be checked.
        output_path: file that receives the non-duplicate lines, written
            exactly as they were read.

    Returns:
        A ``(unique, duplicate)`` tuple of lists holding the addresses with
        their line endings removed.  ``unique`` is sorted; ``duplicate``
        keeps the order in which the duplicates were encountered.
    """
    print('Starting cleaning')

    # A set gives constant-time membership tests; the original list made
    # the comparison quadratic in the size of the two files.
    with open(known_path, "r") as known_file:
        known = set(_strip_eol(line) for line in known_file)

    unique = []
    duplicate = []
    # Open the second file, check for duplicates in the first file and save
    # the rest to the new file.  The output is opened in text mode because
    # the lines are text; binary mode rejects them on Python 3.
    with open(candidates_path, "r") as candidates, \
            open(output_path, "w") as output:
        for line in candidates:
            email = _strip_eol(line)
            if email in known:
                # Print the cleaned address so the message carries no stray
                # '\r' and is not followed by an extra blank line.
                print("Duplicate: " + email)
                duplicate.append(email)
            else:
                output.write(line)
                unique.append(email)

    # Print unique e-mails to screen, one per line.  Storing the cleaned
    # addresses keeps a final line without a trailing newline from being
    # glued to its neighbour after sorting.
    unique.sort()
    print(''.join(email + '\n' for email in unique))
    print("\nUnique E-Mails : %d" % len(unique))
    print("\nDuplicate E-Mails : %d" % len(duplicate))
    return unique, duplicate


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement