Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import sys
- import re
- print 'Starting cleaning'
- # Open first file and add emails from file to array
- with open("Unregistered.txt", "r") as ins:
- array = []
- for line in ins:
- array.append(line.replace('\n','').replace('\r',''))
- # Debug Array for "\r" and "\n" characters
- # print "Array : \n" + str(array)
- unique = []
- duplicate = []
- # Open second file, chack for duplicates in first file and then save it to new file.
- with open("Firmy.txt","r") as input:
- with open("newfile.txt","wb") as output:
- for line in input:
- if line.replace('\r','').replace('\n','') in array :
- print "Duplicate: " + line
- duplicate.append(line.replace('\n',''))
- else :
- output.write(line)
- unique.append(line)
- # Print unique emails to screen
- unique.sort()
- print ''.join(unique)
- print "\nUnique E-Mails :" , len(unique);
- print "\nDuplicate E-Mails :" , len(duplicate);
- #print '\n' , str(duplicate).replace("\n","").replace('\r','')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement