Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import sys
- import os
- from time import strftime, gmtime, time
- import re
- from hashlib import sha1
# Usage text; the script prints it when it is not started with exactly one
# command-line argument.
help = '''
Simple EMM sorter by Mr.Blue (set for TNK/TNK HD)
------------------------
Program needs file with logs (EMMs) in hexadecimal (text) format.
One EMM must be in one line.
Program removes repeated EMMs.
Example of usage:
python slog filename.log
or
./slog filename.log
As output it creates 1 file:
emm_tnk_filename.log
The general file with logs stays untouched.
'''

# Sample EMM prefixes the pattern below is meant to accept (shown with spaces
# between fields for readability; real log lines carry no spaces).
# The SD/HD tag is the original author's note, presumably the channel type.
# 8270 B4 000000 77398BFC 70AB6500 SD
# 8270 82 000000 003B9ACA 70796410 SD
# 8270 8B 000000 003B9FFC 70826510 SD
# 8270 B4 000000 78FFD9F9 70AB6500 HD
# 8270 B4 000000 003C833E 70AB6510 HD
# 8270 B4 000000 00334492 70AB6410 HD

# Anchored at line start: literal "8270", any 2 chars, literal "000000",
# any 8 chars, literal "70", any 2 chars, "6", then "4" or "5", then "0" or
# "1", then "0".
emm = r'^8270..000000........70..6[45][01]0'
def RemoveDuplicates(fname):
    """Rewrite *fname* in place so that every line occurs only once.

    The first occurrence of each line is kept and the original order is
    preserved.  Lines are compared by the SHA-1 digest of their content
    without the trailing line terminator, so only one digest per unique
    line is held in memory and a final line lacking a newline is still
    recognised as a duplicate of an earlier, terminated one.

    The filtered data is first written to ``fname + ".tmp"`` and then moved
    over *fname*.  Progress and a summary are printed to stdout.

    Args:
        fname: Path of the file to de-duplicate.

    Raises:
        IOError / OSError: if *fname* cannot be read, or the temporary file
            cannot be written or moved into place.
    """
    seen_digests = set()
    dup_cnt = 0
    fname_tmp = fname + ".tmp"

    print("Removing duplicates from file: %s" % fname)

    # Binary mode: sha1() requires bytes on Python 3, and lines are copied
    # to the output verbatim, so no decoding is needed.  The with-statement
    # guarantees both handles are closed even if reading or writing fails.
    with open(fname, "rb") as fi, open(fname_tmp, "wb") as fo:
        for line in fi:
            # Ignore the line terminator when hashing so "X\n", "X\r\n" and
            # a last-line "X" all count as the same record.
            dig = sha1(line.rstrip(b"\r\n")).digest()
            if dig in seen_digests:
                dup_cnt += 1
                # "\r" rewinds the cursor so the counter updates in place.
                sys.stdout.write("\rFound duplicates: %d" % dup_cnt)
                continue
            seen_digests.add(dig)
            fo.write(line)

    # Terminate the in-place progress line.
    print("")

    # Move the filtered file over the original.  os.replace (Python 3.3+)
    # overwrites the target in one step; older interpreters fall back to
    # remove + rename, which is needed where rename refuses to overwrite.
    replace = getattr(os, "replace", None)
    if replace is not None:
        replace(fname_tmp, fname)
    else:
        os.remove(fname)
        os.rename(fname_tmp, fname)

    if dup_cnt == 0:
        print("Duplicates not found")
    print("")
def Analyse(fn):
    """Extract the lines of *fn* that match the EMM pattern into a new file.

    Every line of *fn* is tested against the module-level ``emm`` regular
    expression; matching lines are copied unchanged to a file named
    ``emm_tnk_<basename of fn>`` created in the same directory as *fn*.
    That output file is then de-duplicated with ``RemoveDuplicates``.  The
    input file is only read, never modified.

    Progress, line/match counts, UTC start/stop times and the elapsed time
    are printed to stdout.

    Args:
        fn: Path of the log file to scan (one EMM per line).

    Raises:
        IOError / OSError: if *fn* cannot be read or the output file cannot
            be created.
    """
    # Prefix the file name only, not the whole path, so that an input such
    # as "logs/a.log" yields "logs/emm_tnk_a.log" instead of a bogus path.
    out_dir, base_name = os.path.split(fn)
    f_emms = os.path.join(out_dir, "emm_tnk_" + base_name)

    start = time()
    print("Start at %s" % strftime("%d-%m-%Y %H:%M:%S", gmtime()))
    print("Analysing file: %s" % fn)
    print("")

    lcnt = emms_cnt = 0
    # Files are processed as bytes (lines are copied verbatim, and a stray
    # undecodable byte in the log must not abort the scan), so the pattern
    # is compiled as a bytes pattern too.  Bind .search once for the loop.
    is_emm = re.compile(emm.encode("ascii")).search

    with open(fn, "rb") as fl, open(f_emms, "wb") as es:
        for line in fl:
            lcnt += 1
            if lcnt % 5000 == 0:
                # "\r" rewinds the cursor so the counter updates in place.
                sys.stdout.write("\rProceed line: %d" % lcnt)
            if is_emm(line):
                es.write(line)
                emms_cnt += 1

    # Simple summary
    print("")
    print("")
    print("Lines : %d" % lcnt)
    print("EMM TNK : %d" % emms_cnt)
    print("")

    # Both files are closed at this point; drop repeated EMMs from the output.
    RemoveDuplicates(f_emms)

    print("Stop at %s Total time:  %s sec."
          % (strftime("%H:%M:%S", gmtime()), time() - start))
- # -----------------------------------------------------------------------------
if __name__ == '__main__':
    # Exactly one argument (the log file name) is expected; anything else
    # prints the usage text and exits with status 1.
    if len(sys.argv) != 2:
        print(help)
        sys.exit(1)
    Analyse(sys.argv[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement