simple_slog_tnk.py

#!/usr/bin/python

import sys
import os
from time import strftime, gmtime, time
import re
from hashlib import sha1

# Usage text printed when the script is started with the wrong number of
# arguments. The name shadows the builtin ``help``; it is kept because the
# entry point at the bottom of the file refers to it by this name.
help = '''
Simple EMM sorter by Mr.Blue (set for TNK/TNK HD)

      ------------------------

Program needs file with logs (EMMs) in hexadecimal (text) format.
One EMM must be in one line.

Program removes repeated EMMs.

Example of usage:
        python slog filename.log
    or
        ./slog filename.log

    As output it creates 1 file:
        emm_tnk_filename.log

The general file with logs stays untouched.

'''
# Sample EMM headers that the pattern below is written for, tagged SD or HD.
# The spaces are shown for readability only: the pattern contains no spaces,
# so presumably the log lines carry these digits contiguously - TODO confirm.
#8270 B4 000000 77398BFC 70AB6500 SD
#8270 82 000000 003B9ACA 70796410 SD
#8270 8B 000000 003B9FFC 70826510 SD

#8270 B4 000000 78FFD9F9 70AB6500 HD
#8270 B4 000000 003C833E 70AB6510 HD
#8270 B4 000000 00334492 70AB6410 HD


# Regular expression used to select EMM lines. It is anchored at the start of
# the line and reads, left to right:
#   8270        literal prefix
#   ..          any two characters
#   000000      literal
#   ........    any eight characters
#   70          literal
#   ..          any two characters
#   6[45][01]0  '6', then '4' or '5', then '0' or '1', then '0'
# NOTE(review): '.' accepts any character, not only hexadecimal digits -
# confirm that this looseness is intended.
emm = '^8270..000000........70..6[45][01]0'

def RemoveDuplicates(fname):
    """Remove repeated lines from the file *fname*, rewriting it in place.

    The first occurrence of every line is kept and the original order is
    preserved. Lines are compared without their trailing line terminator, so
    a final line that lacks a newline is still recognised as a repeat of an
    earlier line. Only one SHA-1 digest per distinct line is kept in memory,
    not the lines themselves.

    The filtered content is written to ``fname + ".tmp"``, which then
    replaces the original file. A running duplicate counter and a short
    summary are written to standard output.

    Args:
        fname: path of the file to deduplicate.

    Raises:
        IOError / OSError: if the file cannot be read, the temporary file
            cannot be written, or the original cannot be replaced. When
            reading or writing fails the original file is left untouched.
    """
    # Digests of every distinct line seen so far
    seen = set()
    dup_cnt = 0
    fname_tmp = fname + ".tmp"

    print("Removing duplicates from file: %s" % fname)

    # Binary mode: lines are hashed and copied as raw bytes, which works the
    # same on Python 2 and 3 and keeps the original line endings intact.
    # The with-statement closes both files even if an error occurs.
    with open(fname, "rb") as fi, open(fname_tmp, "wb") as fo:
        for line in fi:
            # Hash the content only, so "X" and "X\n" count as the same line
            dig = sha1(line.rstrip(b"\r\n")).digest()

            if dig in seen:
                dup_cnt += 1
                # "\r" rewinds to the line start so the counter updates in place
                sys.stdout.write("\rFound duplicates: %d" % dup_cnt)
                continue

            seen.add(dig)
            fo.write(line)

    # Terminate the in-place counter line
    print("")

    # os.rename() does not overwrite an existing file on Windows, so the
    # original is removed first.
    os.remove(fname)
    os.rename(fname_tmp, fname)

    if dup_cnt == 0:
        print("Duplicates not found")

    print("")

def Analyse(fn):
    """Extract the EMM lines from the log file *fn* and deduplicate them.

    Every line of *fn* on which the module-level pattern ``emm`` is found is
    copied to a new file named ``emm_tnk_<name of fn>``, created in the same
    directory as *fn*. That output file is then passed to RemoveDuplicates().
    The input file is only read, never modified.

    Progress, line counts and start/stop timestamps (taken with gmtime(),
    i.e. UTC) are written to standard output.

    Args:
        fn: path of the log file to analyse.

    Raises:
        IOError / OSError: if the input cannot be opened or the output
            cannot be created or rewritten.
    """
    # Prefix the file name, not the whole path: plain concatenation would
    # turn "logs/x.log" into the invalid "emm_tnk_logs/x.log".
    f_emms = os.path.join(os.path.dirname(fn), "emm_tnk_" + os.path.basename(fn))

    lcnt = emms_cnt = 0
    # Bind the compiled search once, outside the per-line loop
    fe = re.compile(emm).search

    # The with-statement closes both files even if an error occurs
    with open(fn, "r") as fl, open(f_emms, "w") as es:
        start = time()
        print("Start at %s" % strftime("%d-%m-%Y %H:%M:%S", gmtime()))
        print("Analysing file: %s" % fn)
        print("")

        for line in fl:
            lcnt += 1
            if lcnt % 5000 == 0:
                # "\r" rewinds to the line start so the counter updates in place
                sys.stdout.write("\rProceed line: %d" % lcnt)

            if fe(line):
                es.write(line)
                emms_cnt += 1

    # Print simple summary
    print("")
    print("")
    print("Lines   : %d" % lcnt)
    print("EMM TNK : %d" % emms_cnt)
    print("")

    # The output must be closed before it is rewritten by the deduplication
    RemoveDuplicates(f_emms)
    print("Stop at %s Total time:  %s sec." % (strftime("%H:%M:%S", gmtime()), time() - start))

# -----------------------------------------------------------------------------
if __name__ == '__main__' :
    # Exactly one command-line argument is expected: the log file to analyse.
    if len(sys.argv) == 2 :
        Analyse(sys.argv[1])
    else :
        # Wrong usage: show the help text and exit with a non-zero status.
        # The parenthesised form prints the same text on Python 2 and 3.
        print(help)
        sys.exit(1)