# id_helper.py

# walk through all config files and get a list of ids with collisions
# fix them

# author michael clark
# date June 21 2013
# licence, GNUv3
# version 3
import os, re
import numpy as np
from shutil import move
from os import remove, close
from tempfile import mkstemp
import itertools
#import raw_input
import random

# Matches a config entry such as "I:name=1234" or "B:name=1234":
# group 1 is the entry name, group 2 is the numeric id.
i = re.compile(r'[IB]:(.+)=(\d+)')

# Exclusive upper bound for ids (31743 is the highest id considered).
MAX_ID = 31743 + 1

# id -> [file, line number, line text, entry name] of the registration we keep
ids = {}

# id -> list of registrations (same record layout) that fight over that id
collisions = {}

# The "config" directory below the current working directory is what gets scanned.
configdir = os.path.join(os.path.abspath(os.curdir), 'config')

# When true, an id registered twice inside one file is not reported as a collision.
IGNORE_IF_SAME_CONFIG_FILE = True

# NOTE(review): appears unused in this file -- confirm before removing.
mark_item_section = ['id', 'item']
def used_ids(ids):
    """Return the keys of ``ids`` as a list in ascending order."""
    ordered = list(ids.keys())
    ordered.sort()
    return ordered

def free_ids(ids, min_id=159, max_id=None):
    """Return the unregistered ids in ``[min_id, max_id)`` in ascending order.

    ids    -- mapping whose keys are the ids already taken
    min_id -- first id considered (defaults to 159, the value used across this file)
    max_id -- exclusive upper bound; defaults to the module level MAX_ID

    The result is always ascending.  Callers feed it to str_ranges(), which
    only collapses consecutive runs when its input is ordered, so the order
    must not depend on set iteration order.
    """
    if max_id is None:
        max_id = MAX_ID
    return [candidate for candidate in range(min_id, max_id)
            if candidate not in ids]

def collision_ids(collisions):
    """Return the ids that have recorded collisions, smallest first."""
    keys = collisions.keys()
    return sorted(keys)

def main():
    """Scan the config directory, report the ids found, then fix collisions.

    Every file below ``configdir`` whose first line contains
    "# Configuration file" is read line by line.  Inside a section whose
    opening line contains "{" together with "item" or "id", each line that
    contains "I:" or "B:" is handed to get_id(), which records the id in the
    module level ``ids`` / ``collisions`` tables.  Afterwards the results are
    printed, written to a csv report, and every collision except the first
    registration of each id is passed to fix().
    """
    for root, dirs, files in os.walk(configdir):
        for name in files:
            path = os.path.join(root, name)
            cur_ids = []
            # The with-block closes the file on every path, including the
            # early "continue" for files that are not config files.
            with open(path, 'r') as ff:
                if "# Configuration file" not in ff.readline():
                    continue  # skip this file
                in_items = False
                # The header line was consumed above, so the first line seen
                # by this loop is line 2 of the file.
                for l, line in enumerate(ff, 2):
                    ls = line.strip()
                    if ls[:1] == '#':  # skip as it is a comment
                        continue
                    if '{' in ls and ('item' in ls or 'id' in ls):
                        in_items = True
                        continue
                    if '}' in ls:
                        in_items = False
                        continue
                    if in_items and ('I:' in line or 'B:' in line):
                        # get_id also records the id into ids and collisions
                        found = get_id(path, l, ls)
                        if found:
                            cur_ids.append(found)
            if cur_ids:
                # Sorted, because str_ranges only merges runs of ordered input.
                summary = str_ranges(sorted(set(cur_ids)))
                print("{}  contained ids: {} \n".format(path, summary))

    # now write to command line, and a csv file
    write_to_stdout()
    write_to_csv()

    # now interactively fix each one
    print("\nFound {} conflicts, interactivly fixing now:".format(len(collisions)))

    for key in sorted(collisions):
        # the first entry is the registration that keeps the id (first come, first served)
        for collision in collisions[key][1:]:
            fix(key, collision)

def _replace_id_in_file(file_path, line_text, old_id, new_id):
    """Rewrite old_id to new_id on the lines whose stripped text equals line_text.

    Only the numeric field matched by the module level id pattern is changed,
    so other numbers in the file that merely contain the same digits are left
    alone.  Returns True when at least one line was rewritten.
    """
    with open(file_path) as source:
        lines = source.readlines()
    changed = False
    for index, text in enumerate(lines):
        if text.strip() != line_text:
            continue
        match = i.search(text)
        if match is None or int(match.group(2)) != old_id:
            continue
        lines[index] = text[:match.start(2)] + str(new_id) + text[match.end(2):]
        changed = True
    if changed:
        with open(file_path, 'w') as target:
            target.writelines(lines)
    return changed


def fix(key, collision):
    """Interactively move one colliding registration to a nearby free id.

    key       -- the id that is in collision
    collision -- [file, line number, line text, entry name] of the
                 registration that should move

    The user is shown a proposed new id and answers "y" (apply), "n" (leave
    this collision alone) or "r" (propose the next nearest free id).  On "y"
    only the colliding line is rewritten and the new id is recorded in
    ``ids`` so it is not offered again.

    Returns 0 when nothing was changed, None after a successful fix.
    """
    fs = collision[0]
    target = os.path.join(os.curdir, fs)
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3: input() behaves like the old raw_input()
    r = 0
    while True:
        try:
            new_id = n_free_block(key, r)  # get a free block nearby
        except IndexError:
            # n_free_block ran out of candidates for this reroll count.
            if r == 0:
                print("No free ids left, cannot fix id {} in file {}".format(key, fs))
                return 0
            print("No more free ids to offer, starting again from the nearest one.")
            r = 0
            continue
        q = "\nFix (y,n,r(Reroll))?\n {}->{}\t in file {} for line: \n{}\nAll collisions:".format(key, new_id, fs, collision[2])
        for c in collisions[key]:
            q += "\n    file:{},line:{}".format(c[0], c[2])
        q += "\n:"
        a = ask(q)
        if a == 'y':
            print(target)
            if not _replace_id_in_file(target, collision[2], key, new_id):
                print("Could not find the colliding line in {}, nothing was changed.".format(target))
                return 0
            ids[new_id] = collision
            print("fixed collision ({}->{}) at {} line {}".format(key, new_id, fs, collision[1]))
            return None
        elif a == 'n':
            return 0
        elif a == 'r':
            r += 1
        else:
            print("User input was not a y,n, or r. You need to enter one of these.")

def n_free_block(old_id, n=0):
    """Return the n-th nearest free id to ``old_id``.

    Free ids (as reported by free_ids for the module level ``ids`` table) are
    ranked by their distance from ``old_id``; ties are broken in favour of
    the smaller id.  ``n=0`` gives the nearest one, ``n=1`` the next, and so
    on.

    Raises IndexError when fewer than ``n + 1`` free ids exist.
    """
    # free_ids() already excludes every key of ``ids``, so no second check
    # against ``ids`` is needed after picking a candidate.
    by_distance = sorted(free_ids(ids),
                         key=lambda free: (abs(free - old_id), free))
    return by_distance[n]

def get_id(f, l, line):
    """Extract the id from one config line and record it.

    f    -- path of the file the line came from
    l    -- line number used in reports
    line -- the text of the line

    Returns None when the line does not match the id pattern, '' when the
    match cannot be unpacked, and otherwise the id as an int (even when it
    lies outside the tracked range 159 .. MAX_ID - 1, in which case nothing
    is recorded).

    For ids inside the range the registration [file, line number, line text,
    entry name] is stored in ``ids``.  A second registration of the same id
    replaces the stored one when it has the same entry name, or when it comes
    from the same file and IGNORE_IF_SAME_CONFIG_FILE is set; otherwise it is
    appended to ``collisions`` (together with the registration that already
    held the id, the first time the id collides).
    """
    match = i.search(line)
    if match is None:
        return None
    try:
        item, found = match.groups()
    except ValueError:
        print("could not parse  {}".format(line))
        return ''
    found = int(found)
    # Plain comparisons and dict lookups: no list is built or scanned per call.
    if 159 <= found < MAX_ID:
        fs = os.path.relpath(f)
        record = [fs, l, line, item]
        previous = ids.get(found)
        if previous is None:
            ids[found] = record  # remember the id registration
        elif previous[3] == item:
            ids[found] = record  # the same entry was simply stated again
        elif IGNORE_IF_SAME_CONFIG_FILE and previous[0] == fs:
            ids[found] = record  # both registrations come from the same file
        else:
            # The first collision for an id also stores the registration
            # that held the id before.
            collisions.setdefault(found, [previous]).append(record)
    return found

def write_to_stdout():
    """Print the collision table plus summaries of the used and free ids."""
    print(collisions)
    # one paragraph per colliding id
    for collided in collision_ids(collisions):
        print("Collisions for id {}:".format(collided))
        for path, lineno, text, _entry in collisions[collided]:
            print("    {}\t at line#: {} in file: \t{}".format(text, lineno, path))
        print('\n')
    used_summary = str_ranges(used_ids(ids))
    print("Found these keys:  \t{} \n".format(used_summary))
    free_summary = str_ranges(free_ids(ids))
    print("Free keys are:  \t{} \n".format(free_summary))
    print("Please wait a moment...")

def write_to_csv():
    """Write the id table and the collision list to id_help_py_report.csv.

    The report is created in the current directory.  It starts with one row
    per id in 159 .. MAX_ID - 1 (id, used flag, collision flag, file, line
    number, line text; unused ids get '', -999, ''), followed by one
    paragraph per colliding id listing every registration involved.
    """
    output = os.path.join(os.curdir, 'id_help_py_report.csv')

    # The header names all six columns that the rows below contain.
    parts = ['ID table\nID, Used?, In Collision?, file, line#, line\n']
    for key in range(159, MAX_ID):
        # Direct dict membership keeps this loop linear in the id range.
        if key in ids:
            f, l, line, item = ids[key]
            used = 1
        else:
            f, l, line = '', -999, ''
            used = 0
        parts.append('{},{},{},{},{},{}\n'.format(
            key, used, int(key in collisions), f, l, line))

    for key in collision_ids(collisions):
        parts.append("\nCollisions for id {}: (id,line,line$,file)\n".format(key))
        for f, l, line, item in collisions[key]:
            parts.append(" {},{},{},{}\n".format(key, line, l, f))
        parts.append('\n')

    # The report is assembled first, so the file is only opened once the
    # content is ready, and the with-block always closes it.
    with open(output, 'w') as o:
        o.write(''.join(parts))

    print("\nOutput writen as csv's to {}".format(output))

def replace(file_path, pattern, subst):
    """Replace every occurrence of ``pattern`` with ``subst`` in a file.

    file_path -- file to rewrite
    pattern   -- plain substring to look for (not a regular expression)
    subst     -- replacement text

    The new content is written to a temporary file which then takes the
    place of the original.  If anything fails while writing, the temporary
    file is deleted and the original is left untouched.

    Adapted from
    http://stackoverflow.com/questions/39086/search-and-replace-a-line-in-a-file-in-python
    """
    fh, abs_path = mkstemp()
    try:
        # fdopen reuses the descriptor returned by mkstemp, so the temporary
        # file is opened once and closed by the with-block.
        with os.fdopen(fh, 'w') as new_file:
            with open(file_path) as old_file:
                for line in old_file:
                    new_file.write(line.replace(pattern, subst))
    except Exception:
        remove(abs_path)
        raise
    # Remove the original first, then move the new file into its place.
    remove(file_path)
    move(abs_path, file_path)

def ranges(i):
    """Yield (first, last) for every run of consecutive numbers in ``i``.

    E.g. [1, 2, 3, 6, 7, 9] yields (1, 3), (6, 7), (9, 9).  A run continues
    only while each value is exactly one more than the value before it, so
    the input should be in ascending order; a repeated value starts a new run.
    """
    # Within a run of consecutive numbers, value minus position is constant.
    # The key indexes the (position, value) pair instead of unpacking it in
    # the lambda signature, which only Python 2 accepts.
    for _, run in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]):
        run = list(run)
        yield run[0][1], run[-1][1]

def str_ranges(i):
    """Return a string summarising the runs of numbers in ``i``.

    Each run reported by ranges() is written as "first-last" (or just the
    number when the run has a single element) followed by ", ".
    E.g. [1, 2, 3, 6, 7, 9, 13, 14, 14] gives '1-3, 6-7, 9, 13-14, 14, '.
    """
    pieces = []
    for first, last in ranges(i):
        if first != last:
            pieces.append("{}-{}, ".format(first, last))
        else:
            pieces.append("{}, ".format(first))
    return ''.join(pieces)

# Start the scan only when this file is executed as a script.
if __name__ == '__main__':
    main()