# find_replace.py

################################################################################
#
# find_replace.py - Finds every key of STR_DICT in the given files and,
# optionally, replaces it with the corresponding value.  STR_DICT can be a
# list instead of a dictionary if you are not using the replace flag (-rep).
# You can also populate STR_DICT with a CSV file from the command line.  Use
# the -h or --help command line option for usage instructions.
#
# Example usage with:
#   1.) STR_DICT populated via CSV
#   2.) Regex to ignore any file with ".git" in the name
#   3.) Regex to match only ".c" and ".h" files
#   4.) Recursive find (no replace) in the "D:\Git" directory
#
# python find_replace.py --csv "map.csv" -rec -reg_i "^.*?\.git.*$" -reg_m "^.*?\.[ch]$" "D:\Git"
#
# Use CMD instead of BASH if you are using the -reg_i or -reg_m flags.  BASH
# chokes on the regex for some reason.
#
################################################################################

import argparse
import csv
import os.path
import re
import sys

# Settings filled in by main() from the parsed command line.
CSV = REPLACE = RECURSE = None
REGEX_FIND = REGEX_IGNORE = REGEX_MATCH = None

# Number of files that passed the match/ignore filters so far.
FILE_CNT = 0

# Log file; main() empties it at the start of every run.
LOG = 'find_replace.log'

# Find/replace pairs.  Replaced wholesale by bldStrDict() when a CSV file is
# supplied on the command line.
STR_DICT = {
#   "FIND_THIS": "REPLACE_WITH_THIS",
#   "ALSO_FIND_THIS": "REPLACE_WITH_THIS",
}

def bldStrDict():
    """Rebuild STR_DICT from the CSV file named by the CSV global.

    Column one of each row is the string to find and column two is its
    replacement.  Rows whose first column is missing or empty are skipped; a
    missing second column yields an empty replacement string.  The delimiter
    and quote character come from getCsvDelimiter() and getCsvQuoteChar().
    """
    global STR_DICT

    # we are using a CSV file to populate STR_DICT, so reset it before we start
    STR_DICT = dict()

    # The csv module wants a binary-mode file on Python 2 but a text-mode file
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvFile = open(CSV, 'rb')
    else:
        csvFile = open(CSV, 'r', newline='')

    with csvFile:
        rows = csv.reader(csvFile, delimiter=getCsvDelimiter(), quotechar=getCsvQuoteChar())
        for row in rows:
            # blank lines come back as empty rows; an empty key is useless
            if not row or not row[0]:
                continue
            STR_DICT[row[0]] = row[1] if len(row) > 1 else ''

def findAndReplace(inputs):
    """Dispatch each path in inputs to the directory or file handler.

    A path that is neither an existing directory nor an existing file is
    reported through logAndPrint and skipped.
    """
    for path in inputs:
        if os.path.isdir(path):
            handler = findAndReplaceInDir
        elif os.path.isfile(path):
            handler = findAndReplaceInFile
        else:
            logAndPrint(path + ' does not exist')
            continue
        handler(path)

def findAndReplaceInDir(dir):
    """Run findAndReplaceInFile on the files in a directory.

    When RECURSE is set the whole tree under dir is walked; otherwise only
    the files directly inside dir are processed.
    """
    if RECURSE:
        for root, _dirs, files in os.walk(dir):
            for name in files:
                findAndReplaceInFile(os.path.join(root, name))
    else:
        for item in os.listdir(dir):
            # listdir() yields bare names, so the directory has to be joined
            # back on before the file is processed; otherwise the name would
            # be resolved against the current working directory, not dir.
            path = os.path.join(dir, item)
            if os.path.isfile(path):
                findAndReplaceInFile(path)

def findAndReplaceInFile(file):
    """Search one file for every entry of STR_DICT, optionally replacing it.

    The file is skipped unless its absolute path matches REGEX_MATCH (when
    set) and does not match REGEX_IGNORE (when set).  Every file that passes
    the filters is counted in FILE_CNT, even if it then cannot be read.

    Entries are treated as regular expressions when REGEX_FIND is set and as
    plain text otherwise.  With REPLACE set, each match is replaced by the
    mapped value (inserted literally) and the file is rewritten in place;
    without it each hit is only reported.
    """
    global FILE_CNT

    file = os.path.abspath(file)
    if REGEX_MATCH and not re.search(REGEX_MATCH, file):
        return
    if REGEX_IGNORE and re.search(REGEX_IGNORE, file):
        return

    FILE_CNT = FILE_CNT + 1

    # Only ask for write access when the file may actually be modified, so a
    # find-only run can still search read-only files.
    mode = 'r+' if REPLACE else 'r'
    try:
        f = open(file, mode)
    except (IOError, OSError):
        logAndPrint('Couldn\'t open file ' + file)
        return

    # the with-block guarantees the handle is closed on every exit path
    with f:
        try:
            s = f.read()
        except (IOError, OSError, UnicodeError):
            # unreadable or undecodable (e.g. binary) content: skip the file
            logAndPrint('Couldn\'t open file ' + file)
            return

        for oldStr in STR_DICT:
            if REGEX_FIND:
                found = re.search(oldStr, s) is not None
            else:
                found = oldStr in s
            if not found:
                continue

            if not REPLACE:
                logAndPrint('Found ' + oldStr + ' in ' + file)
                continue

            newStr = STR_DICT[oldStr]
            if REGEX_FIND:
                # A regex key must be substituted as a regex; str.replace()
                # would only touch text equal to the pattern itself.  The
                # callable keeps the replacement literal (no backslash or
                # group-reference expansion), as str.replace() does.
                s = re.sub(oldStr, lambda match: newStr, s)
            else:
                s = s.replace(oldStr, newStr)
            f.seek(0)
            f.write(s)
            f.truncate()
            logAndPrint('Updated ' + file)

def getCsvDelimiter():
    """Return the field delimiter csv.Sniffer detects in the CSV file.

    Reads the file named by the CSV global.  Falls back to ',' when the
    sniffer cannot determine the dialect.
    """
    with open(CSV, "r") as csvFile:
        csvData = csvFile.read()

    try:
        return csv.Sniffer().sniff(csvData).delimiter
    except csv.Error:
        # sniffing is best effort; an undetectable dialect is not fatal
        return ','

def getCsvQuoteChar():
    """Return the quote character csv.Sniffer detects in the CSV file.

    Reads the file named by the CSV global.  Falls back to '"' when the
    sniffer cannot determine the dialect.
    """
    with open(CSV, "r") as csvFile:
        csvData = csvFile.read()

    try:
        return csv.Sniffer().sniff(csvData).quotechar
    except csv.Error:
        # sniffing is best effort; an undetectable dialect is not fatal
        return '"'

def getHms(ms):
    """Format a duration given in milliseconds as H:MM:SS.mmm."""
    hours, remainder = divmod(ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds = float(remainder) / 1000
    return "%i:%02i:%06.3f" % (hours, minutes, seconds)

def getTime():
    """Return the current time as whole milliseconds since the epoch."""
    from time import time as now
    return int(round(now() * 1000))

def logAndPrint(s):
    """Append the line s to the LOG file and echo it to standard output."""
    with open(LOG, 'a') as log:
        log.write(s + '\n')
    # Parenthesised form prints the same single string on Python 2 and is
    # valid syntax on Python 3, unlike the bare print statement.
    print(s)

def main():
    """Parse the command line, run the find/replace pass and log statistics.

    The parsed options are published through the module-level CSV, RECURSE,
    REGEX_FIND, REGEX_IGNORE, REGEX_MATCH and REPLACE globals that the worker
    functions read.  Always returns None, including when the CSV file named
    on the command line does not exist and the run is aborted.
    """
    global CSV, RECURSE, REGEX_FIND, REGEX_IGNORE, REGEX_MATCH, REPLACE

    startTime = getTime()

    # build the argument parser
    parser = argparse.ArgumentParser(
        prog='find_replace.py',
        description='This script will find/replace all instances of the keys in nameDict with their corresponding value in all the supplied files.  No special procesing is taking place, just a standard find/replace.')
    # '-csv' is accepted as well as '-c'/'--csv'; without the alias argparse
    # reads '-csv' as '-c' with the attached value 'sv'.
    parser.add_argument(
        '-c', '-csv', '--csv', action='store', default=None,
        help='A CSV file that specifies find/replace pairs.  Find what is in column "A" and replace it with what is in column "B".')
    parser.add_argument(
        '-rec', '--recurse', action='store_true', default=None,
        help='Recursive find in directories')
    parser.add_argument(
        '-reg_f', '--regex_find', action='store_true', default=None,
        help='The strings supplied for finding should be treated as regexes instead of plain text')
    parser.add_argument(
        '-reg_i', '--regex_ignore', action='store', default=None,
        help='Regex that a file\'s absolute path must not match before find/replace is executed on file.  A file that matches both "match" regex and the "ignore" regex will be ignored.')
    parser.add_argument(
        '-reg_m', '--regex_match', action='store', default=None,
        help='Regex that a file\'s absolute path must match before find/replace is executed on file')
    parser.add_argument(
        '-rep', '--replace', action='store_true', default=None,
        help='Find and replace as opposed to just find')
    parser.add_argument(
        'inputs', nargs='+',
        help='The files/directories to update')
    args = parser.parse_args()

    # get args
    CSV = args.csv
    RECURSE = args.recurse
    REGEX_FIND = args.regex_find
    REGEX_IGNORE = args.regex_ignore
    REGEX_MATCH = args.regex_match
    REPLACE = args.replace
    inputs = args.inputs

    # prepare log: opening for writing empties any log left by an earlier run
    with open(LOG, 'w'):
        pass

    # build STR_DICT
    if CSV:
        if not os.path.isfile(CSV):
            logAndPrint(CSV + ' does not exist... aborting...')
            return
        bldStrDict()

    # do find and replace
    findAndReplace(inputs)

    # log stats
    runTime = getTime() - startTime
    logAndPrint('searched ' + str(FILE_CNT) + ' files in ' + getHms(runTime))

# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())