Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Daniel Neri
- # 4/1/2012
- # University of Central Florida
- # Computer Services & Telecommunications
- # Enterprise Application Development
- import locale
- import os
- import sys
- import urllib2
- import fnmatch
def boyermoore_horspool(fd, needle):
    """Find the first occurrence of ``needle`` in a seekable stream.

    The stream is rewound to its start and scanned with the
    Boyer-Moore-Horspool algorithm, reading only as many elements as the
    current shift requires, so the whole file is never held in memory.

    Args:
        fd: A seekable stream supporting ``seek(0)`` and ``read(n)``.
            Whatever ``read`` returns must be the same type as ``needle``
            (text with text, bytes with bytes).
        needle: The pattern to look for. Leading and trailing whitespace
            is stripped before searching, so a line read from a file can
            be passed as-is.

    Returns:
        The zero-based offset of the first match, counted in the units
        that ``fd.read`` returns, or -1 when the pattern does not occur.
        A needle that is empty after stripping matches at offset 0.
    """
    needle = needle.strip()
    fd.seek(0)
    nlen = len(needle)
    if nlen == 0:
        # An empty pattern trivially matches at the very start.
        return 0
    nlast = nlen - 1

    # Bad-character shift table. It is keyed by one-element slices rather
    # than by ord() values so that it works for str and bytes alike and is
    # not limited to code points below 256. Elements that do not occur in
    # needle[:-1] fall back to a full-length shift via dict.get below.
    skip = {}
    for k in range(nlast):
        skip[needle[k:k + 1]] = nlast - k

    # `window` always holds the nlen elements starting at offset `pos`.
    pos = 0
    window = fd.read(nlen)
    if len(window) < nlen:
        return -1
    while window != needle:
        shift = skip.get(window[nlast:], nlen)
        chunk = fd.read(shift)
        if len(chunk) < shift:
            # Ran out of input before the window could be refilled.
            return -1
        window = window[shift:] + chunk
        pos += shift
    return pos
def _file_references(filepath, case):
    """Return True when ``case`` or ``case[3:]`` occurs in the given file."""
    with open(filepath) as f:
        # The second search drops the first three characters of the name
        # (presumably a fixed record prefix -- TODO confirm with the
        # owner of the input file).
        return (boyermoore_horspool(f, case) != -1
                or boyermoore_horspool(f, case[3:]) != -1)


def _find_reference(case, directories, patterns):
    """Look for ``case`` in every matching file below ``directories``.

    Returns a ``(searched, hit)`` pair: ``searched`` is True once at least
    one file has been opened, ``hit`` is the path of the first file that
    contains a reference, or None when no file does.
    """
    searched = False
    for direc in directories:
        print("HELLO:  " + direc)
        for path, _dirs, files in os.walk(os.path.abspath(direc)):
            for pattern in patterns:
                for filename in fnmatch.filter(files, pattern):
                    filepath = os.path.join(path, filename)
                    searched = True
                    if _file_references(filepath, case):
                        # First hit settles it; skip the remaining files.
                        return searched, filepath
    return searched, None


def _write_tables(path, tables, label):
    """Write each entry of ``tables`` to ``path``, echoing progress."""
    with open(path, "w") as fo:
        for table_name in tables:
            print("writing " + table_name + " to " + label)
            # Entries come from readlines() and carry their own newline.
            fo.write(table_name)


def main(argv):
    """Sort the table names in the input file into "drop" and "keep" lists.

    ``argv`` must hold the script name followed by three source directories
    and the path of a file with one table name per line. A table goes to
    the SQR input (drop) file only when at least one source file was
    searched and none of them referenced it; otherwise it goes to the
    keep file.
    """
    print('Starting...')
    # Four positional arguments plus the script name are required.
    if len(argv) < 5:
        for item in argv:
            print("item: " + item)
        print("Usage: horspool.py <peoplecode directory> <ucf_sqr dir> <ucf_dms dir> <input file>")
        sys.exit(-1)

    pplcode_dir, ucf_sqr_dir, ucf_dms_dir, input_file = argv[1:5]
    directories = [pplcode_dir, ucf_sqr_dir, ucf_dms_dir]
    print(directories)

    with open(input_file) as f:
        input_tables = f.readlines()
    print("Input file: " + input_file)

    filter_by = ['*.sqr', '*.sqc', '*.txt', '*.dms']
    for item in filter_by:
        print("Filtering by: " + item)

    sqr_input = []
    keep_tables = []
    total = len(input_tables)
    for index, case in enumerate(input_tables):
        print("matching #" + str(index + 1) + " of #" + str(total) + ": " + case)
        searched, hit = _find_reference(case, directories, filter_by)
        if hit is not None:
            # Table reference exists, do not drop.
            print("result found in " + hit)
        if searched and hit is None:
            # No reference anywhere: hand over to SQR for further processing.
            sqr_input.append(case)
        else:
            keep_tables.append(case)

    _write_tables("C:\\temp\\horspool_output.txt", sqr_input,
                  "SQR input file")
    _write_tables("C:\\temp\\horspool_output_keep_these_tables.txt",
                  keep_tables, "keep tables file")


if __name__ == "__main__":
    main(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement