Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #@profile
def foo(input_path="input.txt", output_path="output_text.txt"):
    """Reassemble a shuffled, numbered text and report evenly spaced keywords.

    Each non-blank input line is expected to look like ``<number> # <text>``.
    The lines are ordered by their number, the text parts are joined with
    single spaces, and the result is split into words on single spaces.
    Every word that occurs more than three times with a constant distance
    between consecutive occurrences is treated as a keyword; for each
    keyword, the word immediately following every occurrence is collected.

    Stage timings and the findings are printed and also written to the
    report file.

    Args:
        input_path: File to read. Defaults to ``input.txt``, the path that
            used to be hard-coded.
        output_path: Report file to create or overwrite. Defaults to
            ``output_text.txt``, the path that used to be hard-coded.

    Raises:
        ValueError: If a non-blank line lacks the `` # `` separator or its
            number part is not an integer.
        OSError: If either file cannot be opened.
    """
    from operator import itemgetter
    from timeit import default_timer as timer

    rule = "\n-----------------------\n"
    start = timer()

    with open(output_path, "w", encoding="utf-8") as f:

        def log(message, with_rule=False):
            # Mirror every status line to both the report file and stdout.
            f.write(message + "\n")
            if with_rule:
                f.write(rule)
            print(message)

        f.write("Begin\n")

        # Timer 0 covers opening the input, Timer 1 covers reading it.
        start_1 = timer()
        with open(input_path, "r", encoding="utf-8") as f2:
            log("Timer 0: " + str(timer() - start_1))
            start_1 = timer()
            textlines = f2.readlines()
            log("Timer 1: " + str(timer() - start_1))

        # Put the lines back into their numbered order.
        start_1 = timer()
        numbered = []
        for raw in textlines:
            if not raw.strip():
                continue  # a blank line carries no number and no text
            # maxsplit=1 keeps any further " # " as part of the text itself.
            number, text = raw.split(" # ", 1)
            # Strip only the line terminator: slicing off the last character
            # would eat real text on a final line without a trailing newline.
            numbered.append((int(number), text.rstrip("\n")))
        numbered.sort(key=itemgetter(0))
        sorted_text = [text for _, text in numbered]
        log("Timer 2: " + str(timer() - start_1), with_rule=True)

        # From here on only the words matter, not the line numbers.
        start_1 = timer()
        textblob = " ".join(sorted_text).strip()
        log("Timer 3: " + str(timer() - start_1), with_rule=True)

        start_1 = timer()
        words = textblob.split(" ")
        log("Timer 4: " + str(timer() - start_1), with_rule=True)

        # Record every position of every word. Keys are inserted in
        # first-occurrence order, so the report order is reproducible.
        start_1 = timer()
        occurence = {}
        for index, word in enumerate(words):
            occurence.setdefault(word, []).append(index)
        log("Timer 5: " + str(timer() - start_1), with_rule=True)

        # Keep the words seen more than three times at a constant spacing.
        start_1 = timer()
        resultspace = {}
        for key, positions in occurence.items():
            if len(positions) <= 3:
                continue
            interval = positions[1] - positions[0]
            evenly_spaced = all(
                later - earlier == interval
                for earlier, later in zip(positions, positions[1:])
            )
            if evenly_spaced:
                print(key)
                resultspace[key] = (interval, positions)

        for key, (interval, positions) in resultspace.items():
            f.write(key + str(interval) + str(len(positions)) + "\n")
            # A keyword that ends the text has no following word; skip it
            # instead of indexing past the end of the list.
            followers = [
                words[index + 1]
                for index in positions
                if index + 1 < len(words)
            ]
            log("Keyword: " + str(key))
            log("Interval: " + str(interval))
            log("Result: " + " ".join(followers))
        log("Timer 6: " + str(timer() - start_1), with_rule=True)

        log("This took: " + str(timer() - start))
    # input('Enter anything to close')  # in case you want to leave the program running
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    foo()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement