Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # see http://stackoverflow.com/questions/6722985/fastest-way-in-python-to-find-a-startswith-substring-in-a-long-sorted-list-of-s
- import random
- import time
- import string
- from bisect import bisect_left
- WORDLIST_FILENAME = "words.txt"
- def load_words():
- """
- Returns a list of valid words. Words are strings of lowercase letters.
- Depending on the size of the word list, this function may
- take a while to finish.
- """
- print "Loading word list from file..."
- # inFile: file
- inFile = open(WORDLIST_FILENAME, 'r', 0)
- # wordlist: list of strings
- wordlist = []
- for line in inFile:
- wordlist.append(line.strip().lower())
- print " ", len(wordlist), "words loaded."
- return wordlist
- def frag_wordexists_Peter_simple(wordlist, word_fragment):
- return any(w.startswith(word_fragment) for w in wordlist)
- def frag_wordexists_Peter_bisect_left(wordlist, word_fragment):
- try:
- return wordlist[bisect_left(wordlist, word_fragment)].startswith(word_fragment)
- except IndexError:
- return False # word_fragment is greater than all entries in wordlist
- def trim_list_comprehension(wordlist, word_fragment):
- """returns the wordlist containing only words starting with 'word_fragment'"""
- wordlist = [w for w in wordlist if w.startswith(word_fragment)]
- return wordlist
- def trim_bisect(wordlist, word_fragment):
- """returns the wordlist containing only words starting with 'word_fragment'"""
- wordlist[bisect_left(wordlist, word_fragment):
- bisect_left(wordlist, word_fragment[:-1] + chr(ord(word_fragment[-1])+1))]
- return wordlist
- # Actually load the dictionary of words and point to it with
- # the wordlist variable so that it can be accessed from anywhere
- # in the program.
- wordlist = load_words()
- trimwordlist_comprehension = wordlist[:]
- trimwordlist_bisect = wordlist[:]
- peter_simple_total = peter_bisect_total = trim_comp_total = trim_bisect_total = 0
- for word_fragment in ("p", "py", "pyt", "pyth", "pytho"):
- print "Testing '" + word_fragment + "' with Peter's simple test"
- start = time.time()
- if frag_wordexists_Peter_simple(wordlist, word_fragment):
- runtime = time.time() - start
- print runtime
- peter_simple_total += runtime
- print "Testing '" + word_fragment + "' with Peter's bisect left test"
- start = time.time()
- if frag_wordexists_Peter_bisect_left(wordlist, word_fragment):
- runtime = time.time() - start
- print runtime
- peter_bisect_total += runtime
- print "Testing '" + word_fragment + "' with trimming the list using a list comprehension"
- start = time.time()
- trimwordlist_comprehension = trim_list_comprehension(trimwordlist_comprehension, word_fragment)
- if len(trimwordlist_comprehension):
- runtime = time.time() - start
- print runtime
- trim_comp_total += runtime
- print "Testing '" + word_fragment + "' with trimming the list using Neil G's bisect"
- start = time.time()
- trimwordlist_bisect = trim_bisect(trimwordlist_bisect, word_fragment)
- if len(trimwordlist_bisect):
- runtime = time.time() - start
- print runtime
- trim_bisect_total += runtime
- print ""
- print 'The totals for all strings "p", "py", "pyt", "pyth", "pytho" are as follows:'
- print "In total, Peter's simple test took " + str(peter_simple_total)
- print "In total, Peter's bisect left test took " + str(peter_bisect_total)
- print "In total, the list comprehension took " + str(trim_comp_total)
- print "In total, Neil G's bisect took " + str(trim_bisect_total)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement