Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
## words/word.py
import re
import sys

import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import adapt

import scriptutil as SU
# Module-level connection: one shared connection/cursor for the whole module.
# DictCursor lets rows be read by column name as well as by index.
try:
    db = psycopg2.connect(database="scrabble", user="python", password="python")
    cur = db.cursor(cursor_factory=DictCursor)
    # One-time schema bootstrap, kept for reference:
    # cur.execute ("CREATE TABLE words (name varchar, probability int, frequency int, catches varchar, hangs varchar);")
except psycopg2.Error:
    # BUG FIX: was a bare `except:` with `sys.ext()` (typo) and `sys` was
    # never imported -- the error path itself crashed with a NameError.
    print("I am unable to connect to the database")
    sys.exit(1)
# Placeholder left by the original author: the intent was apparently to
# locate a local wordlist reference file here. As written, both branches are
# bare string expressions (no-ops), and a string literal cannot raise, so the
# except branch is unreachable. Preserved verbatim as a marker of intent.
try:
    "trying to find a wordlist reference file"
except:
    "failing to find a wordlist reference file. You're on your own, you database-dependent chump!"
class Word:
    """Legal scrabble words.

    A Word is
      1) present in the official lists (local ./words/*.txt files), and
      2) carries point/frequency attributes that are derived -- not from its
         own letters -- but from the point/frequency sums of all the possible
         _derivative_ scrabble-legal words ("catches").

    Raw data comes from the official scrabble lists (downloadable from
    Hasbro).
    """

    # Official tile distribution; "_" is the blank tile (2 tiles, 0 points).
    letters = "_ a b c d e f g h i j k l m n o p q r s t u v w x y z".split()
    frequencies = (2, 9, 2, 2, 4, 12, 2, 3, 2, 9, 1, 1, 4, 2, 6, 8, 2, 1, 6, 4, 6, 4, 2, 2, 1, 2, 1)
    points = (0, 1, 3, 3, 2, 1, 4, 2, 4, 1, 8, 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, 1, 4, 4, 8, 4, 10)
    letter_frequencies = dict(zip(letters, frequencies))
    # BUG FIX: was dict(zip(letters, frequencies)) -- every "point" lookup
    # silently returned a tile frequency instead of a point value.
    letter_points = dict(zip(letters, points))

    def __init__(self, name, points=None, frequency=None, catches=None, hangs=None):
        """Positional parameters mirror the words table columns
        (name, probability, frequency, catches, hangs) so a fetched row can
        be splatted straight in as Word(*row).
        """
        self.name = name
        # BUG FIX: the original used `if x is None: self.x = x`, which only
        # ever stored None and silently dropped real values. Assign
        # unconditionally.
        self.catches = catches
        self.frequency = frequency
        self.points = points
        # NOTE(review): this attribute shadows the hangs() method on
        # instances; attribute access wins after __init__ runs.
        self.hangs = hangs

    def calculate_probability(self):
        """Sum of tile point values over every letter of every catch.

        self.catches is a space-separated string of words; iterate per word
        so the separator spaces never reach the lookup table (the original
        iterated the raw string and would KeyError on " "; it also referenced
        letter_points as a bare name, a NameError inside a method).
        """
        return sum(self.letter_points[letter]
                   for word in self.catches.split()
                   for letter in word)

    def calculate_frequencies(self):
        """Sum of tile frequencies over every letter of every catch.

        Same per-word iteration fix as calculate_probability().
        """
        return sum(self.letter_frequencies[letter]
                   for word in self.catches.split()
                   for letter in word)

    @staticmethod
    def count(finder_sql="", params=None):
        """Rails-style counter: number of rows matching the finder clause.

        NOTE(review): finder_sql is spliced into the statement verbatim --
        pass user data through `params` (psycopg2 placeholders), never
        through finder_sql.
        """
        cur.execute("select * from words {0}".format(finder_sql), params)
        return cur.rowcount

    def hangs(self):
        """The one-letter-shorter word left by dropping the last letter.

        NOTE(review): shadowed on instances by the `hangs` attribute that
        __init__ assigns; reachable only via Word.hangs(instance).
        """
        return self.name[0:-1]

    @staticmethod
    def find_or_create_all_by_name(names):
        """Find existing rows for `names`, create rows for the rest.

        Returns newly created Words followed by the pre-existing matches.
        Names failing word-list validation are skipped.

        merge
        VS
        cur.copy_in( ... scratch ... )
        insert into words select * from (select distinct * from scratch) uniq where not exists (select 1 from words where words.name = uniq.name);
        """
        if not names:
            # Guard: an empty IN () clause is invalid SQL.
            return []
        # Parameterized IN clause: psycopg2 adapts a Python tuple to a
        # parenthesized SQL list, escaping each element safely.
        matches = Word.find_all("where words.name in %s", (tuple(names),))
        unmatched = set(names) - set(w.name for w in matches)
        # BUG FIX: removed a leftover pdb.set_trace() (pdb was never
        # imported, so it crashed with NameError anyway).
        # MYTODO: transactions?
        invalid_words = []
        created_words = []
        for n in unmatched:
            w = Word(n)
            try:
                w.new()
                created_words.append(w)
            except NameError:
                invalid_words.append(n)
        # MYTODO: hose invalid words over to the output somehow ... through a logger, if nothing else
        if created_words:
            db.commit()
        # BUG FIX: list.extend() returns None, so the original
        # `return created_words.extend(matches) or []` ALWAYS returned [].
        return created_words + matches

    def new(self):
        """Vaguely rails-AR-like new().

        Validates against the local lists, find-greps for catches, and
        pre-commits the instance to the db (the caller commits).
        Raises NameError when the word is not in the lists.

        MYTODO: profiling. Is it worth it to split up the two grep searches?
        """
        self.validate_against_local_lists()
        grepd_catches = self.fgrep_catches_in_directories(("./words",))
        flat_catches = []
        for c in grepd_catches:
            flat_catches.extend(c)
        # Space-joined, de-duplicated catch list (same result as the
        # original join-then-strip dance).
        self.catches = " ".join(set(flat_catches))
        # BUG FIX: the original str.format()-ed a Python tuple repr into the
        # SQL text -- injection-prone and broken for quoted strings. Use
        # psycopg2 placeholders instead.
        cur.execute(
            "INSERT INTO words VALUES (%s, %s, %s, %s, %s)",
            (
                self.name,
                self.calculate_probability(),
                self.calculate_frequencies(),
                self.catches,
                # hangs: drop-first-letter and drop-last-letter derivatives
                self.name[1:] + " " + self.name[:-1],
            ),
        )

    def validate_against_local_lists(self, lists=(".",)):
        """Raise NameError unless the word appears in a local list file.

        This will also catch all the weird things people might throw,
        like numbers.
        """
        if [self.name] not in self.fgrep_in_directories(lists):
            # BUG FIX: Python-2-only `raise NameError, "..."` syntax.
            raise NameError("not in ./words/*.txt. Look again, shall we?")

    def fgrep_in_directories(self, directories=(".",), search_string=None):
        """Grep the .txt files under each directory ("." by default).

        With no search_string, does a case-insensitive whole-line match on
        self.name. Returns the first match-list per directory that matched
        anything.
        """
        if search_string is None:
            search_tuple = (("^{0}$".format(self.name), re.I),)
        else:
            search_tuple = ((search_string, re.M),)
        results = []
        for directory in directories:
            # list() the values: dict views are not indexable on Python 3.
            found = list(SU.ffindgrep(directory,
                                      namefs=(lambda s: s.endswith('.txt'),),
                                      regexl=search_tuple).values())
            # BUG FIX: the original tested `len(catch) is not 0` -- an
            # identity comparison on an int; use truthiness instead.
            if found:
                results.append(found[0])
        return results

    def fgrep_catches_in_directories(self, directories=("./words",)):
        """Find all _catches_: legal words made by appending or prepending
        one letter to this word, in the local .txt files.
        """
        # BUG FIX: the original ignored `directories` and hard-coded
        # ("./words",); honor the parameter, defaulting to the only value
        # any call site ever passed.
        temp = []
        temp.extend(self.fgrep_in_directories(directories, "^{0}.$".format(self.name)))
        temp.extend(self.fgrep_in_directories(directories, "^.{0}$".format(self.name)))
        return temp

    @staticmethod
    def find_all(finder_sql="", params=None):
        """Rails-style finder: every word row matching the finder clause.

        NOTE(review): finder_sql is spliced into the SQL verbatim -- pass
        user data through `params` (psycopg2 placeholders), never through
        finder_sql.
        """
        cur.execute("select * from words {0}".format(finder_sql), params)
        return [Word(*row) for row in cur.fetchall()]
def flatten(l):
    """Recursively flatten arbitrarily nested lists.

    A list input yields one flat list of its leaf values; a non-list input
    is returned unchanged (the original's leaf behavior).

    BUG FIX: the original tested `l is []` (always False -- identity against
    a fresh literal) and used `sum(map(flatten, l))`, which adds the numeric
    leaves together instead of concatenating lists (and raises TypeError on
    any nested list, since sum starts from 0).
    """
    if not isinstance(l, list):
        return l
    flat = []
    for item in l:
        result = flatten(item)
        if isinstance(result, list):
            flat.extend(result)
        else:
            flat.append(result)
    return flat
Add Comment
Please, Sign In to add comment