Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import collections
- import string
- from string import ascii_lowercase
- from sys import stderr
# Nested counts of adjacent-character pairs. There is one key per character
# seen as the first character of a pair; each value is itself a dict keyed by
# the second character, so pair_counts['a']['k'] is the number of occurrences
# of the pair "ak".
pair_counts = {}

# One key per character seen as the first character of a pair. Each value is
# the total number of pairs seen so far that start with that character.
pair_totals = {}

# Same layout as pair_counts, but meant to hold the probability of seeing each
# pair instead of its raw number of occurrences.
pair_probabilities = {}
def get_probabilities():
    """Placeholder: not implemented yet, so it does nothing and returns None.

    NOTE(review): presumably meant to fill pair_probabilities from
    pair_counts and pair_totals -- confirm the intended behaviour before
    implementing.
    """
def add_pair(left, right):
    """Record one occurrence of the two-character pair left + right.

    Bumps the count for this specific pair in pair_counts and the running
    total of pairs starting with left in pair_totals, creating either entry
    the first time it is needed.

    Returns the number of times this exact pair has been recorded so far,
    including this call.
    """
    # Inner dict of "second character -> count" for this first character.
    followers = pair_counts.setdefault(left, {})
    pair_totals[left] = pair_totals.get(left, 0) + 1
    followers[right] = followers.get(right, 0) + 1
    return followers[right]
def tabulate_pairs(word):
    """Record every pair of adjacent characters in word via add_pair.

    For "cat" this records "ca" and then "at". Words shorter than two
    characters contain no pairs, so nothing is recorded for them.
    Returns None.
    """
    # Zipping the word with itself shifted by one yields each adjacent pair in
    # order, and yields nothing at all for words of length 0 or 1.
    for left, right in zip(word, word[1:]):
        add_pair(left, right)
def add_word(word):
    """Feed one word into the pair statistics.

    Tabulates the word's adjacent-character pairs, then calls
    get_probabilities().
    """
    tabulate_pairs(word)
    # NOTE(review): the pasted source lost its indentation, so it is unclear
    # whether this call belongs to add_word or to module level -- confirm.
    get_probabilities()
# Feed every entry of the system word list, stripped of surrounding whitespace
# and lower-cased, into the pair statistics. The with-block closes the file
# once the loop finishes, even if processing a word raises.
with open('/usr/share/dict/words', 'r') as f:
    for line in f:
        word = line.strip().lower()
        add_word(word)
Add Comment
Please, Sign In to add comment