Guest User

Untitled

a guest
Sep 22nd, 2018
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.76 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import collections
  3. import string
  4. from string import ascii_lowercase
  5. from sys import stderr
  6.  
  7. pair_counts = {}
  8. # the pair_counts dict will have a key for each character that we've seen. The
  9. # values associated to these keys will themselves be dictionaries, whose keys
  10. # are also characters, such that pair_counts['a']['k'] would be the number of
  11. # occurrences of the pair "ak".
  12.  
  13. pair_totals = {}
  14. # pair_totals will also containt a key for each character that we've seen. The
  15. # value for each one will be the total number of pairs we've seen that have
  16. # that character as their first character.
  17.  
  18. pair_probabilities = {}
  19. # pair_probabilities is like pair_counts, but with probabilities to see pairs
  20. # instead of just their raw number of occurencess.
  21.  
  22. def get_probabilities():
  23. pass
  24.  
  25. def add_pair(left, right):
  26. """Given two characters (left and right), adds the catenation of those
  27. characters to our dictionary of pairs. Returns the number of times that
  28. pair has been seen so far."""
  29. if left not in pair_counts:
  30. pair_counts[left] = {}
  31.  
  32. if left not in pair_totals:
  33. pair_totals[left] = 1
  34. else:
  35. pair_totals[left] += 1
  36.  
  37. if right in pair_counts[left]:
  38. pair_counts[left][right] += 1
  39. else:
  40. pair_counts[left][right] = 1
  41.  
  42. return pair_counts[left][right]
  43.  
  44. def tabulate_pairs(word):
  45. """Given a word, tabulates the pair of adjacent letters in them and updates
  46. our pair_counts dict."""
  47. if len(word) < 2:
  48. return
  49.  
  50. prev = word[0]
  51. for i in range(1, len(word)):
  52. add_pair(prev, word[i])
  53. prev = word[i]
  54.  
  55. def add_word(word):
  56. tabulate_pairs(word)
  57. get_probabilities()
  58.  
  59.  
  60. f = open('/usr/share/dict/words', 'r')
  61. for line in f:
  62. word = line.strip().lower()
  63. add_word(word)
Add Comment
Please, Sign In to add comment