Guest User

Untitled

a guest
Mar 6th, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.89 KB | None | 0 0
  1. #python3
  2. from collections import defaultdict, Counter
  3. import pprint as pp
  4. DV_KEYS = [
  5. "',.pyfgcrl",
  6. "aoeuidhtns",
  7. ";qjkxbmwvz",
  8. ]
  9. KEYS = [
  10. "qwertyuiop",
  11. "asdfghjkl;",
  12. "zxcvbnm,./",
  13. ]
  14. MIRRORS = {}
  15. for row in KEYS:
  16. rev = "".join(reversed(row))
  17. for x in range(len(row)//2):
  18. o = -1*(x + 1)
  19. MIRRORS[row[x]] = row[o]
  20. MIRRORS[row[o]] = row[x]
  21. pp.pprint(MIRRORS)
  22.  
  23.  
  24. def key(word):
  25. return "".join(min(letter, MIRRORS[letter]) for letter in word.lower())
  26.  
  27. words_by_key = defaultdict(set)
  28. total_words = 0
  29. bad_words = set()
  30. with open("/usr/share/dict/words") as f:
  31. for word in f:
  32. word = word.strip().lower()
  33. try:
  34. words_by_key[key(word)].add(word)
  35. except KeyError:
  36. bad_words.add(word)
  37. total_words += 1
  38. processed_words = total_words - len(bad_words)
  39.  
  40. print(total_words, "words total")
  41. print(len(bad_words), "words unable to process: ", list(bad_words)[:10])
  42.  
  43. lens = Counter((len(val) for val in words_by_key.values()))
  44. print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
  45. print(lens.most_common())
  46.  
  47. print("Probability of a word having N collisions:")
  48. for numcoll, count in sorted(lens.most_common()):
  49. probability = numcoll * count / processed_words * 100
  50. print(numcoll-1, probability)
  51.  
  52. cc = 0
  53. print("Some sample collisions:")
  54. for wds in words_by_key.values():
  55. if len(wds) > 1:
  56. cc += 1
  57. print(wds)
  58. if cc > 10:
  59. break
  60.  
  61. # QWERTY
  62. # 235886 words total
  63. # 2 words unable to process: ['jean-pierre', 'jean-christophe']
  64. # Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
  65. # [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)]
  66. # Probability of a word having N collisions:
  67. # 0 93.83171389326958
  68. # 1 4.325007206932221
  69. # 2 0.7656305641756117
  70. # 3 0.27979854504756574
  71. # 4 0.08054806599854165
  72. # 5 0.05087246273592105
  73. # 6 0.020772922283834427
  74. # 7 0.00339149751572807
  75. # Some sample collisions:
  76. # {'dub', 'dun'}
  77. # {'killable', 'kissable'}
  78. # {'percival', 'perceval'}
  79. # {'it', 'ey'}
  80. # {'scruf', 'scurf'}
  81. # {'silverness', 'silverbill'}
  82. # {'singer', 'linger'}
  83. # {'wade', 'wake', 'wadi'}
  84. # {'jag', 'fag'}
  85. # {'wryly', 'outly'}
  86. # {'pegasian', 'pegasean'}
  87.  
  88. # DVORAK
  89. # 235886 words total
  90. # 2 words unable to process: ['jean-pierre', 'jean-christophe']
  91. # Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
  92. # [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)]
  93. # Probability of a word having N collisions:
  94. # 0 96.3270081904665
  95. # 1 2.5580370012378966
  96. # 2 0.387902528361398
  97. # 3 0.0780044428617456
  98. # 4 0.004239371894660088
  99. # 5 0.002543623136796052
  100. # Some sample collisions:
  101. # {'apathism', 'agathism'}
  102. # {'balk', 'balm'}
  103. # {'unary', 'hoary'}
  104. # {'cypris', 'cypria'}
  105. # {'indiscreetly', 'indiscretely'}
  106. # {'pump', 'gump'}
  107. # {'yond', 'food'}
  108. # {'getae', 'geest'}
  109. # {'trig', 'trip'}
  110. # {'apselaphesia', 'apselaphesis'}
  111. # {'tach', 'each'}
Add Comment
Please, Sign In to add comment