Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #python3
- from collections import defaultdict, Counter
- import pprint as pp
# Key rows (top, home, bottom) of the Dvorak layout, left to right.
# NOTE: nothing in the visible code references DV_KEYS; to analyse Dvorak,
# iterate over DV_KEYS instead of KEYS in the loop that fills MIRRORS.
DV_KEYS = [
    "',.pyfgcrl",
    "aoeuidhtns",
    ";qjkxbmwvz",
]

# Key rows (top, home, bottom) of the QWERTY layout, left to right.
KEYS = [
    "qwertyuiop",
    "asdfghjkl;",
    "zxcvbnm,./",
]

# MIRRORS maps every key to the key at the mirrored position of the same row
# (first <-> last, second <-> second-to-last, ...).  Pairing each row with its
# own reversal fills both directions in a single pass; on a row of odd length
# the middle key would simply map to itself.
MIRRORS = {}
for row in KEYS:
    for char, mirrored in zip(row, reversed(row)):
        MIRRORS[char] = mirrored

# Show the resulting table (pprint sorts the keys for display).
pp.pprint(MIRRORS)
def key(word):
    """Return the mirror-insensitive key for *word*.

    The word is lower-cased, then each character is replaced by whichever
    of itself and its MIRRORS counterpart sorts first.  Two words that
    differ only by swapping characters for their mirrors therefore get the
    same key.

    Raises:
        KeyError: if a character of the word has no entry in MIRRORS.
    """
    canonical = []
    for letter in word.lower():
        mirrored = MIRRORS[letter]
        canonical.append(letter if letter <= mirrored else mirrored)
    return "".join(canonical)
# Group every dictionary word by its mirror-insensitive key.  Words whose
# characters are not all present in MIRRORS (key() raises KeyError) are
# collected in bad_words instead.
words_by_key = defaultdict(set)
total_words = 0
bad_words = set()
with open("/usr/share/dict/words") as f:
    for line in f:
        word = line.strip().lower()
        if not word:
            # A blank line is not a word; do not count or group it.
            continue
        total_words += 1
        try:
            words_by_key[key(word)].add(word)
        except KeyError:
            bad_words.add(word)

# Denominator for the probabilities below.  The groups are sets of
# lower-cased words, so case variants of the same word collapse into one
# entry; counting the grouped words directly (rather than lines read minus
# bad words) keeps the percentages summing to 100.  The sample output
# recorded in the comments at the end of this file was produced with the
# line-count denominator, which is why its percentages total about 99.36.
processed_words = sum(len(group) for group in words_by_key.values())

print(total_words, "words total")
print(len(bad_words), "words unable to process: ", list(bad_words)[:10])

# lens[n] = number of groups that contain exactly n distinct words.
lens = Counter(len(val) for val in words_by_key.values())
print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
print(lens.most_common())

print("Probability of a word having N collisions:")
for numcoll, count in sorted(lens.items()):
    # numcoll * count words live in groups of size numcoll, and each of
    # them collides with the numcoll - 1 other words of its group.
    probability = numcoll * count / processed_words * 100
    print(numcoll - 1, probability)

# Print the first few groups that actually contain a collision; the
# "> 10" check stops after the eleventh such group has been shown.
cc = 0
print("Some sample collisions:")
for wds in words_by_key.values():
    if len(wds) > 1:
        cc += 1
        print(wds)
    if cc > 10:
        break
- # QWERTY
- # 235886 words total
- # 2 words unable to process: ['jean-pierre', 'jean-christophe']
- # Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
- # [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)]
- # Probability of a word having N collisions:
- # 0 93.83171389326958
- # 1 4.325007206932221
- # 2 0.7656305641756117
- # 3 0.27979854504756574
- # 4 0.08054806599854165
- # 5 0.05087246273592105
- # 6 0.020772922283834427
- # 7 0.00339149751572807
- # Some sample collisions:
- # {'dub', 'dun'}
- # {'killable', 'kissable'}
- # {'percival', 'perceval'}
- # {'it', 'ey'}
- # {'scruf', 'scurf'}
- # {'silverness', 'silverbill'}
- # {'singer', 'linger'}
- # {'wade', 'wake', 'wadi'}
- # {'jag', 'fag'}
- # {'wryly', 'outly'}
- # {'pegasian', 'pegasean'}
- # DVORAK
- # 235886 words total
- # 2 words unable to process: ['jean-pierre', 'jean-christophe']
- # Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
- # [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)]
- # Probability of a word having N collisions:
- # 0 96.3270081904665
- # 1 2.5580370012378966
- # 2 0.387902528361398
- # 3 0.0780044428617456
- # 4 0.004239371894660088
- # 5 0.002543623136796052
- # Some sample collisions:
- # {'apathism', 'agathism'}
- # {'balk', 'balm'}
- # {'unary', 'hoary'}
- # {'cypris', 'cypria'}
- # {'indiscreetly', 'indiscretely'}
- # {'pump', 'gump'}
- # {'yond', 'food'}
- # {'getae', 'geest'}
- # {'trig', 'trip'}
- # {'apselaphesia', 'apselaphesis'}
- # {'tach', 'each'}
Add Comment
Please, Sign In to add comment