• API
• FAQ
• Tools
• Archive
SHARE
TWEET

# Untitled

a guest Sep 17th, 2019 97 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
from itertools import cycle, islice
import re

# Symbol inventories shared by the functions below.
ascii_alphabet = 'abcdefghijklmnopqrstuvwxyz'
# The characters captured (mapped FROM) by default: lowercase letters followed by digits.
alpha_numerics = ascii_alphabet + '0123456789'
vowels = 'aeiou'
# Every letter of ascii_alphabet that is not listed in vowels, in alphabetical order.
consonants = ''.join(letter for letter in ascii_alphabet if letter not in vowels)
# Default symbol sets (mapped TO): alternating vowels and consonants.
vowels_and_consonants = (vowels, consonants)
9.
10.
def number_to_multi_base(n, b):
    """
    Convert a number to a multi-base (generalization of base projection).

    The bases are consumed cyclically starting from the least significant digit: the first base
    applies to the last element of the returned list, the second base to the one before it, etc.

    Args:
        n: The (non-negative) number to convert
        b: The base to convert it to: a single number, or any iterable of numbers

    Returns: A list representing the number in the desired base (most significant digit first).

    Raises:
        ValueError: If n is negative, if b is empty (and n is not 0), or if b is a number, list or
            tuple made only of 1s. In all these cases the conversion could never complete.

    When b is just one number, it's the base (for e.g. b=2 means binary base)

    >>> number_to_multi_base(3, 2)
    [1, 1]
    >>> number_to_multi_base(4, 2)
    [1, 0, 0]
    >>> number_to_multi_base(5, 2)
    [1, 0, 1]

    But the cool thing about number_to_multi_base is that you can have a more complex base
    (any iterable, really)

    >>> number_to_multi_base(11, [2, 3])
    [1, 2, 1]
    >>> number_to_multi_base(12, [2, 3])
    [1, 0, 0, 0]
    >>> number_to_multi_base(13, [2, 3])
    [1, 0, 0, 1]
    >>> number_to_multi_base(14, [2, 3])
    [1, 0, 1, 0]
    >>> number_to_multi_base(15, [2, 3])
    [1, 0, 1, 1]
    >>> number_to_multi_base(16, [2, 3])
    [1, 0, 2, 0]
    """
    if isinstance(b, (int, float)):
        b = [b]
    radices = cycle(b)

    if n == 0:
        return [0]
    if n < 0:
        # Floor division by a positive base gets stuck at -1, so the loop below would never end.
        raise ValueError(f"n must be non-negative, got {n!r}")
    if isinstance(b, (list, tuple)) and b and all(radix == 1 for radix in b):
        # Dividing by 1 never shrinks n, so the loop below would never end.
        raise ValueError("at least one base must differ from 1")

    digits = []  # least significant digit first; reversed on return
    while n:
        try:
            radix = next(radices)
        except StopIteration:
            raise ValueError("b must contain at least one base") from None
        digits.append(int(n % radix))
        n //= radix
    return digits[::-1]
54.
55.
def str_from_num_list(coord, symbols_for_base_idx=vowels_and_consonants, base_phase=0):
    """
    Make a string from the coordinates of a number in a given base system (inferred from
    symbols_for_base_idx and base_phase).

    The i-th coordinate is used as an index into the symbol set number (base_phase + i), counting
    cyclically over symbols_for_base_idx.

    NOTE: symbols_for_base_idx sets should (in most cases) all be disjoint (but this is not validated!)

    Args:
        coord: An array of integers. Coordinates of a number in a given base system
        symbols_for_base_idx: Sets of symbols for each base
        base_phase: Which base (of symbols_for_base_idx) to start with (and then cycle)

    Returns:
        A string (which is the mapping of the number represented by coord).

    >>> str_from_num_list([1,2,1,2], ['ae', 'xyz'])
    'ezez'
    >>> str_from_num_list([1,2,1,0], ['ae', 'xyz'])
    'ezex'
    >>>
    >>> # [1,2,0,1] is [1,2,1,0], with the last two digits flipped, but you don't get ezxe in the following:
    >>> str_from_num_list([1,2,0,1], ['ae', 'xyz'])
    'ezay'
    """
    n = len(symbols_for_base_idx)
    # Endless stream of symbol-set indices, starting at base_phase: phase, phase + 1, ... (mod n)
    set_indices = islice(cycle(range(n)), base_phase, None)
    return ''.join(
        symbols_for_base_idx[set_idx][letter_idx]
        for letter_idx, set_idx in zip(coord, set_indices)
    )
86.
87.
# TODO: Look into coverage. Couldn't produce 'magic' with ['ai', 'mgc'] or ['mgc', 'ai'].
#   Likely cause: the first character of the result encodes the most significant digit, which is
#   never 0 for a non-zero quotient, so words starting with the first symbol of a set ('m' here)
#   are only reachable as one-letter words.
def text_for_num(num, symbols_for_base_idx=vowels_and_consonants):
    """
    Map a number to a string.
    The map is one-to-one (no two numbers share a string) if the sets of symbols in
    symbols_for_base_idx are non-overlapping. Not every string is produced though (see the TODO
    above this function).

    How it works: num modulo the number of symbol sets picks the set the LAST character is taken
    from (the "phase"); the quotient is then written in the mixed base given by the sizes of the
    symbol sets (rotated to start at that phase), and each digit is replaced by the symbol it indexes.

    Args:
        num: A (non-negative) number to map to text
        symbols_for_base_idx: The sets of symbols to use: A list of strings, each string representing a
            collection of symbols to use in each base.

    Returns:
        A string representing the input number.

    Raises:
        ValueError: If num is negative (the digit extraction would otherwise never terminate).

    >>> # using the default symbols_for_base_idx (vowels and consonants):
    >>> text_for_num(1060)
    'caca'
    >>> text_for_num(14818)
    'sapu'
    >>> text_for_num(335517)
    'tecon'
    >>>
    >>> # using custom ones:
    >>> text_for_num(153, ['ai', 'gcm'])
    'magic'
    """
    if num < 0:
        raise ValueError(f"num must be non-negative, got {num!r}")

    base_cardinalities = [len(symbols) for symbols in symbols_for_base_idx]
    n_bases = len(base_cardinalities)
    num, base_phase = divmod(num, n_bases)

    # Rotate the cardinalities so that the least significant digit uses the set at base_phase.
    base = base_cardinalities[base_phase:] + base_cardinalities[:base_phase]
    coord = number_to_multi_base(num, base)

    # coord is most-significant-first; symbols are assigned least-significant-first (so that the
    # phase applies to the last character) and the result is flipped back.
    return str_from_num_list(coord[::-1], symbols_for_base_idx, base_phase)[::-1]
123.
124.
# Positive infinity; pass it as max_word_length to disable word splitting.
inf = float('infinity')
126.
127.
def text_to_pronounceable_text(text,
                               symbols_for_base_idx=vowels_and_consonants,
                               captured_alphabet=alpha_numerics,
                               case_sensitive=False,
                               max_word_length=30,
                               artificial_word_sep='_',
                               assert_no_word_sep_in_text=False
                               ):
    """
    Replace every run of captured_alphabet characters in text by a (more) pronounceable word.

    Each run is read as a number written in base len(captured_alphabet) (first character = least
    significant digit) and that number is rendered with text_for_num. Everything between the runs
    (spaces, punctuation, ...) is kept as is, including at the start and at the end of the text.

    NOTE: The first symbol of captured_alphabet has value 0, so runs that only differ by trailing
    occurrences of that symbol (e.g. 'b' and 'ba' with the default alphabet) map to the same word.

    Args:
        text: text you want to map
        symbols_for_base_idx: symbols you want to map TO (default is vowels and consonants)
        captured_alphabet: the symbols of the words you want to map FROM (essentially, in contrast to filler characters)
        case_sensitive: If False (default), the input text is lower cased before being processed
        max_word_length: The maximum length of a pronounceable word (use inf for no limit)
        artificial_word_sep: The separator to separate pronounceable words when the word is too long
        assert_no_word_sep_in_text: Whether to assert that artificial_word_sep is not already in the input text
            (to avoid clashing and non-invertibility). Only checked when a word actually needs splitting.

    Returns:
        A more pronounceable text, where pronounceable is defined by you, so not my fault if it's not.

    Raises:
        AssertionError: If assert_no_word_sep_in_text is set, a word has to be split, and
            artificial_word_sep occurs in the text.

    >>> text_to_pronounceable_text('asd8098 098df')
    'izokagamuta osuhoju'
    >>> text_to_pronounceable_text('asd8098 098df', max_word_length=4, artificial_word_sep='_')
    'izo_kaga_muta osu_hoju'
    >>> text_to_pronounceable_text('!c ')  # leading/trailing separators are preserved
    '!e '
    """
    if not case_sensitive:
        text = text.lower()

    # Escape each symbol so characters that are special inside a regex character class
    # (']', '^', '-', backslash) can safely be part of the alphabet.
    captured_run = re.compile('[' + ''.join(map(re.escape, captured_alphabet)) + ']+')

    num_of_character = {c: i for i, c in enumerate(captured_alphabet)}  # the numerical mapping of alphabet
    base_n = len(captured_alphabet)
    limit_word_length = max_word_length < inf

    def split_long_word(word):
        """Cut word into max_word_length pieces; a shorter remainder piece, if any, comes first."""
        if len(word) <= max_word_length:
            return word
        if assert_no_word_sep_in_text and artificial_word_sep in text:
            # Raised explicitly (rather than with an assert statement) so it still fires under -O.
            raise AssertionError(
                f"Your artificial_word_sep ({artificial_word_sep}) was in the text (so no bijective mapping)"
            )
        head_length = len(word) % max_word_length
        pieces = [word[:head_length]] if head_length else []
        pieces.extend(
            word[start:start + max_word_length]
            for start in range(head_length, len(word), max_word_length)
        )
        return artificial_word_sep.join(pieces)

    def pronounceable_word(match):
        """Map one matched run of captured characters to its pronounceable word."""
        chunk = match.group()
        num = sum(num_of_character[c] * (base_n ** i) for i, c in enumerate(chunk))
        word = text_for_num(num, symbols_for_base_idx)
        return split_long_word(word) if limit_word_length else word

    # Substituting in place keeps all separator text exactly where it was.
    return captured_run.sub(pronounceable_word, text)
195.
196.
class FunTests:
    """Small manual experiments (output is meant to be eyeballed, nothing is asserted)."""

    @staticmethod
    def print_sequences_in_columns(start_num=3000, end_num=3060):
        """
        Print text_for_num(i) for every i in range(start_num, end_num), one per line.

        Words for even numbers are prefixed with a tab, so that even and odd numbers end up in
        two visually separate columns.
        """
        for i in range(start_num, end_num):
            indent = "" if i % 2 else "\t"
            print(indent + text_for_num(i))
206.
207.
208. if __name__ == '__main__':
209.     try:
210.         import argh
211.     except ImportError:
212.         raise ImportError("You don't have argh. You can install it by doing:\n"
213.                           "     pip install argh\n"