Advertisement
Guest User

Untitled

a guest
Sep 17th, 2019
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.75 KB | None | 0 0
  1. from itertools import cycle, islice
  2. import re
  3.  
  4. ascii_alphabet = 'abcdefghijklmnopqrstuvwxyz'
  5. alpha_numerics = 'abcdefghijklmnopqrstuvwxyz0123456789'
  6. vowels = 'aeiou'
  7. consonants = 'bcdfghjklmnpqrstvwxyz'
  8. vowels_and_consonants = (vowels, consonants)
  9.  
  10.  
  11. def number_to_multi_base(n, b):
  12. """
  13. Convert a number to a multi-base (generalization of base projection).
  14.  
  15. Args:
  16. n: The number to convert
  17. b: The base to convert it to
  18.  
  19. Returns: A list representing the number in the desired base.
  20.  
  21. # When b is just one number, it's the base (for e.g. b=2 means binary base)
  22. >>> number_to_multi_base(3, 2)
  23. [1, 1]
  24. >>> number_to_multi_base(4, 2)
  25. [1, 0, 0]
  26. >>> number_to_multi_base(5, 2)
  27. [1, 0, 1]
  28. # But the cool thing about number_to_multi_base is that you can have a more complex base (any iterable, really)
  29. >>> number_to_multi_base(11, [2, 3])
  30. [1, 2, 1]
  31. >>> number_to_multi_base(12, [2, 3])
  32. [1, 0, 0, 0]
  33. >>> number_to_multi_base(13, [2, 3])
  34. [1, 0, 0, 1]
  35. >>> number_to_multi_base(14, [2, 3])
  36. [1, 0, 1, 0]
  37. >>> number_to_multi_base(15, [2, 3])
  38. [1, 0, 1, 1]
  39. >>> number_to_multi_base(16, [2, 3])
  40. [1, 0, 2, 0]
  41. """
  42. if isinstance(b, (int, float)):
  43. b = [b]
  44. base = cycle(b)
  45.  
  46. if n == 0:
  47. return [0]
  48. digits = []
  49. while n:
  50. b = next(base)
  51. digits.append(int(n % b))
  52. n //= b
  53. return digits[::-1]
  54.  
  55.  
  56. def str_from_num_list(coord, symbols_for_base_idx=vowels_and_consonants, base_phase=0):
  57. """
  58. Make a string from the coordinates (a) of a number in a given base system (infered from symbols_for_base_idx and
  59. base_phase).
  60.  
  61. NOTE: symbols_for_base_idx sets should (in most cases) all be disjoint (but this is not validated!)
  62.  
  63. Args:
  64. coord: An array of integers. Coordinates of a number in a given base system
  65. base_phase: Which base (of symbols_for_base_idx) to start with (and then cycle)
  66. symbols_for_base_idx: Sets of symbols for each base
  67.  
  68. Returns:
  69. A string (which is the mapping of the number (represented by coord).
  70.  
  71. >>> str_from_num_list([1,2,1,2], ['ae', 'xyz'])
  72. 'ezez'
  73. >>> str_from_num_list([1,2,1,0], ['ae', 'xyz'])
  74. 'ezex'
  75. >>>
  76. >>> # [1,2,0,1] is [1,2,1,0], with the last two digits flipped, but you don't get ezxe in the following:
  77. >>> str_from_num_list([1,2,0,1], ['ae', 'xyz'])
  78. 'ezay'
  79. """
  80. n = len(symbols_for_base_idx)
  81. s = ''
  82. for letter_idx, collection_idx in zip(coord, islice(cycle(range(n)), base_phase, None)):
  83. # print(f"{letter_idx} === {collection_idx}")
  84. s += symbols_for_base_idx[collection_idx][letter_idx]
  85. return s
  86.  
  87.  
  88. # TODO: Look into coverage. Couldn't produce 'magic' with ['ai', 'mgc'] or ['mgc', 'ai']
  89. def text_for_num(num, symbols_for_base_idx=vowels_and_consonants):
  90. """
  91. Map a number to a string.
  92. The map is bijective (a.k.a. "1-to-1" if the set of symbols in symbols_for_base_idx are non-overlapping.
  93.  
  94. Args:
  95. num: A number to map to text
  96. symbols_for_base_idx: The sets of symbols to use: A list of strings, each string representing a
  97. collection of symbols to use in each base.
  98.  
  99. Returns:
  100. A string representing the input number.
  101.  
  102. >>> # using the default symbols_for_base_idx (vowels and consonants):
  103. >>> text_for_num(1060)
  104. 'caca'
  105. >>> text_for_num(14818)
  106. 'sapu'
  107. >>> text_for_num(335517)
  108. 'tecon'
  109. >>>
  110. >>> # using custom ones:
  111. >>> text_for_num(153, ['ai', 'gcm'])
  112. 'magic'
  113. """
  114. base_cardinalities = list(map(len, symbols_for_base_idx))
  115. n_bases = len(base_cardinalities)
  116. base_phase = num % n_bases
  117.  
  118. num = (num - base_phase) // n_bases
  119. base = list(islice(cycle(base_cardinalities), base_phase, n_bases + base_phase))
  120. coord = number_to_multi_base(num, base)
  121.  
  122. return str_from_num_list(coord[::-1], symbols_for_base_idx, base_phase)[::-1]
  123.  
  124.  
  125. inf = float('infinity')
  126.  
  127.  
  128. def text_to_pronounceable_text(text,
  129. symbols_for_base_idx=vowels_and_consonants,
  130. captured_alphabet=alpha_numerics,
  131. case_sensitive=False,
  132. max_word_length=30,
  133. artificial_word_sep='_',
  134. assert_no_word_sep_in_text=False
  135. ):
  136. """
  137.  
  138. Args:
  139. text: text you want to map
  140. symbols_for_base_idx: symbols you want to map TO (default is vowels and consonants)
  141. captured_alphabet: the symbols of the words you want to map FROM (essentially, in contrast to filler characters)
  142. case_sensitive: Whether the input text should be lower cased before being processed
  143. max_word_length: The maximum length of a pronounceable word
  144. artificial_word_sep: The separator to separate pronounceable words when the word is too long
  145. assert_no_word_sep_in_text: Whether to assert that artificial_word_sep is not already in the input text
  146. (to avoid clashing and non-invertibility)
  147.  
  148. Returns:
  149. A more pronounceable text, where pronounceable is defined by you, so not my fault if it's not.
  150.  
  151. >>> text_to_pronounceable_text('asd8098 098df')
  152. 'izokagamuta osuhoju'
  153. >>> text_to_pronounceable_text('asd8098 098df', max_word_length=4, artificial_word_sep='_')
  154. 'izo_kaga_muta osu_hoju'
  155. """
  156. if not case_sensitive:
  157. text = text.lower()
  158.  
  159. p = re.compile(f'[{captured_alphabet}]+') # to match the text to be mapped
  160. anti_p = re.compile(f'[^{captured_alphabet}]+') # to match the chunks of separator (not matched) text
  161.  
  162. matched_text = anti_p.split(text)
  163. num_of_character = {c: i for i, c in enumerate(captured_alphabet)} # the numerical mapping of alphabet
  164. base_n = len(captured_alphabet)
  165. # function to get the (base_n) number for a chk
  166. num_of_chk = lambda chk: sum(num_of_character[c] * (base_n ** i) for i, c in enumerate(chk))
  167.  
  168. _text_for_num = lambda num: text_for_num(num, symbols_for_base_idx)
  169. pronounceable_words = [_text_for_num(num_of_chk(chk)) for chk in matched_text]
  170.  
  171. if max_word_length < inf:
  172. def post_process_word(word):
  173. if len(word) > max_word_length:
  174. if assert_no_word_sep_in_text:
  175. assert artificial_word_sep not in text, \
  176. f"Your artificial_word_sep ({artificial_word_sep}) was in the text (so no bijective mapping)"
  177. r = (len(word) % max_word_length)
  178. word_suffix = word[:r]
  179. word_prefix = word[r:]
  180. word = artificial_word_sep.join(map(''.join, zip(*([iter(word_prefix)] * max_word_length))))
  181. if word_suffix:
  182. word = word_suffix + artificial_word_sep + word
  183. return word
  184. else:
  185. return word
  186.  
  187. pronounceable_words = list(map(post_process_word, pronounceable_words))
  188.  
  189. separator_text = p.split(text)
  190.  
  191. if len(pronounceable_words) < len(separator_text):
  192. return ''.join(map(''.join, zip(separator_text, pronounceable_words)))
  193. else:
  194. return ''.join(map(''.join, zip(pronounceable_words, separator_text)))
  195.  
  196.  
  197. class FunTests:
  198. @staticmethod
  199. def print_sequences_in_columns(start_num=3000, end_num=3060):
  200. for i in range(start_num, end_num):
  201. # print(f"-----{i}")
  202. if i % 2:
  203. print("".join(map(str, (text_for_num(i)))))
  204. else:
  205. print("\t" + "".join(map(str, (text_for_num(i)))))
  206.  
  207.  
  208. if __name__ == '__main__':
  209. try:
  210. import argh
  211. except ImportError:
  212. raise ImportError("You don't have argh. You can install it by doing:\n"
  213. " pip install argh\n"
  214. "In your terminal/environment,")
  215.  
  216. argh.dispatch_command(text_to_pronounceable_text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement