SHARE
TWEET

Untitled

a guest Sep 17th, 2019 97 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from itertools import cycle, islice
  2. import re
  3.  
  4. ascii_alphabet = 'abcdefghijklmnopqrstuvwxyz'
  5. alpha_numerics = 'abcdefghijklmnopqrstuvwxyz0123456789'
  6. vowels = 'aeiou'
  7. consonants = 'bcdfghjklmnpqrstvwxyz'
  8. vowels_and_consonants = (vowels, consonants)
  9.  
  10.  
  11. def number_to_multi_base(n, b):
  12.     """
  13.     Convert a number to a multi-base (generalization of base projection).
  14.  
  15.     Args:
  16.         n: The number to convert
  17.         b: The base to convert it to
  18.  
  19.     Returns: A list representing the number in the desired base.
  20.  
  21.     # When b is just one number, it's the base (for e.g. b=2 means binary base)
  22.     >>> number_to_multi_base(3, 2)
  23.     [1, 1]
  24.     >>> number_to_multi_base(4, 2)
  25.     [1, 0, 0]
  26.     >>> number_to_multi_base(5, 2)
  27.     [1, 0, 1]
  28.     # But the cool thing about number_to_multi_base is that you can have a more complex base (any iterable, really)
  29.     >>> number_to_multi_base(11, [2, 3])
  30.     [1, 2, 1]
  31.     >>> number_to_multi_base(12, [2, 3])
  32.     [1, 0, 0, 0]
  33.     >>> number_to_multi_base(13, [2, 3])
  34.     [1, 0, 0, 1]
  35.     >>> number_to_multi_base(14, [2, 3])
  36.     [1, 0, 1, 0]
  37.     >>> number_to_multi_base(15, [2, 3])
  38.     [1, 0, 1, 1]
  39.     >>> number_to_multi_base(16, [2, 3])
  40.     [1, 0, 2, 0]
  41.     """
  42.     if isinstance(b, (int, float)):
  43.         b = [b]
  44.     base = cycle(b)
  45.  
  46.     if n == 0:
  47.         return [0]
  48.     digits = []
  49.     while n:
  50.         b = next(base)
  51.         digits.append(int(n % b))
  52.         n //= b
  53.     return digits[::-1]
  54.  
  55.  
  56. def str_from_num_list(coord, symbols_for_base_idx=vowels_and_consonants, base_phase=0):
  57.     """
  58.     Make a string from the coordinates (a) of a number in a given base system (infered from symbols_for_base_idx and
  59.     base_phase).
  60.  
  61.     NOTE: symbols_for_base_idx sets should (in most cases) all be disjoint (but this is not validated!)
  62.  
  63.     Args:
  64.         coord: An array of integers. Coordinates of a number in a given base system
  65.         base_phase: Which base (of symbols_for_base_idx) to start with (and then cycle)
  66.         symbols_for_base_idx: Sets of symbols for each base
  67.  
  68.     Returns:
  69.         A string (which is the mapping of the number (represented by coord).
  70.  
  71.     >>> str_from_num_list([1,2,1,2], ['ae', 'xyz'])
  72.     'ezez'
  73.     >>> str_from_num_list([1,2,1,0], ['ae', 'xyz'])
  74.     'ezex'
  75.     >>>
  76.     >>> # [1,2,0,1] is [1,2,1,0], with the last two digits flipped, but you don't get ezxe in the following:
  77.     >>> str_from_num_list([1,2,0,1], ['ae', 'xyz'])
  78.     'ezay'
  79.     """
  80.     n = len(symbols_for_base_idx)
  81.     s = ''
  82.     for letter_idx, collection_idx in zip(coord, islice(cycle(range(n)), base_phase, None)):
  83.         #         print(f"{letter_idx} === {collection_idx}")
  84.         s += symbols_for_base_idx[collection_idx][letter_idx]
  85.     return s
  86.  
  87.  
  88. # TODO: Look into coverage. Couldn't produce 'magic' with ['ai', 'mgc'] or ['mgc', 'ai']
  89. def text_for_num(num, symbols_for_base_idx=vowels_and_consonants):
  90.     """
  91.     Map a number to a string.
  92.     The map is bijective (a.k.a. "1-to-1" if the set of symbols in symbols_for_base_idx are non-overlapping.
  93.  
  94.     Args:
  95.         num: A number to map to text
  96.         symbols_for_base_idx: The sets of symbols to use: A list of strings, each string representing a
  97.             collection of symbols to use in each base.
  98.  
  99.     Returns:
  100.         A string representing the input number.
  101.  
  102.     >>> # using the default symbols_for_base_idx (vowels and consonants):
  103.     >>> text_for_num(1060)
  104.     'caca'
  105.     >>> text_for_num(14818)
  106.     'sapu'
  107.     >>> text_for_num(335517)
  108.     'tecon'
  109.     >>>
  110.     >>> # using custom ones:
  111.     >>> text_for_num(153, ['ai', 'gcm'])
  112.     'magic'
  113.     """
  114.     base_cardinalities = list(map(len, symbols_for_base_idx))
  115.     n_bases = len(base_cardinalities)
  116.     base_phase = num % n_bases
  117.  
  118.     num = (num - base_phase) // n_bases
  119.     base = list(islice(cycle(base_cardinalities), base_phase, n_bases + base_phase))
  120.     coord = number_to_multi_base(num, base)
  121.  
  122.     return str_from_num_list(coord[::-1], symbols_for_base_idx, base_phase)[::-1]
  123.  
  124.  
  125. inf = float('infinity')
  126.  
  127.  
  128. def text_to_pronounceable_text(text,
  129.                                symbols_for_base_idx=vowels_and_consonants,
  130.                                captured_alphabet=alpha_numerics,
  131.                                case_sensitive=False,
  132.                                max_word_length=30,
  133.                                artificial_word_sep='_',
  134.                                assert_no_word_sep_in_text=False
  135.                                ):
  136.     """
  137.  
  138.     Args:
  139.         text: text you want to map
  140.         symbols_for_base_idx: symbols you want to map TO (default is vowels and consonants)
  141.         captured_alphabet: the symbols of the words you want to map FROM (essentially, in contrast to filler characters)
  142.         case_sensitive: Whether the input text should be lower cased before being processed
  143.         max_word_length: The maximum length of a pronounceable word
  144.         artificial_word_sep: The separator to separate pronounceable words when the word is too long
  145.         assert_no_word_sep_in_text: Whether to assert that artificial_word_sep is not already in the input text
  146.             (to avoid clashing and non-invertibility)
  147.  
  148.     Returns:
  149.         A more pronounceable text, where pronounceable is defined by you, so not my fault if it's not.
  150.  
  151.     >>> text_to_pronounceable_text('asd8098 098df')
  152.     'izokagamuta osuhoju'
  153.     >>> text_to_pronounceable_text('asd8098 098df', max_word_length=4, artificial_word_sep='_')
  154.     'izo_kaga_muta osu_hoju'
  155.     """
  156.     if not case_sensitive:
  157.         text = text.lower()
  158.  
  159.     p = re.compile(f'[{captured_alphabet}]+')  # to match the text to be mapped
  160.     anti_p = re.compile(f'[^{captured_alphabet}]+')  # to match the chunks of separator (not matched) text
  161.  
  162.     matched_text = anti_p.split(text)
  163.     num_of_character = {c: i for i, c in enumerate(captured_alphabet)}  # the numerical mapping of alphabet
  164.     base_n = len(captured_alphabet)
  165.     # function to get the (base_n) number for a chk
  166.     num_of_chk = lambda chk: sum(num_of_character[c] * (base_n ** i) for i, c in enumerate(chk))
  167.  
  168.     _text_for_num = lambda num: text_for_num(num, symbols_for_base_idx)
  169.     pronounceable_words = [_text_for_num(num_of_chk(chk)) for chk in matched_text]
  170.  
  171.     if max_word_length < inf:
  172.         def post_process_word(word):
  173.             if len(word) > max_word_length:
  174.                 if assert_no_word_sep_in_text:
  175.                     assert artificial_word_sep not in text, \
  176.                         f"Your artificial_word_sep ({artificial_word_sep}) was in the text (so no bijective mapping)"
  177.                 r = (len(word) % max_word_length)
  178.                 word_suffix = word[:r]
  179.                 word_prefix = word[r:]
  180.                 word = artificial_word_sep.join(map(''.join, zip(*([iter(word_prefix)] * max_word_length))))
  181.                 if word_suffix:
  182.                     word = word_suffix + artificial_word_sep + word
  183.                 return word
  184.             else:
  185.                 return word
  186.  
  187.         pronounceable_words = list(map(post_process_word, pronounceable_words))
  188.  
  189.     separator_text = p.split(text)
  190.  
  191.     if len(pronounceable_words) < len(separator_text):
  192.         return ''.join(map(''.join, zip(separator_text, pronounceable_words)))
  193.     else:
  194.         return ''.join(map(''.join, zip(pronounceable_words, separator_text)))
  195.  
  196.  
  197. class FunTests:
  198.     @staticmethod
  199.     def print_sequences_in_columns(start_num=3000, end_num=3060):
  200.         for i in range(start_num, end_num):
  201.             #     print(f"-----{i}")
  202.             if i % 2:
  203.                 print("".join(map(str, (text_for_num(i)))))
  204.             else:
  205.                 print("\t" + "".join(map(str, (text_for_num(i)))))
  206.  
  207.  
  208. if __name__ == '__main__':
  209.     try:
  210.         import argh
  211.     except ImportError:
  212.         raise ImportError("You don't have argh. You can install it by doing:\n"
  213.                           "     pip install argh\n"
  214.                           "In your terminal/environment,")
  215.  
  216.     argh.dispatch_command(text_to_pronounceable_text)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top