Advertisement
Guest User

Untitled

a guest
Sep 15th, 2019
152
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.53 KB | None | 0 0
  1. #!/usr/bin/env python3.6
  2.  
  3. import pdb
  4. import random
  5. import argparse
  6.  
  7. from pathlib import Path
  8.  
  9. class WeedLMAO:
  10.  
  11. latin_fragments = [
  12. 'F', 'U', 'TH', 'O', 'R', 'C', 'G', 'W', 'H', 'N', 'I',
  13. 'J', 'EO', 'P', 'X', 'S', 'T', 'B', 'E', 'M', 'L', 'NG',
  14. 'OE', 'D', 'A', 'AE', 'Y', 'IA', 'EA',
  15. ]
  16.  
  17. gematria = [
  18. 'ᚠ', 'ᚢ', 'ᚦ', 'ᚩ', 'ᚱ', 'ᚳ', 'ᚷ', 'ᚹ', 'ᚻ', 'ᚾ',
  19. 'ᛁ', 'ᛄ', 'ᛇ', 'ᛈ', 'ᛉ', 'ᛋ', 'ᛏ', 'ᛒ', 'ᛖ', 'ᛗ',
  20. 'ᛚ', 'ᛝ', 'ᛟ', 'ᛞ', 'ᚪ', 'ᚫ', 'ᚣ', 'ᛡ', 'ᛠ',
  21. ]
  22.  
  23. latin_to_gematria = dict(zip(latin_fragments, gematria))
  24. latin_to_gematria.update({
  25. 'K': 'ᚳ', 'Q': 'ᚳ', 'Z': 'ᛋ', 'ING': 'ᛝ', 'IO': 'ᛡ',
  26. 'V': 'ᚢ'
  27. })
  28.  
  29. gutenberg_header_jump = 30
  30.  
  31. def __init__(self, args):
  32. self.root = args.source
  33. self.target = args.target
  34.  
  35. if args.single:
  36. self.process(args.single)
  37.  
  38. # file_list = list(self.root.glob('*'))
  39. # for f in [random.choice(file_list)]:
  40. c = 0
  41. for f in self.root.glob('*'):
  42. if f.is_dir():
  43. continue
  44. self.process(f)
  45.  
  46. def process(self, f): # central hub for our processing
  47. with f.open() as fd:
  48. try:
  49. lines = fd.readlines()
  50. except Exception as e:
  51. pdb.post_mortem()
  52.  
  53. lines = self.cleanup(lines)
  54. # runetext = self.translate_to_gematria("\n".join(lines))
  55.  
  56. new_filename = "%s.gematria" % f.name.split("-")[0]
  57. new_path = self.target / new_filename
  58.  
  59. print("%s ---> %s" % (f, new_path))
  60.  
  61. with new_path.open('w') as fd:
  62. fd.write(runetext)
  63.  
  64. """
  65. project gutenberg has annoying header and footer sections, they are
  66. indicated by three stars at the beginning of a line. it is definitely
  67. advised to clean the dump files up before processing them in any way; in
  68. test runs, i forgot to remove them and some programs i ran calculated
  69. part of the header text for assumed plaintext. this program strips those
  70. headers plus a few lines that follow or preceed, depending on context.
  71. sometimes you have annoying glossaries in the end or other unrelated stuff
  72. right after the header that you dont care about.
  73.  
  74. my purposes included stripping all whitespaces before processing. for your
  75. purposes, i'd advise commenting out the .replace(" ", "") lines in
  76. `find_gutenberg_start_header` and `find_gutenberg_end_header` to preserve
  77. whitespaces.
  78. """
  79.  
  80. def cleanup(self, lines):
  81. gsh = self.find_gutenberg_start_header(lines)
  82. if gsh:
  83. lines = lines[gsh+self.gutenberg_header_jump:]
  84. esh = self.find_gutenberg_end_header(lines)
  85. if esh:
  86. lines = lines[:esh-60]
  87. return lines
  88.  
  89. def find_gutenberg_start_header(self, lines):
  90. for i, l in enumerate(lines):
  91. tl = l.upper().replace(" ", "")
  92. if tl.startswith('***STARTOFTH'):
  93. return i
  94.  
  95. def find_gutenberg_end_header(self, lines):
  96. for i, l in enumerate(lines):
  97. tl = l.upper().replace(" ", "")
  98. if tl.startswith('***ENDOFTH'):
  99. return i
  100.  
  101. def translate_to_gematria(self, t):
  102. res = ""
  103. skip = 0
  104. bigram = ['th', 'eo', 'ng', 'oe', 'ae', 'ia', 'io', 'ea']
  105. t = t.upper()
  106. ltg = self.latin_to_gematria
  107.  
  108. for i, val in enumerate(t):
  109. if skip:
  110. skip -= 1
  111. continue
  112. frag_short = t[i:i+2]
  113. frag_long = t[i:i+3]
  114.  
  115. if frag_long == 'ING':
  116. res += ltg[frag_long]
  117. skip += 2
  118. continue
  119. elif frag_short in bigram:
  120. res += ltg[frag_short]
  121. skip += 1
  122. continue
  123. res += ltg.get(val, val)
  124. return res
  125.  
  126. def main():
  127. parser = argparse.ArgumentParser()
  128. parser.add_argument(
  129. "-s", "--source", required=True, type=Path,
  130. help="Selects the folder to traverse"
  131. )
  132. parser.add_argument(
  133. "-t", "--target", required=True, type=Path,
  134. help="Selects folder to dump collected and converted files into"
  135. )
  136. parser.add_argument(
  137. "-f", "--single", type=Path,
  138. help="Select single file for analysis"
  139. )
  140.  
  141. args = parser.parse_args()
  142. WeedLMAO(args)
  143.  
  144. if __name__ == "__main__":
  145. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement