# Untitled

#!/usr/bin/env python3.6

import pdb
import random
import argparse

from pathlib import Path

class WeedLMAO:
    """Transliterate text dumps into the runes listed in ``gematria``.

    Constructing an instance does all the work: it converts either the
    single file given by ``args.single`` or every regular file directly
    inside ``args.source``, and writes one ``<prefix>.gematria`` file per
    input into ``args.target``.
    """

    # Latin letter groups, index-aligned with ``gematria`` below.
    latin_fragments = [
        'F', 'U', 'TH', 'O', 'R', 'C', 'G', 'W', 'H', 'N', 'I',
        'J', 'EO', 'P', 'X', 'S', 'T', 'B', 'E', 'M', 'L', 'NG',
        'OE', 'D', 'A', 'AE', 'Y', 'IA', 'EA',
    ]

    gematria = [
        'ᚠ', 'ᚢ', 'ᚦ', 'ᚩ', 'ᚱ', 'ᚳ', 'ᚷ', 'ᚹ', 'ᚻ', 'ᚾ',
        'ᛁ', 'ᛄ', 'ᛇ', 'ᛈ', 'ᛉ', 'ᛋ', 'ᛏ', 'ᛒ', 'ᛖ', 'ᛗ',
        'ᛚ', 'ᛝ', 'ᛟ', 'ᛞ', 'ᚪ', 'ᚫ', 'ᚣ', 'ᛡ', 'ᛠ',
    ]

    latin_to_gematria = dict(zip(latin_fragments, gematria))
    # Letters and letter groups without a rune of their own share an
    # existing one.
    latin_to_gematria.update({
        'K': 'ᚳ', 'Q': 'ᚳ', 'Z': 'ᛋ', 'ING': 'ᛝ', 'IO': 'ᛡ',
        'V': 'ᚢ'
    })

    # Two-letter groups that map to a single rune. They are upper case
    # because the text is upper-cased before matching and the keys of
    # ``latin_to_gematria`` are upper case as well.
    bigrams = frozenset(('TH', 'EO', 'NG', 'OE', 'AE', 'IA', 'IO', 'EA'))

    # Number of lines dropped starting at the "*** START OF TH..." marker.
    gutenberg_header_jump = 30
    # Number of lines dropped immediately before the "*** END OF TH..."
    # marker (the marker and everything after it are dropped too).
    gutenberg_footer_jump = 60

    def __init__(self, args):
        """Store the source/target folders and run the conversion.

        Args:
            args: parsed command-line namespace with ``source`` and
                ``target`` (``pathlib.Path`` folders) and ``single`` (a
                ``pathlib.Path`` file or ``None``).
        """
        self.root = args.source
        self.target = args.target

        if args.single:
            # A single file was requested: convert only that file instead
            # of additionally walking the whole source folder.
            self.process(args.single)
            return

        for f in self.root.glob('*'):
            if f.is_dir():
                continue
            self.process(f)

    def process(self, f):  # central hub for our processing
        """Read *f*, strip the Gutenberg boilerplate and write the runes.

        The output file is named after the part of the input file name
        before the first ``-`` plus ``.gematria`` and is written as UTF-8
        into ``self.target``. A file that cannot be read or decoded is
        reported and skipped.

        Args:
            f: ``pathlib.Path`` of the text file to convert.
        """
        try:
            with f.open() as fd:
                lines = fd.readlines()
        except (OSError, UnicodeDecodeError) as e:
            print("skipping %s: %s" % (f, e))
            return

        lines = self.cleanup(lines)
        # readlines() keeps the line endings, so a plain concatenation
        # reproduces the original line layout.
        runetext = self.translate_to_gematria("".join(lines))

        new_filename = "%s.gematria" % f.name.split("-")[0]
        new_path = self.target / new_filename

        print("%s ---> %s" % (f, new_path))

        # Runes are outside most legacy code pages; force UTF-8 so the
        # write does not depend on the locale's default encoding.
        with new_path.open('w', encoding='utf-8') as fd:
            fd.write(runetext)

    def cleanup(self, lines):
        """Return *lines* without the Project Gutenberg header and footer.

        Project Gutenberg dumps carry header and footer sections that are
        indicated by three stars at the beginning of a line. It is advisable
        to remove them before processing the dumps in any way; otherwise
        part of the header text ends up being treated as plaintext. Besides
        the marker lines themselves, a few lines following the start marker
        (``gutenberg_header_jump``) and preceding the end marker
        (``gutenberg_footer_jump``) are dropped as well, because there is
        often unrelated material right after the header or a glossary at
        the end.

        Spaces are removed only from a temporary copy of each line while
        looking for the markers; the returned lines are not modified.

        Args:
            lines: list of text lines.

        Returns:
            The remaining lines (possibly an empty list).
        """
        gsh = self.find_gutenberg_start_header(lines)
        # Compare against None: a marker on the very first line has index 0.
        if gsh is not None:
            lines = lines[gsh + self.gutenberg_header_jump:]
        esh = self.find_gutenberg_end_header(lines)
        if esh is not None:
            # Clamp at 0 so an early end marker cannot turn into a negative
            # index, which would slice relative to the end of the list.
            lines = lines[:max(0, esh - self.gutenberg_footer_jump)]
        return lines

    def _find_marker(self, lines, prefix):
        """Return the index of the first line starting with *prefix*.

        Each line is upper-cased and stripped of spaces before the
        comparison. Returns ``None`` when no line matches.
        """
        for i, l in enumerate(lines):
            if l.upper().replace(" ", "").startswith(prefix):
                return i
        return None

    def find_gutenberg_start_header(self, lines):
        """Return the index of the "*** START OF TH..." line, or ``None``."""
        return self._find_marker(lines, '***STARTOFTH')

    def find_gutenberg_end_header(self, lines):
        """Return the index of the "*** END OF TH..." line, or ``None``."""
        return self._find_marker(lines, '***ENDOFTH')

    def translate_to_gematria(self, t):
        """Return *t* with Latin letters replaced by runes.

        The text is upper-cased and scanned left to right. At every position
        the longest known group wins: ``ING`` first, then one of the
        ``bigrams``, then a single letter. Characters without a mapping
        (digits, punctuation, whitespace, ...) are copied unchanged.

        Args:
            t: text to transliterate.

        Returns:
            The transliterated string.
        """
        ltg = self.latin_to_gematria
        text = t.upper()
        end = len(text)
        out = []
        pos = 0

        while pos < end:
            if text.startswith('ING', pos):
                out.append(ltg['ING'])
                pos += 3
                continue
            pair = text[pos:pos + 2]
            if pair in self.bigrams:
                out.append(ltg[pair])
                pos += 2
                continue
            char = text[pos]
            out.append(ltg.get(char, char))
            pos += 1
        return "".join(out)

def main():
    """Parse the command line and hand the options to ``WeedLMAO``.

    ``-s/--source`` and ``-t/--target`` are mandatory folder paths;
    ``-f/--single`` optionally names one file. All three are converted to
    ``pathlib.Path`` objects by argparse.
    """
    cli = argparse.ArgumentParser()

    # The two folder options differ only in their flags and help text.
    folder_options = (
        ("-s", "--source", "Selects the folder to traverse"),
        ("-t", "--target",
         "Selects folder to dump collected and converted files into"),
    )
    for short_flag, long_flag, description in folder_options:
        cli.add_argument(
            short_flag, long_flag, required=True, type=Path, help=description
        )

    cli.add_argument(
        "-f", "--single", type=Path, help="Select single file for analysis"
    )

    WeedLMAO(cli.parse_args())

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()