# Untitled

# Library imports: pandas for table (matrix) operations, argparse for reading command-line arguments, sys for exiting when the input file is invalid, logging for controlling whether the program reports what it is doing (--verbose / -v option), re for the regular expression that verifies the input is a non-empty binary number
import pandas as pd
import argparse
import sys
import logging as log
import re

# Command-line interface: defines the arguments accepted by the program.
parser = argparse.ArgumentParser(description="Program encodes given sentence to binary format using ANS encoding")
parser.add_argument("-i", "--interactive", action="store_true", help="Launch program in interactive mode")
# The value options use nargs="?", so a flag may be given without a value.
# const="" makes such a bare flag yield "" (the same as the default) instead
# of None, so these options are always strings.
parser.add_argument("-t", "--encoding-table", metavar="FILE_NAME", nargs="?", const="", default="",
    help="Path to encoding table. Encoding table should be csv file, comma separated, with characters in first row and '-' if no value is available for given cell.")
parser.add_argument("-e", "--encoding-sequence", nargs="?", const="", default="", help="Starting encoding sequence in binary")
parser.add_argument("-s", "--sentence", nargs="?", const="", default="", help="Sentence to be encoded")
parser.add_argument("-v", "--verbose", action="store_true", help="Set output to verbose mode, so algorithm logs what its doing")

args = parser.parse_args()  # read the arguments from the command line

# Debug messages are shown only when --verbose is set; otherwise only
# warnings and above get through.
log.basicConfig(format="%(message)s", level=log.DEBUG if args.verbose else log.WARNING)


# Extracts one character's column from the encoding table (a dataframe).
def get_encoding_list(character, dataframe):
    """Return the integer entries of the table column named ``character``.

    Cells equal to "-" mean "no value" and are skipped. Any other cell that
    cannot be converted to ``int`` is reported on stdout and the program
    exits with status 1.

    Args:
        character: Column label to look up in ``dataframe``.
        dataframe: Table indexable by column label whose columns provide
            ``tolist()`` (a pandas DataFrame in this file).

    Returns:
        List of ints in column order, with the "-" placeholders removed.

    Raises:
        SystemExit: If the column holds a value that is neither "-" nor
            convertible to an integer.
    """
    encoding_list = dataframe[character].tolist()  # Series -> plain list

    values = []
    for x in encoding_list:
        if x == "-":
            continue
        try:
            values.append(int(x))
        except (ValueError, TypeError):
            print("Error, found character '" + str(x) + "' in csv file. In column '" + character + "'")
            if str(x) == "nan":  # an empty cell is loaded as NaN
                print("Search for empty columns in file (,,)")
            # str() is required here: concatenating the list itself raises
            # TypeError and hides the actual problem from the user.
            print("\nFaulty column values: " + str(encoding_list))
            sys.exit(1)

    return values

# Converts a decimal integer into a binary string.
def tobin(x):
    """Return ``x`` written in base 2, without any "0b" prefix."""
    return format(x, "b")

# Parses a string of binary digits into an integer (the inverse of tobin).
def frombin(x):
    """Return the integer value of the base-2 string ``x``."""
    return int(x, base=2)

#
def get_next_encoding(encoding_list, encoding_sequence):
    """Strip trailing bits from the current state and look up the next state.

    Trailing bits are removed from ``encoding_sequence`` until its numeric
    value is smaller than ``len(encoding_list)``. The entry at position
    ``value - 1`` of ``encoding_list`` is then selected as the next state.

    Args:
        encoding_list: Integer entries of the current character's table column.
        encoding_sequence: Current state as a string of binary digits.

    Returns:
        Tuple ``(removed_bits, next_state)``: the bits removed from the end of
        ``encoding_sequence`` in their original order ("" when nothing was
        removed), and the selected table entry as a binary string.

    Raises:
        ValueError: Raised by ``frombin`` when the sequence is not valid
            binary, which includes the case where every bit has been removed
            without the value dropping below ``len(encoding_list)``.
    """
    log.debug(
        "Trying to get index %s (%s) from character column that have %s elements.",
        encoding_sequence, frombin(encoding_sequence), len(encoding_list)
    )

    column_size = len(encoding_list)
    state_bits = encoding_sequence
    while frombin(state_bits) >= column_size:
        state_bits = state_bits[:-1]  # drop the last bit
    # Everything cut off above, already in its original left-to-right order.
    removed_bits = encoding_sequence[len(state_bits):]

    # NOTE(review): the lookup is offset by one, so a state value of 0 gives
    # index -1, i.e. the last entry of the column - confirm this is intended.
    next_state = encoding_list[frombin(state_bits) - 1]

    if removed_bits:
        log.debug(
            "Removed %s from sequence and found %s (%s) at index %s (%s)",
            removed_bits, tobin(next_state), next_state, state_bits, frombin(state_bits)
        )
    else:
        log.debug(
            "Removed nothing from sequence, found %s at index %s (%s)",
            next_state, state_bits, frombin(state_bits)
        )

    return removed_bits, tobin(next_state)

def load_table(file_name):
    """Load the encoding table from a CSV file.

    Args:
        file_name: Path of the CSV file; every cell is read as a string.

    Returns:
        Dict mapping each column header to the list produced by
        ``get_encoding_list`` for that column.
    """
    table = pd.read_csv(file_name, dtype=str)  # CSV -> dataframe of strings

    encoding_dict = {}
    for column_name in table.columns:
        encoding_dict[column_name] = get_encoding_list(column_name, table)

    log.debug("Characters loaded: %s", list(encoding_dict))
    return encoding_dict

def encode(encoding_dict, sentence, starting_sequence):
    """Encode ``sentence`` with the given table, starting from ``starting_sequence``.

    Args:
        encoding_dict: Mapping from character to its list of table entries.
        sentence: Text to encode; each character is used as a key of
            ``encoding_dict``.
        starting_sequence: Initial state as a string of binary digits.

    Returns:
        The bits returned by ``get_next_encoding`` for every character, in
        order, followed by the final state.

    Raises:
        KeyError: If a character of ``sentence`` is not a key of ``encoding_dict``.
    """
    def report_progress(bits):
        log.debug("Current encoded sequence is %s", bits or "empty")

    log.debug("Starting encoding '%s' with sequence %s", sentence, starting_sequence)

    state = starting_sequence
    output_bits = ""
    for symbol in sentence:
        log.debug("Encoding character %s", symbol)
        # get_next_encoding yields the emitted bits and the updated state.
        emitted_bits, state = get_next_encoding(encoding_dict[symbol], state)
        output_bits = output_bits + emitted_bits
        report_progress(output_bits)

    # The state left after the last character closes the output.
    log.debug("Adding last sequence (%s) to the result", state)
    output_bits = output_bits + state
    report_progress(output_bits)

    log.debug("Encoding finished")
    return output_bits

def get_user_input(prompt, default_value):
    """Prompt the user on stdout and read one line from stdin.

    Args:
        prompt: Text printed before reading.
        default_value: Value used when the user enters an empty line; ""
            means there is no default.

    Returns:
        Tuple ``(value, changed)``. ``changed`` is False only when a non-empty
        default was kept because the user entered nothing.
    """
    has_default = default_value != ""
    if has_default:
        print(prompt + "(default '" + default_value + "') ")
    else:
        print(prompt)

    answer = input()
    if has_default and answer == "":
        return default_value, False
    return answer, True

def check_sentence_characters(available_characters, sentence):
    """Return True when every character of ``sentence`` is in ``available_characters``.

    An empty ``sentence`` yields True.
    """
    return all(symbol in available_characters for symbol in sentence)

def interactive_encoding():
    """Run the encoder as an endless prompt loop.

    Each round asks for the encoding table path, the starting sequence and the
    sentence; previous answers are offered as defaults. Invalid input is
    reported and the round starts over. The loop has no exit condition, so the
    function does not return on its own.
    """
    file_name = ""
    encoding_sequence = ""
    sentence = ""
    encoding_dict = {}
    while True:
        # Keep asking for a path until the table can be loaded.
        while True:
            file_name, changed = get_user_input("Enter encoding table path: ", file_name)
            if changed:
                print("Loading file...")
            try:
                encoding_dict = load_table(file_name)
                break
            except FileNotFoundError:
                print("File '" + file_name + "' not found. Please check file path.")
                file_name = ""  # do not offer the bad path as the next default

        encoding_sequence, _ = get_user_input("Enter starting encoding sequence in binary: ", encoding_sequence)
        sentence, _ = get_user_input("Enter sentence to encode: ", sentence)

        # fullmatch, not match: the WHOLE input must consist of binary digits.
        # re.match only checks a prefix, so "01x" would pass and then crash
        # when the sequence is parsed as a base-2 number.
        if not re.fullmatch("[01]+", encoding_sequence):
            print("ERROR: Starting sequence should be non-empty binary number. Skipping encoding. Please enter valid starting sequence.")
            continue
        if sentence == "":
            print("WARNING: sentence to encode is empty")
        # Membership tests against the dict check its keys (the table characters).
        if not check_sentence_characters(encoding_dict, sentence):
            print("ERROR: sentence contains chracters not available in encoding table. Please note its case sensitive.")
            continue
        print("Encoded sentence: " + encode(encoding_dict, sentence, encoding_sequence) + "\n\n")

# Main entry point of the program.
if args.interactive:
    interactive_encoding()
elif len(sys.argv) == 1:
    # No arguments at all: assume the user wants to be guided through the input.
    print("Warning: no command line arguments, falling back to interactive mode.\n" +
        "Please use -i option to launch interactive mode without warnings")
    interactive_encoding()
else:
    # Standard (non-interactive) mode driven by the command-line options.
    # Normalise a missing option value (None) to the empty default so the
    # checks below always work on strings.
    table_path = args.encoding_table or ""
    starting_sequence = args.encoding_sequence or ""
    sentence = args.sentence or ""

    encoding_dict = {}
    try:
        encoding_dict = load_table(table_path)
    except FileNotFoundError:
        print("File '" + table_path + "' not found. Please check file path.")
        sys.exit(1)  # sys.exit instead of the site-provided exit(); non-zero status signals failure

    # fullmatch requires the whole string to be one or more of the characters
    # 0/1; re.match would accept any string that merely starts with a binary
    # digit and the encoder would crash later while parsing it.
    if not re.fullmatch("[01]+", starting_sequence):
        print("ERROR: Starting sequence should be non-empty binary number. Skipping encoding. Please enter valid starting sequence.")
        sys.exit(1)
    if sentence == "":
        print("WARNING: sentence to encode is empty")
    # Membership tests against the dict check its keys (the table characters).
    if not check_sentence_characters(encoding_dict, sentence):
        print("ERROR: sentence contains chracters not available in encoding table. Please note its case sensitive.")
        sys.exit(1)
    print(encode(encoding_dict, sentence, starting_sequence))