# Untitled

import subprocess
import pandas as pd
import os

def convert_pdf_to_text(pdf_path, text_path):
    """Extract the text of a PDF into a text file using Ghostscript.

    Runs the ``gs`` executable with the ``txtwrite`` output device, reading
    ``pdf_path`` and writing the extracted text to ``text_path``.

    Args:
        pdf_path: Path of the PDF file to read.
        text_path: Path of the text file Ghostscript should write.

    Raises:
        FileNotFoundError: If the ``gs`` executable cannot be found.
        subprocess.CalledProcessError: If Ghostscript exits with a non-zero
            status.
    """
    command = [
        "gs",
        "-q",          # suppress the startup banner
        "-dNOPAUSE",   # do not prompt between pages
        "-dBATCH",     # exit once the input has been processed
        "-sDEVICE=txtwrite",
        f"-sOutputFile={text_path}",
        pdf_path,
    ]
    # check=True so a failed conversion is reported instead of being silently
    # ignored; otherwise callers would carry on with a missing or stale file.
    subprocess.run(command, check=True)

def convert_text_to_xlsx(text_path, xlsx_path, *, encoding="utf-8"):
    """Convert a whitespace-separated text file into an XLSX spreadsheet.

    Every line of the text file becomes one spreadsheet row and every
    whitespace-separated token becomes one cell. Lines may have different
    numbers of tokens; a blank line yields an empty row. The sheet is written
    without an index column and without a header row.

    If ``text_path`` does not exist, a message is printed and no spreadsheet
    is written.

    Args:
        text_path: Path of the text file to read.
        xlsx_path: Path of the XLSX file to create.
        encoding: Text encoding used to decode ``text_path``. Defaults to
            UTF-8 so the result does not depend on the platform locale
            (Ghostscript's ``txtwrite`` device is documented to emit UTF-8
            by default).
    """
    # Open directly instead of checking for existence first: this avoids the
    # gap between the check and the open in which the file could disappear.
    try:
        with open(text_path, "r", encoding=encoding) as file:
            rows = [tuple(line.split()) for line in file]
    except FileNotFoundError:
        print(f"Plik tekstowy {text_path} nie istnieje.")
        return

    df = pd.DataFrame(rows)
    df.to_excel(xlsx_path, index=False, header=False)
    print(f"Plik XLSX {xlsx_path} utworzony.")

if __name__ == "__main__":
    # Despite its name, this value is used as the directory name under /home
    # when building the input and output paths below.
    hostname = "michael"
    pdf_file = f"/home/{hostname}/SIWB/TEST_XLSX/inf-s1.pdf"
    text_file = f"/home/{hostname}/SIWB/TEST_XLSX/plik.txt"
    xlsx_file = f"/home/{hostname}/SIWB/TEST_XLSX/plik.xlsx"

    # Step 1: PDF -> plain text.
    convert_pdf_to_text(pdf_file, text_file)
    # Only announce the text file when it is really there; otherwise this
    # message would contradict the "does not exist" message printed by the
    # next step.
    if os.path.exists(text_file):
        print(f"Plik tekstowy {text_file} utworzony.")

    # Step 2: plain text -> XLSX.
    convert_text_to_xlsx(text_file, xlsx_file)