Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tabula
- from tabulate import tabulate
- import camelot
- import os
# Append a machine-specific Ghostscript "bin" directory to this process's PATH.
# NOTE(review): presumably needed so the 'ghostscript' backend requested from
# camelot further down can find the Ghostscript binaries -- confirm.
os.environ['PATH'] = (
    os.environ['PATH'] + os.pathsep + r'D:\DEV\Ghostcript\gs10.01.2\bin'
)
def extract_lecturers_table(file_path):
    """Return the rows of the first table that tabula detects in a PDF.

    The first detected table is assumed to contain the lecturer data.

    Args:
        file_path: Path of the PDF file passed to ``tabula.read_pdf``.

    Returns:
        The cell values of the first detected table as a list of row lists,
        or an empty list when no table is detected at all.
    """
    tables = tabula.read_pdf(file_path, pages='all')
    # Without this guard, indexing an empty result raises IndexError.
    if not tables:
        return []
    # NOTE(review): only the cell values are returned; the DataFrame's column
    # labels are not part of the rows -- confirm callers do not expect them.
    return tables[0].values.tolist()
def extract_schedule_table(file_path):
    """Collect the leading columns of every table camelot finds in a PDF.

    All pages are parsed with camelot's 'ghostscript' backend. For each
    detected table, at most the first four columns are kept (assumed to hold
    the hours, dates and rooms), and the rows of all tables are concatenated
    in detection order.

    Args:
        file_path: Path of the PDF file passed to ``camelot.read_pdf``.

    Returns:
        A flat list of row lists; empty when no table is detected.
    """
    detected = camelot.read_pdf(file_path, pages='all', backend='ghostscript')
    return [
        row
        for found in detected
        for row in found.df.iloc[:, :4].values.tolist()
    ]
def save_table_to_txt(table_contents, output_file, *, headers="firstrow",
                      tablefmt='grid'):
    """Render a table with ``tabulate`` and write it to a UTF-8 text file.

    Args:
        table_contents: Rows to render, in any form ``tabulate`` accepts.
        output_file: Path of the text file to create or overwrite.
        headers: Forwarded to ``tabulate`` as ``headers``. The default,
            "firstrow", keeps the previous behavior of using the first row
            of ``table_contents`` as the header line.
        tablefmt: Forwarded to ``tabulate`` as ``tablefmt``; defaults to the
            previously hard-coded 'grid' format.
    """
    rendered = tabulate(table_contents, headers=headers, tablefmt=tablefmt)
    with open(output_file, 'w', encoding='utf-8') as out:
        out.write(rendered)
# Path to the input PDF file and the base path for the text output.
pdf_file_path = r'C:\Users\deade\Desktop\inz_03072023\st-informatyka-sem-6.pdf'
output_file_path = r'C:\Users\deade\Desktop\tabela.txt'

# Extract the data from the tables.
lecturers_table = extract_lecturers_table(pdf_file_path)
schedule_table = extract_schedule_table(pdf_file_path)

# Display the contents of the tables.
lecturers_table_str = tabulate(lecturers_table, headers="firstrow", tablefmt='grid')
print("Tabela wykładowców:")
print(lecturers_table_str)
print()

# NOTE(review): extract_schedule_table keeps up to four columns per row but
# only three headers are given here; tabulate pads missing headers on the
# left, so the labels may not line up with the intended columns -- confirm.
schedule_table_str = tabulate(schedule_table, headers=["Godzina", "Data", "Sala"], tablefmt='grid')
print("Tabela planu zajęć:")
print(schedule_table_str)

# Save the tables to text files. The per-table suffix is inserted before the
# extension ("tabela_wykladowcy.txt"); appending it to the full path produced
# names such as "tabela.txt_wykladowcy.txt".
# NOTE(review): save_table_to_txt renders with headers="firstrow", so the
# saved schedule uses its first data row as the header, unlike the printed
# version above -- confirm which is intended.
output_base, output_ext = os.path.splitext(output_file_path)
save_table_to_txt(lecturers_table, output_base + '_wykladowcy' + output_ext)
save_table_to_txt(schedule_table, output_base + '_plan_zajec' + output_ext)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement