Advertisement
Guest User

PdftoTXT

a guest
Jul 5th, 2023
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.78 KB | Source Code | 0 0
  1. import tabula
  2. from tabulate import tabulate
  3. import camelot
  4. import os
  5. os.environ['PATH'] += os.pathsep + r'D:\DEV\Ghostcript\gs10.01.2\bin'
  6.  
  7.  
  8. def extract_lecturers_table(file_path):
  9.     tables = tabula.read_pdf(file_path, pages='all')
  10.     lecturers_table = tables[0].values.tolist()  # Załóżmy, że pierwsza tabela zawiera dane wykładowców
  11.     return lecturers_table
  12.  
  13. def extract_schedule_table(file_path):
  14.     tables = camelot.read_pdf(file_path, pages='all', backend='ghostscript')
  15.     schedule_table = []
  16.     for table in tables:
  17.         table_data = table.df.iloc[:, :4].values.tolist()  # Załóżmy, że cztery kolumny zawierają godziny, daty i sale
  18.         schedule_table.extend(table_data)
  19.     return schedule_table
  20.  
  21. def save_table_to_txt(table_contents, output_file):
  22.     table_str = tabulate(table_contents, headers="firstrow", tablefmt='grid')
  23.     with open(output_file, 'w', encoding='utf-8') as file:
  24.         file.write(table_str)
  25.  
  26. # Ścieżka do Twojego pliku PDF
  27. pdf_file_path = r'C:\Users\deade\Desktop\inz_03072023\st-informatyka-sem-6.pdf'
  28. output_file_path = r'C:\Users\deade\Desktop\tabela.txt'
  29.  
  30. # Ekstrahuj dane z tabel
  31. lecturers_table = extract_lecturers_table(pdf_file_path)
  32. schedule_table = extract_schedule_table(pdf_file_path)
  33.  
  34. # Wyświetl zawartość tabel
  35. lecturers_table_str = tabulate(lecturers_table, headers="firstrow", tablefmt='grid')
  36. print("Tabela wykładowców:")
  37. print(lecturers_table_str)
  38. print()
  39.  
  40. schedule_table_str = tabulate(schedule_table, headers=["Godzina", "Data", "Sala"], tablefmt='grid')
  41. print("Tabela planu zajęć:")
  42. print(schedule_table_str)
  43.  
  44. # Zapisz tabele do pliku tekstowego
  45. save_table_to_txt(lecturers_table, output_file_path + '_wykladowcy.txt')
  46. save_table_to_txt(schedule_table, output_file_path + '_plan_zajec.txt')
  47.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement