Advertisement
Guest User

Untitled

a guest
Aug 20th, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.74 KB | None | 0 0
  1. from tabula import read_pdf
  2. import pandas as pd
  3. import unicodedata
  4.  
  5. #from tabulate import tabulate
  6. file_path = r"{}"
  7. df_list = read_pdf(file_path, pages='all',multiple_tables=True,pandas_options={'header':None})
  8.  
  9. print("Total number of tables scanned: ", len(df_list))
  10.  
  11. #Create a dataframe containing the first table
  12. df=pd.DataFrame(data=df_list[0]) #,columns=("Assets","Code","Debits No","Debits Amount","Credit No","Credit Amount","Book Balance","Book AVR. BAL(Month)"))
  13.  
  14. #binding all the rest tables
  15. for i in range(0,len(df_list)):
  16. if i<len(df_list)-1:
  17. df = pd.concat([df,df_list[i+1]], axis=0)
  18.  
  19. #Name all the columns
  20. #df.columns = ["Assets","Code","Debits No","Debits Amount","Credit No","Credit Amount","Book Balance","Book AVR. BAL(Month)"]
  21.  
  22. df
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement