Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Extract every table found in a PDF with tabula and stack them, row-wise,
# into a single pandas DataFrame named `df`.
from tabula import read_pdf
import pandas as pd
import unicodedata
#from tabulate import tabulate

# Path of the PDF to scan.
# NOTE(review): "{}" looks like an unfilled template placeholder -- confirm
# how the real path is meant to be substituted before running.
file_path = r"{}"

# multiple_tables=True makes read_pdf return a list with one DataFrame per
# detected table; header=None is forwarded to pandas so that no row is
# consumed as column names (columns stay positional: 0, 1, 2, ...).
df_list = read_pdf(
    file_path,
    pages='all',
    multiple_tables=True,
    pandas_options={'header': None},
)
print("Total number of tables scanned: ", len(df_list))

# Bind all tables together in one call instead of re-concatenating the
# growing frame once per table. Original row labels are kept (no
# ignore_index), matching the previous table-by-table behaviour.
# When the scan finds no tables, fall back to an empty frame rather than
# failing on df_list[0].
df = pd.concat(df_list, axis=0) if df_list else pd.DataFrame()

# Name all the columns (left disabled; enable once the layout is confirmed):
#df.columns = ["Assets","Code","Debits No","Debits Amount","Credit No","Credit Amount","Book Balance","Book AVR. BAL(Month)"]

# Bare expression: displays the frame when run as a notebook/REPL cell.
df
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement