Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import PyPDF2
- import pandas as pd
- import numpy as np
- lines = []
- sites = []
- kinds = []
- total_offqc_wip_inv = []
- total_offqc_scale_inv = []
- total_offqc_truck_inv = []
- total_offqc_rail_inv = []
- total_offqc_boat_inv = []
- # creating a pdf file object
- pdfFileObj = open('PDFs/Inventory_Summary.pdf', 'rb')
- # creating a pdf reader object
- pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
- count = pdfReader.numPages
- # creating a page object
- pageObj0 = pdfReader.getPage(0)
- pageObj1 = pdfReader.getPage(1)
- pageObj2 = pdfReader.getPage(2)
- pageObj3 = pdfReader.getPage(3)
- pageObj4 = pdfReader.getPage(4)
- pageObj5 = pdfReader.getPage(5)
- # extracting text from page
- page0 = pageObj0.extractText().strip()
- page1 = pageObj1.extractText().strip()
- page2 = pageObj2.extractText().strip()
- page3 = pageObj3.extractText().strip()
- page4 = pageObj4.extractText().strip()
- page5 = pageObj5.extractText().strip()
- corrected_page0 = page0.split('07:43am')[+1]
- corrected_page1 = page1.split('07:43am')[+1]
- corrected_page2 = page2.split('07:43am')[+1]
- corrected_page3 = page3.split('07:43am')[+1]
- corrected_page4 = page4.split('07:43am')[+1]
- corrected_page5 = page5.split('07:43am')[+1]
- for line in page0.splitlines():
- if 'Site' in line:
- for word in line.split():
- if word != 'Site':
- sites.append(word)
- if 'All Shifts' in line:
- for word in line.split():
- if word != 'All':
- if word != 'Shifts':
- kinds.append(word)
- if 'Total OffQc WIP Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'WIP':
- if word != 'Inv':
- total_offqc_wip_inv.append(word)
- if 'Total OffQc Scale Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Scale':
- if word != 'Inv':
- total_offqc_scale_inv.append(word)
- if 'Total OffQc Truck Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Truck':
- if word != 'Inv':
- total_offqc_truck_inv.append(word)
- for line in page1.splitlines():
- if 'Total OffQc Rail Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Rail':
- if word != 'Inv':
- total_offqc_rail_inv.append(word)
- if 'Total OffQc Boat Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Boat':
- if word != 'Inv':
- total_offqc_boat_inv.append(word)
- for line in page3.splitlines():
- if 'Site' in line:
- for word in line.split():
- if word != 'Site':
- sites.append(word)
- if 'All Shifts' in line:
- for word in line.split():
- if word != 'All':
- if word != 'Shifts':
- kinds.append(word)
- if 'Total OffQc WIP Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'WIP':
- if word != 'Inv':
- total_offqc_wip_inv.append(word)
- if 'Total OffQc Scale Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Scale':
- if word != 'Inv':
- total_offqc_scale_inv.append(word)
- if 'Total OffQc Truck Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Truck':
- if word != 'Inv':
- total_offqc_truck_inv.append(word)
- for line in page4.splitlines():
- if 'Total OffQc Rail Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Rail':
- if word != 'Inv':
- total_offqc_rail_inv.append(word)
- if 'Total OffQc Boat Inv' in line:
- for word in line.split():
- if word != 'Total':
- if word != 'OffQc':
- if word != 'Boat':
- if word != 'Inv':
- total_offqc_boat_inv.append(word)
- sites.append("Total")
- d = np.column_stack([sites, kinds, total_offqc_wip_inv, total_offqc_scale_inv, total_offqc_truck_inv, total_offqc_rail_inv, total_offqc_boat_inv])
- df = pd.DataFrame(d)
- # closing the pdf file object
- pdfFileObj.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement