Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
- import argparse
- import base64
- import requests
- import fitz # PyMuPDF
- import io
def process_pdf(file_path):
    """Render every page of a PDF to a base64-encoded JPEG string.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        A ``(images, page_count)`` tuple. ``images`` is a list holding one
        base64 string per page, in page order; ``page_count`` is the
        document's page count. If anything goes wrong the error is printed
        and ``(None, None)`` is returned instead of raising.
    """
    try:
        # The context manager closes the document even when rendering a page
        # fails; previously the handle was never released.
        with fitz.open(file_path) as doc:
            processed_images = [
                base64.b64encode(page.get_pixmap().tobytes("jpeg")).decode("utf-8")
                for page in doc
            ]
            # Read the count while the document is still open.
            return processed_images, doc.page_count
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary that reports the
        # problem and hands the caller a (None, None) sentinel.
        print(f"Error processing PDF: {e}")
        return None, None
class LLMProcessor:
    """Small HTTP client that sends page images to an LLM generate endpoint.

    Each request posts a fixed "repeat the text" prompt together with one
    base64-encoded image to ``<api_url>/api/v1/generate``.
    """

    def __init__(self, api_url, api_password, timeout=(10, None)):
        """Store the endpoint and build the request headers.

        Args:
            api_url: Base URL of the API, e.g. ``http://localhost:5001``.
            api_password: Value sent as the bearer token.
            timeout: Passed to ``requests.post``. The default limits only the
                connection phase (10 s) and leaves the read phase unbounded,
                so slow generations are not interrupted.
        """
        self.api_url = api_url
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_password}",
        }

    def send_image_to_llm(self, base64_image, page_number, total_pages):
        """Ask the LLM to transcribe the text shown on one page image.

        Args:
            base64_image: Base64 string of the page image.
            page_number: Unused; kept so existing callers keep working.
            total_pages: Unused; kept so existing callers keep working.

        Returns:
            The ``text`` field of the first result, or ``None`` when the
            request fails, the status is not 200, or the response body does
            not have the expected shape. Failures are printed, not raised.
        """
        prompt = (
            "<|im_start|>user\n"
            "Repeat verbatim all text on the image.<|im_end|>\n"
            "<|im_start|>assistant\n"
        )
        payload = {
            "prompt": prompt,
            "max_length": 2048,
            "images": [base64_image],
            "temp": 0,
        }
        # Tolerate a base URL given with a trailing slash.
        url = f"{self.api_url.rstrip('/')}/api/v1/generate"

        try:
            response = requests.post(
                url, json=payload, headers=self.headers, timeout=self.timeout
            )
        except requests.RequestException as exc:
            # Connection refused, DNS failure, timeout, ...: report it the
            # same way as an HTTP error instead of crashing the caller.
            print(f"Error: request to LLM API failed: {exc}")
            return None

        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            return None

        try:
            return response.json()["results"][0].get("text")
        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
            # Body was not JSON or lacked the results[0] object.
            print(f"Error: unexpected response from LLM API: {exc!r}")
            return None
def main():
    """Command-line entry point.

    Parses the arguments, renders the PDF's pages via ``process_pdf``, sends
    each page image to the LLM through ``LLMProcessor`` and prints either the
    returned text or a per-page failure message.
    """
    arg_parser = argparse.ArgumentParser(description="Send all PDF images to LLM API")
    arg_parser.add_argument("pdf_path", help="Path to the PDF file")
    arg_parser.add_argument(
        "--api-url",
        default="http://localhost:5001",
        help="URL for the LLM API",
    )
    arg_parser.add_argument(
        "--api-password",
        default="",
        help="Password for the LLM API",
    )
    cli_args = arg_parser.parse_args()

    processor = LLMProcessor(cli_args.api_url, cli_args.api_password)
    page_images, page_total = process_pdf(cli_args.pdf_path)

    # Nothing to send when rendering failed or produced no pages.
    if not (page_images and page_total):
        print("Failed to process the PDF.")
        return

    print(f"Processing PDF with {page_total} pages.")
    for page_no, image in enumerate(page_images, start=1):
        print(f"\nProcessing page {page_no} of {page_total}:")
        text = processor.send_image_to_llm(image, page_no, page_total)
        if not text:
            print(f"Failed to get a response from the LLM for page {page_no}.")
            continue
        print("LLM Response:")
        print(text)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Add Comment
Please sign in to add a comment.