# Untitled

from unsloth import FastVisionModel
from pypdf import PdfReader
import pypdfium2 as pdfium
from PIL import Image
import io
import json
from unsloth import FastLanguageModel

# Load the fine-tuned checkpoint stored under "lora_model" with 4-bit
# loading enabled. The second return value is bound to `tokenizer` and is
# used below both to build the chat prompt and to decode generated tokens.
model, tokenizer = FastVisionModel.from_pretrained("lora_model", load_in_4bit=True)

def _render_pdf_pages(pdf_path, scale):
    """Render every page of the PDF at *pdf_path* to a PIL image.

    The pdfium page and document handles are closed explicitly once the
    images have been produced instead of being left to the garbage collector.
    """
    with open(pdf_path, 'rb') as f:
        pdf_bytes = f.read()

    pdf_document = pdfium.PdfDocument(pdf_bytes)
    try:
        images = []
        for page in pdf_document:
            try:
                images.append(page.render(scale=scale).to_pil())
            finally:
                # Close each page before its parent document is closed.
                page.close()
        return images
    finally:
        pdf_document.close()


def process_invoice(pdf_path, *, scale=0.5, max_new_tokens=None):
    """Extract invoice fields from a PDF using the module-level vision model.

    Every page of the PDF is rendered to an image, all images are sent to the
    model in a single user turn together with the extraction instructions,
    and the generated text is parsed as JSON.

    Args:
        pdf_path: Path of the PDF file to process.
        scale: Render scale forwarded to ``page.render``. Defaults to 0.5,
            the value this function has always used.
        max_new_tokens: Optional cap on the number of generated tokens,
            forwarded to ``model.generate``. When ``None`` nothing is passed
            and the generation length is left to the model's own settings.

    Returns:
        The value parsed from the model's JSON output, or, when the output is
        not valid JSON, a dict with the keys ``"error"`` and ``"raw_output"``
        (the decoded generated text).

    Raises:
        OSError: If *pdf_path* cannot be opened or read.
    """
    images = _render_pdf_pages(pdf_path, scale)

    # The model was loaded through FastVisionModel, so inference mode is
    # enabled through the same class.
    FastVisionModel.for_inference(model)

    image_inputs = [{"type": "image", "image": image} for image in images]

    user_instruction = """You are an image processing language model specialized in extracting invoice details from images. Given an image of an invoice, extract the following fields:

            invoice_number: The invoice number.
            billing_date: The billing date.
            supplier_vat: The supplier's VAT number.
            supplier_country: The supplier's country.
            supplier_name: The supplier's name.
            customer_vat: The customer's VAT number.
            customer_country: The customer's country.
            customer_address_street: The customer's address street.
            customer_address_zip_code: The customer's zip code.
            customer_address_city: The customer's city.
            total_invoice_without_taxes: The total amount of the invoice without taxes.
            total_invoice_with_taxes: The total amount of the invoice with taxes.
            invoice_currency: The currency of the invoice.
            invoice_description: A description of the invoice.
            invoice_type: The type of the invoice.
            observation: Any additional observations.
            If any field is not present in the image, set its value to null.

            Return the extracted data as a JSON object with the keys exactly as listed above.
    """

    system_message = "You are an AI assistant for invoice fields extraction. You will receive the text of an invoice. You will receive a list of fields to fill in. Think step by step and extract the requested fields, always in text, one by one, with attention to detail"

    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": image_inputs + [{"type": "text", "text": user_instruction}]},
    ]

    # return_dict=True keeps every tensor the template call produces
    # (attention mask and, presumably, the image features -- TODO confirm
    # against the installed transformers/unsloth versions) rather than only
    # the token ids.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to('cuda')

    # Dump the encoded inputs for debugging.
    print(inputs)

    generate_kwargs = {}
    if max_new_tokens is not None:
        generate_kwargs["max_new_tokens"] = max_new_tokens
    outputs = model.generate(**inputs, **generate_kwargs)

    # The generated sequence starts with the prompt tokens; drop them so that
    # only the model's answer is decoded and handed to the JSON parser.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    try:
        return json.loads(response)
    except json.JSONDecodeError:
        # Best-effort contract: report unparsable output instead of raising.
        return {"error": "Failed to parse model output", "raw_output": response}

if __name__ == "__main__":
    # Run the sample extraction only when this file is executed as a script,
    # so that importing it does not trigger inference.
    result = process_invoice("CPV 206.pdf")
    print(result)