from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
import torch

model_path = "my_model_path"

# Note: set _attn_implementation='eager' if you don't have flash_attn installed
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation='flash_attention_2'
)

# For best performance, use num_crops=4 for multi-frame, num_crops=16 for single-frame.
processor = AutoProcessor.from_pretrained(
    model_path,
    trust_remote_code=True,
    num_crops=16
)

image = Image.open("image_path")

messages = [
    {"role": "user", "content": "<|image_1|>\nExtract all the text you see. The language is Romanian."},
]

prompt = processor.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

# Move the processed inputs to the model's device (needed when the model is loaded on GPU).
inputs = processor(prompt, image, return_tensors="pt").to(model.device)

generation_args = {
    "max_new_tokens": 1000,
    "temperature": 0.0,
    "do_sample": False,
}

with torch.no_grad():
    generate_ids = model.generate(
        **inputs,
        eos_token_id=processor.tokenizer.eos_token_id,
        **generation_args
    )

# Remove the prompt tokens, keeping only the newly generated text.
generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
response = processor.batch_decode(
    generate_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False
)[0]

print(response)
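
# The num_crops comment above also mentions multi-frame input (num_crops=4). Below is a
# minimal sketch of that variant, assuming the processor follows the Phi-3.5-vision
# convention of numbered <|image_1|>, <|image_2|>, ... placeholders paired with a list of
# PIL images; "frame_1_path" and "frame_2_path" are hypothetical placeholder paths.

processor_mf = AutoProcessor.from_pretrained(
    model_path,
    trust_remote_code=True,
    num_crops=4
)

frames = [Image.open(p) for p in ["frame_1_path", "frame_2_path"]]  # placeholder paths

# One numbered image placeholder per frame, in order.
placeholders = "".join(f"<|image_{i}|>\n" for i in range(1, len(frames) + 1))
messages_mf = [
    {"role": "user", "content": placeholders + "Extract all the text you see. The language is Romanian."},
]

prompt_mf = processor_mf.tokenizer.apply_chat_template(
    messages_mf,
    tokenize=False,
    add_generation_prompt=True
)

inputs_mf = processor_mf(prompt_mf, frames, return_tensors="pt").to(model.device)

with torch.no_grad():
    out_ids = model.generate(
        **inputs_mf,
        eos_token_id=processor_mf.tokenizer.eos_token_id,
        **generation_args
    )

# Strip the prompt tokens and decode only the generated continuation.
out_ids = out_ids[:, inputs_mf['input_ids'].shape[1]:]
print(processor_mf.batch_decode(
    out_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False
)[0])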