Advertisement
Guest User

Untitled

a guest
Jun 17th, 2025
8
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.85 KB | None | 0 0
  1. import torch
  2. from PIL import Image
  3. import open_clip
  4.  
  5. model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
  6. model.eval()
  7. tokenizer = open_clip.get_tokenizer('ViT-B-32')
  8.  
  9. image = preprocess(Image.open("source.webp")).unsqueeze(0)
  10. classes = [
  11. "a bird",
  12. "a plane",
  13. "a helicopter",
  14. "superman"
  15. ]
  16. text = tokenizer(classes)
  17.  
  18. with torch.no_grad(), torch.autocast("cuda"):
  19. image_features = model.encode_image(image)
  20. text_features = model.encode_text(text)
  21. image_features /= image_features.norm(dim=-1, keepdim=True)
  22. text_features /= text_features.norm(dim=-1, keepdim=True)
  23.  
  24. text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)[0]
  25.  
  26. best_guess = torch.argmax(text_probs)
  27.  
  28. print(classes[best_guess] if text_probs[best_guess] > 0.1 else "unknown")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement