Advertisement
Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
import json

import cv2
import torch
from PIL import Image
from torchvision import transforms


def main() -> None:
    """Classify live webcam frames with a pretrained MobileNetV2.

    Shows each captured frame in an OpenCV window and prints the predicted
    ImageNet class index and its label for every frame.  Press 'q' in the
    window to quit.
    """
    # Prefer the GPU but fall back to CPU so the script also runs on
    # machines without CUDA (the original hard-required .cuda()).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = torch.hub.load("pytorch/vision", "mobilenet_v2", pretrained=True).to(device)
    model.eval()

    # labels.json is assumed to map class-index strings ("0".."999") to
    # human-readable names — TODO confirm against the file's actual schema.
    with open("labels.json") as f:
        labels = json.load(f)

    # Standard ImageNet preprocessing for torchvision classification models.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    cap = cv2.VideoCapture(2)  # camera index 2, as in the original script
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera unavailable or stream ended: stop cleanly instead
                # of crashing inside Image.fromarray(None).
                break

            # OpenCV delivers frames in BGR order, but torchvision's
            # ImageNet models expect RGB — convert before preprocessing
            # (feeding BGR silently degrades predictions).
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            batch = preprocess(Image.fromarray(rgb)).unsqueeze(0).to(device)

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                output = model(batch)

            idx = int(output.argmax().item())
            print(idx)
            print(labels[str(idx)])

            cv2.imshow("frame", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the camera and close windows, even on error.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement