# Data hosted with ♥ by Pastebin.com - Download Raw - See Original
  1. from pathlib import Path
  2. import numpy as np
  3. import cv2
  4. import depthai as dai
  5. import time
  6.  
  7. CONF_THRESHOLD = 0.4
  8. SHAPE = 320
  9. coco_90 = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "12", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "26", "backpack", "umbrella", "29", "30", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "45", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "66", "dining table", "68", "69", "toilet", "71", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "83", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
  10.  
  11. p = dai.Pipeline()
  12. p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_3)
  13.  
  14. class FPSHandler:
  15.     def __init__(self, cap=None):
  16.         self.timestamp = time.time()
  17.         self.start = time.time()
  18.         self.frame_cnt = 0
  19.     def next_iter(self):
  20.         self.timestamp = time.time()
  21.         self.frame_cnt += 1
  22.     def fps(self):
  23.         return self.frame_cnt / (self.timestamp - self.start)
  24.  
  25. camRgb = p.create(dai.node.ColorCamera)
  26. camRgb.setPreviewSize(SHAPE, SHAPE)
  27. camRgb.setInterleaved(False)
  28. camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
  29. camRgb.setFp16(True) # Model requires FP16 input (due to incorrect conversation)
  30. """
  31. By default, pool size of cam preview is 4, and queue size of nn.input is 5. This means that by default,
  32. because NN is slower than cam preview, all frames will be buffered in the NN input queue,
  33. and therefore camera won't be able to create new frames (no space in the pool). If we set either
  34. preview pool size to >5, or queue size <4, we will be streaming color frames at camera FPS,
  35. and NN will be running on the latest frame only at it's own pace (about 5FPS for this model).
  36. """
  37. camRgb.setNumFramesPool(10,10,10,10,1)
  38.  
  39. # NN that detects faces in the image
  40. nn = p.create(dai.node.NeuralNetwork)
  41. nn.setBlobPath(str(Path("models/efficientdet_lite0_2021.3_6shaves.blob").resolve().absolute()))
  42. nn.input.setBlocking(False)
  43. camRgb.preview.link(nn.input)
  44.  
  45. # Send bouding box from the NN to the host via XLink
  46. nn_xout = p.create(dai.node.XLinkOut)
  47. nn_xout.setStreamName("nn")
  48. nn.out.link(nn_xout.input)
  49. # Send rgb frames to the host
  50. rgb_xout = p.create(dai.node.XLinkOut)
  51. rgb_xout.setStreamName("rgb")
  52. camRgb.preview.link(rgb_xout.input)
  53.  
  54. # Pipeline is defined, now we can connect to the device
  55. with dai.Device(p) as device:
  56.     qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
  57.     qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
  58.     fps = FPSHandler()
  59.     shape = (3, SHAPE, SHAPE)
  60.  
  61.     while True:
  62.         inRgb = qRgb.get()
  63.         # Model needs FP16 so we have to convert color frame back to U8 on the host
  64.         frame = np.array(inRgb.getData()).view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8).copy()
  65.  
  66.         in_nn = qNn.tryGet()
  67.         if in_nn is not None:
  68.             fps.next_iter()
  69.             cv2.putText(frame, "Fps: {:.2f}".format(fps.fps()), (2, SHAPE - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255))
  70.  
  71.             # You can use the line below to print all the (output) layers and their info
  72.             # [print(f"Layer name: {l.name}, Type: {l.dataType}, Dimensions: {l.dims}") for l in in_nn.getAllLayers()]
  73.  
  74.             bb = np.array(in_nn.getLayerFp16('Identity')).reshape(25, 4)
  75.             label = in_nn.getLayerInt32('Identity_1')
  76.             conf = in_nn.getLayerFp16('Identity_2')
  77.  
  78.             for i in range(len(conf)):
  79.                 if CONF_THRESHOLD < conf[i]:
  80.                     bb_det = bb[i]
  81.                     # Limit the bounding box to 0..1
  82.                     bb_det[bb_det > 1] = 1
  83.                     bb_det[bb_det < 0] = 0
  84.                     xy_min = (int(bb_det[1]*SHAPE), int(bb_det[0]*SHAPE))
  85.                     xy_max = (int(bb_det[3]*SHAPE), int(bb_det[2]*SHAPE))
  86.                     # Display detection's BB, label and confidence on the frame
  87.                     cv2.rectangle(frame, xy_min , xy_max, (255, 0, 0), 2)
  88.                     cv2.putText(frame, coco_90[label[i]], (xy_min[0] + 10, xy_min[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
  89.                     cv2.putText(frame, f"{int(conf[i] * 100)}%", (xy_min[0] + 10, xy_min[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
  90.  
  91.         cv2.imshow("rgb", frame)
  92.         if cv2.waitKey(1) == ord('q'):
  93.             break
  94.