Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Download and create graph
- maybe_download_and_extract()
- create_graph()
- # Variables declarations
- frame_count=0
- score=0
- start = time.time()
- pygame.mixer.init()
- pred=0
- last=0
- human_str=None
- font=cv2.FONT_HERSHEY_TRIPLEX
- font_color=(255,255,255)
- # Init video stream
- vs = VideoStream(src=0).start()
- # Start tensroflow session
- with tf.Session() as sess:
- softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
- while True:
- frame = vs.read()
- frame_count+=1
- # Only run every 5 frames
- if frame_count%5==0:
- # Save the image as the fist layer of inception is a DecodeJpeg
- cv2.imwrite("current_frame.jpg",frame)
- image_data = tf.gfile.FastGFile("./current_frame.jpg", 'rb').read()
- predictions=sess.run(softmax_tensor,{'DecodeJpeg/contents:0':image_data})
- predictions = np.squeeze(predictions)
- node_lookup = NodeLookup()
- # change n_pred for more predictions
- n_pred=1
- top_k = predictions.argsort()[-n_pred:][::-1]
- for node_id in top_k:
- human_str_n = node_lookup.id_to_string(node_id)
- score = predictions[node_id]
- if score>.5:
- # Some manual corrections
- if human_str_n=="stethoscope":human_str_n="Headphones"
- if human_str_n=="spatula":human_str_n="fork"
- if human_str_n=="iPod":human_str_n="iPhone"
- human_str=human_str_n
- lst=human_str.split()
- human_str=" ".join(lst[0:2])
- human_str_filename=str(lst[0])
- current= time.time()
- fps=frame_count/(current-start)
- # Speech module
- if last>40 and not pygame.mixer.music.get_busy() and human_str==human_str_n:
- pred+=1
- name=human_str_filename+".mp3"
- # Only get from google if we dont have it
- if not os.path.isfile(name):
- tts = gTTS(text="I see a "+human_str, lang='en')
- tts.save(name)
- last=0
- pygame.mixer.music.load(name)
- pygame.mixer.music.play()
- # Show info during some time
- if last<40 and frame_count>10:
- # Change the text position depending on your camera resolution
- cv2.putText(frame,human_str, (20,400),font, 1, font_color)
- cv2.putText(frame,str(np.round(score,2))+"%",(20,440),font,1,font_color)
- if frame_count>20:
- fps_text="fps: "+str(np.round(fps,2))
- cv2.putText(frame, fps_text, (460,460), font, 1, font_color)
- cv2.imshow("Frame", frame)
- last+=1
- # if the 'q' key is pressed, stop the loop
- if cv2.waitKey(1) & 0xFF == ord("q"):break
- # cleanup everything
- vs.stop()
- cv2.destroyAllWindows()
- sess.close()
- print("Done")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement