# classify_video.py will classify a video using
# (1) singleFrame RGB model
# (2) singleFrame flow model
# (3) 0.5/0.5 singleFrame RGB/singleFrame flow fusion
# (4) 0.33/0.67 singleFrame RGB/singleFrame flow fusion
# (5) LRCN RGB model
# (6) LRCN flow model
# (7) 0.5/0.5 LRCN RGB/LRCN flow fusion
# (8) 0.33/0.67 LRCN RGB/LRCN flow fusion

# Before using, change RGB_video_path and flow_video_path.
# Use: classify_video.py video, where video is the video you wish to classify.
# If no video is specified, the video "v_Archery_g01_c01" will be classified.
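#
# Example invocation (the video name is the script's default and assumes
# frames have been extracted to RGB_video_path/v_Archery_g01_c01/*.jpg):
#   python classify_video.py v_Archery_g01_c01
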
import numpy as np
import glob
import sys; sys.path.insert(0, '/opt/caffe/python')
import caffe
import pickle

caffe.set_mode_gpu()

RGB_video_path = '/data/frames/'
flow_video_path = '/data/flow_images/'
if len(sys.argv) > 1:
    video = sys.argv[1]
else:
    video = 'v_Archery_g01_c01'

# Initialize transformers
def initialize_transformer(image_mean, is_flow):
    shape = (10 * 16, 3, 227, 227)
    transformer = caffe.io.Transformer({'data': shape})
    channel_mean = np.zeros((3, 227, 227))
    for channel_index, mean_val in enumerate(image_mean):
        channel_mean[channel_index, ...] = mean_val
    transformer.set_mean('data', channel_mean)
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_is_flow('data', is_flow)
    return transformer

ucf_mean_RGB = np.zeros((3, 1, 1))
ucf_mean_flow = np.zeros((3, 1, 1))
ucf_mean_flow[:, :, :] = 128
ucf_mean_RGB[0, :, :] = 103.939
ucf_mean_RGB[1, :, :] = 116.779
ucf_mean_RGB[2, :, :] = 128.68

transformer_RGB = initialize_transformer(ucf_mean_RGB, False)
transformer_flow = initialize_transformer(ucf_mean_flow, True)
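
# A minimal sanity check of the RGB transformer (a sketch, not part of the
# original pipeline; the random array stands in for caffe.io.load_image
# output, an H x W x 3 float image in [0, 1]):
#   dummy = np.random.rand(240, 320, 3).astype(np.float32)
#   blob = transformer_RGB.preprocess('data', dummy)
#   assert blob.shape == (3, 227, 227)  # channel-first, BGR, mean-subtracted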

# Extract list of frames in video
RGB_frames = glob.glob('%s%s/*.jpg' % (RGB_video_path, video))
flow_frames = glob.glob('%s%s/*.jpg' % (flow_video_path, video))

RGB_videos = glob.glob('%s/*' % RGB_video_path)

# Collect the frame list of every video under RGB_video_path
RGB_v = []
for v in RGB_videos:
    RGB_v.append(glob.glob('%s/*.jpg' % v))

# classify video with LRCN model
def LRCN_classify_video(frames, net, transformer, is_flow):
    clip_length = 16
    offset = 8
    input_images = []
    for im in frames:
        input_im = caffe.io.load_image(im)
        if input_im.shape[0] < 240:
            input_im = caffe.io.resize_image(input_im, (240, 320))
        input_images.append(input_im)
    vid_length = len(input_images)
    input_data = []
    for i in range(0, vid_length, offset):
        if (i + clip_length) < vid_length:
            input_data.extend(input_images[i:i + clip_length])
        else:  # video may not be divisible by clip_length
            input_data.extend(input_images[-clip_length:])
    output_predictions = np.zeros((len(input_data), 101))
    for i in range(0, len(input_data), clip_length):
        clip_input = input_data[i:i + clip_length]
        clip_input = caffe.io.oversample(clip_input, [227, 227])
        clip_clip_markers = np.ones((clip_input.shape[0], 1, 1, 1))
        clip_clip_markers[0:10, :, :, :] = 0
        # if is_flow:  # need to negate the values when mirroring
        #     clip_input[5:, :, :, 0] = 1 - clip_input[5:, :, :, 0]
        caffe_in = np.zeros(np.array(clip_input.shape)[[0, 3, 1, 2]], dtype=np.float32)
        for ix, inputs in enumerate(clip_input):
            caffe_in[ix] = transformer.preprocess('data', inputs)
        out = net.forward_all(data=caffe_in, clip_markers=np.array(clip_clip_markers))
        output_predictions[i:i + clip_length] = np.mean(out['probs'], 1)
    return np.mean(output_predictions, 0).argmax(), output_predictions
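
# Worked example of the clip windowing above (illustrative numbers only):
# with vid_length = 40, clip_length = 16, offset = 8, clips start at frames
# 0, 8, and 16; the window starts at 24 and 32 would overrun the video, so
# both iterations fall back to the last full window, frames 24-39 (that
# window is therefore scored twice).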

# classify video with singleFrame model
def singleFrame_classify_video(frames, net, transformer, is_flow):
    batch_size = 16
    input_images = []
    for im in frames:
        input_im = caffe.io.load_image(im)
        if input_im.shape[0] < 240:
            input_im = caffe.io.resize_image(input_im, (240, 320))
        input_images.append(input_im)
    vid_length = len(input_images)

    output_predictions = np.zeros((len(input_images), 101))
    for i in range(0, len(input_images), batch_size):
        clip_input = input_images[i:min(i + batch_size, len(input_images))]
        clip_input = caffe.io.oversample(clip_input, [227, 227])
        clip_clip_markers = np.ones((clip_input.shape[0], 1, 1, 1))
        clip_clip_markers[0:10, :, :, :] = 0
        if is_flow:  # need to negate the values when mirroring
            clip_input[5:, :, :, 0] = 1 - clip_input[5:, :, :, 0]
        caffe_in = np.zeros(np.array(clip_input.shape)[[0, 3, 1, 2]], dtype=np.float32)
        for ix, inputs in enumerate(clip_input):
            caffe_in[ix] = transformer.preprocess('data', inputs)
        net.blobs['data'].reshape(caffe_in.shape[0], caffe_in.shape[1], caffe_in.shape[2], caffe_in.shape[3])
        out = net.forward_all(data=caffe_in)
        output_predictions[i:i + batch_size] = np.mean(out['probs'].reshape(10, caffe_in.shape[0] // 10, 101), 0)
    return np.mean(output_predictions, 0).argmax(), output_predictions
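
# Shape bookkeeping for the oversample/mean above (illustrative numbers
# only): a batch of 16 frames becomes 160 crops (10 per frame: 4 corners,
# center, and their mirrors), out['probs'] has shape (160, 101), and the
# reshape plus mean collapses it back to one 101-way prediction per frame.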

# Models and weights
singleFrame_model = 'deploy_singleFrame.prototxt'
lstm_model = 'deploy_lstm.prototxt'
RGB_singleFrame = 'single_frame_all_layers_hyb_RGB_iter_5000.caffemodel'
flow_singleFrame = 'single_frame_all_layers_hyb_flow_iter_50000.caffemodel'
RGB_lstm = 'RGB_lstm_model_iter_30000.caffemodel'
flow_lstm = 'flow_lstm_model_iter_50000.caffemodel'

RGB_singleFrame_net = caffe.Net(singleFrame_model, RGB_singleFrame, caffe.TEST)

# Classify every video found under RGB_video_path, then drop into an
# interactive shell after each one to inspect label/predictions.
for v in RGB_v:
    label, predictions = singleFrame_classify_video(v, RGB_singleFrame_net, transformer_RGB, False)
    import code; code.interact(local=dict(globals(), **locals()))

# class_RGB_singleFrame, predictions_RGB_singleFrame = \
#     singleFrame_classify_video(RGB_frames, RGB_singleFrame_net, transformer_RGB, False)
# del RGB_singleFrame_net

# flow_singleFrame_net = caffe.Net(singleFrame_model, flow_singleFrame, caffe.TEST)
# class_flow_singleFrame, predictions_flow_singleFrame = \
#     singleFrame_classify_video(flow_frames, flow_singleFrame_net, transformer_flow, True)
# del flow_singleFrame_net
#
# RGB_lstm_net = caffe.Net(lstm_model, RGB_lstm, caffe.TEST)
# class_RGB_LRCN, predictions_RGB_LRCN = \
#     LRCN_classify_video(RGB_frames, RGB_lstm_net, transformer_RGB, False)
# del RGB_lstm_net
#
# flow_lstm_net = caffe.Net(lstm_model, flow_lstm, caffe.TEST)
# class_flow_LRCN, predictions_flow_LRCN = \
#     LRCN_classify_video(flow_frames, flow_lstm_net, transformer_flow, True)
# del flow_lstm_net
#
#
# def compute_fusion(RGB_pred, flow_pred, p):
#     return np.argmax(p * np.mean(RGB_pred, 0) + (1 - p) * np.mean(flow_pred, 0))
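#
# Worked example of the fusion weighting (illustrative numbers only): with
# p = 0.33, a class scored 0.6 by the RGB model and 0.3 by the flow model
# gets 0.33 * 0.6 + 0.67 * 0.3 = 0.399 before the argmax.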


# Load activity label hash
# action_hash = pickle.load(open('action_hash_rev.p', 'rb'))

# print "RGB single frame model classified video as: %s.\n" % (action_hash[class_RGB_singleFrame])
# print "Flow single frame model classified video as: %s.\n" % (action_hash[class_flow_singleFrame])
# print "RGB LRCN model classified video as: %s.\n" % (action_hash[class_RGB_LRCN])
# print "Flow LRCN model classified video as: %s.\n" % (action_hash[class_flow_LRCN])
# print "0.5/0.5 single frame fusion model classified video as: %s.\n" % (
#     action_hash[compute_fusion(predictions_RGB_singleFrame, predictions_flow_singleFrame, 0.5)])
# print "0.33/0.67 single frame fusion model classified video as: %s.\n" % (
#     action_hash[compute_fusion(predictions_RGB_singleFrame, predictions_flow_singleFrame, 0.33)])
# print "0.5/0.5 LRCN fusion model classified video as: %s.\n" % (
#     action_hash[compute_fusion(predictions_RGB_LRCN, predictions_flow_LRCN, 0.5)])
# print "0.33/0.67 LRCN fusion model classified video as: %s.\n" % (
#     action_hash[compute_fusion(predictions_RGB_LRCN, predictions_flow_LRCN, 0.33)])