Advertisement
Guest User

Timer Detector v2

a guest
Feb 8th, 2015
307
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 54.88 KB | None | 0 0
  1. #
  2. # ruby <this-script> <input-video-file>
  3. #
  4.  
  5. # also needs ffmpeg
  6. require 'json'
  7. require 'narray'
  8. require 'fftw3'
  9.  
  # Default option values. The analysis functions in this file fall back to
  # these through `opts[:key] ||= Default[:key]` when the caller omits a key.
  Default = {}
  Default[:debug] = $stderr # stream that receives progress messages via <<
  Default[:ffmpeg] = 'ffmpeg' # ffmpeg executable
  Default[:transcode_before_analysis] = true # some video files are broken and acquiring frames directly will produce bad results

  Default[:period_peak_width] = Rational(2,10) # width of peaks / period length
  Default[:peak_height] = 0.5 # ratio of peak base to peak height
  Default[:peak_min_height] = 1E-15 # data/data[0] below this threshold is considered equal to zero, conservative default, depends upon your choice of period_analyze
  Default[:period_analyze] = Rational(4,4) # uses only the first part of the autocorrelation diagram for analysis
  Default[:threshold_pixel_std] = 15 # maximum standard deviation for a pixel over time to be classified as static
  Default[:threshold_pixel_absdiff] = 15 # maximum absolute difference averaged over time for a pixel to be classified as static
  Default[:threshold_pixel_perdiff] = 7 # maximum difference of a pixel over one period averaged over time to be classified as periodic
  Default[:static_interval_threshold] = 0.3 # maximum distance of a static interval of tick_seconds averaged over all frames to be classified as changing every second
  Default[:bounding_box_difference] = 2*4**2 # maximum sum of squared distances of two timers' bounding boxes' top-left and bottom-right corners to be classified as equal, otherwise they get grouped together, relative to the downscaled +video_x_resolution+ frame size
  Default[:bounding_box_area] = (10..400) # min/max area of a timer's bounding box, relative to the downscaled +video_x_resolution+ frame size
  Default[:bounding_box_width] = 3 # min width of a timer's bounding box, relative to the downscaled +video_x_resolution+ frame size
  Default[:bounding_box_height] = 3 # min height of a timer's bounding box, relative to the downscaled +video_x_resolution+ frame size
  Default[:bounding_box_fill_ratio] = 0.7 # ratio of pixels of a rectangle that must have been classified as valid to be classified as valid
  Default[:bounding_box_smooth_width] = 2 # dilates bounding boxes with a rectangle of size smooth_width*2+1
  Default[:location_method] = :simple # :simple, :fourier, :both, :steps
  Default[:dynamicity_method] = :random_comparison # method to determine whether a pixel is changing or remaining constant, :random_comparison or :standard_deviation
  Default[:timer_ticking_tick_threshold] = 4 # number of consecutive frames the timer must be ticking to be classified as such, eliminating short intervals
  Default[:timer_ticking_hold_threshold] = 4 # number of consecutive frames the timer must be on hold to be classified as such, eliminating short intervals
  Default[:timer_ticking_smooth_width] = 4 # width of gaussian smooth of the thresholded timer ticking/pausing data, applied before eliminating short intervals
  # NOTE(review): the descriptions of the two scan intervals look swapped (the
  # "fine" interval is the shorter one) -- confirm against the scanning code.
  Default[:scan_interval_fine] = Rational(7,4) # time in seconds between frames when scanning for timer movement (used to scan the entire video)
  Default[:scan_interval_coarse] = Rational(7,1) # time in seconds between frames when scanning for timer movement (to improve the resolution of the result)
  Default[:scan_fine_interval] = true # for each detected event (timer starts/stops), scan again at increased resolution
  Default[:min_different_digits] = 1 # number of different digits an interval must contain to be considered a timer, the number of digits is lower than 10 for certain time steps, eg. taking a frame every 5s gives only 2 different digits (-> don't use these time steps)
  Default[:digits_difference] = 1.0 # difference between two digits, per pixel, takes the squared difference between two pixels'
                                    # grey values
                                    # Sum[(Image1-Image2)^2]/number_of_pixels < digits_difference^2
  Default[:digits_difference_single_pixel] = 6 # same as above, but not averaged over all pixels, used to locate the timer
                                               # Abs[(Pixel1-Pixel2)] < :digits_difference_single_pixel
  Default[:timer_frequency_ratio] = 0.5 # area with highest frequency is classified as timer, used to recognize multiple timers, they must occur at least this often
  Default[:locate_temporal_resolution] = Rational(3,1) # time between two frames when scanning for the location of the timer, scans multiple time intervals at this resolution, by default 60 frames, 1 second resolution => 1min interval, scale accordingly (2 seconds=>2min interval)
  Default[:frames_per_interval] = 60 # number of frames/samples per interval, video is split into several intervals that get scanned separately for a running timer
  Default[:period_seconds] = Rational(10,1) # real-time seconds after which the seconds digit of the timer repeats, should be 10 unless the video is slowed down/sped up
  Default[:video_x_resolution] = 320 # scale video to this resolution before analyzing, preserving the aspect ratio
  Default[:video_scaler] = 'bilinear' # method to downscale the video
  49.  
  # Adds least-common-multiple and greatest-common-divisor to Rational.
  # +you+ may be any number responding to +numerator+ and +denominator+.
  class Rational
      # lcm of the numerators over gcd of the denominators.
      def lcm(you)
          top = numerator.lcm(you.numerator)
          bottom = denominator.gcd(you.denominator)
          Rational(top,bottom)
      end

      # gcd of the numerators over lcm of the denominators.
      def gcd(you)
          top = numerator.gcd(you.numerator)
          bottom = denominator.lcm(you.denominator)
          Rational(top,bottom)
      end
  end
  58.  
  # In-memory cache of decoded frames, keyed by the timestamp they were
  # requested for. Frames are stored with typecode TYPE (NArray::SINT).
  # When SIZE entries are reached, the CLEAR_SIZE oldest entries (by insertion
  # order) are evicted.
  module FrameCache
      CACHE = {}
      ORDER = [] # the order in which frames were added
      TYPE = NArray::SINT
      CLEAR_SIZE = 3000
      SIZE = 6000

      # Stores +frame+ under +time+, converting it to TYPE when necessary.
      # Evicts old entries first if the cache is full.
      def self.push(time,frame)
          clear if CACHE.size >= SIZE
          CACHE[time] = frame.typecode == TYPE ? frame : frame.to_type(TYPE)
          ORDER << time
      end

      # Returns the frame cached for +time+, or nil.
      def self.fetch(time)
          CACHE[time]
      end

      # Returns the cache entries (time => frame) whose frame == +frame+.
      def self.search(frame)
          CACHE.select{|t,f|f==frame}
      end

      # Evicts the CLEAR_SIZE oldest entries (or everything when CLEAR_SIZE
      # equals SIZE).
      def self.clear
          if CLEAR_SIZE == SIZE
              CACHE.clear
              # keep the bookkeeping in sync with the (now empty) cache
              ORDER.clear
          else
              # keep only the last occurrence of times that were pushed
              # multiple times, preserving the order of those occurrences
              ORDER.replace(ORDER.reverse.uniq.reverse)
              # drop the oldest entries, keeping the most recently added
              ORDER.slice!(0,CLEAR_SIZE).each do |time|
                  CACHE.delete(time)
              end
          end
      end

      # On a cache hit, copies the cached frame into data[*idx].
      # On a miss, yields and caches the block's return value under +time+;
      # the block is expected to fill +data+ itself (the callers in this
      # file do so via read_gray_frame).
      def self.put_frame(time,data,idx)
          if frame = self.fetch(time)
              data[*idx] = frame
          else
              push(time,yield)
          end
      end

      # Replaces the cache content with a string produced by +dump+.
      # NOTE: Marshal.load can execute arbitrary code when fed untrusted
      # input; only load dumps written by this program.
      def self.load(data)
          data = Marshal.load(data)
          CACHE.clear
          ORDER.clear
          data[0].each do |entry|
              CACHE[entry[0]] = NArray.to_na(entry[1],entry[2],*entry[3])
          end
          ORDER.concat(data[1])
      end

      # Serializes the cache (raw frame bytes, typecode and shape per entry,
      # plus the insertion order) into a Marshal string.
      def self.dump
          Marshal.dump([CACHE.map{|time,frame|[time,frame.to_s,frame.typecode,frame.shape,]},ORDER])
      end
  end
  114.  
  # Reads one raw frame from +io+ and stores its luma (gray) plane.
  # +io+ is the stream to read from, +pix_fmt+ the pixel format of the frame,
  # +res+ the number of pixels and +data+ the array receiving the pixel data;
  # +idx+ is splatted into data[*idx]=. For example, with pixels as the first
  # and frames as the second dimension, idx=[true,4] fills the 5th frame.
  # Returns the NArray that was inserted.
  # Raises StandardError for any pixel format other than 'yuv420p' or when
  # fewer than +res+ bytes could be read.
  def read_gray_frame(io,pix_fmt,res,data,idx)
      raise StandardError, 'unsupported pixel format' unless 'yuv420p' == pix_fmt

      # yuv420p: 8 bit Y plane followed by the 2x2 subsampled chroma planes.
      luma = io.read(res)
      if luma.nil? || luma.bytesize < res
          raise StandardError, 'unable to acquire frame(s), check video file'
      end
      frame = NArray.to_na(luma,1) # typecode 1, presumably NArray::BYTE
      data[*idx] = frame
      io.read(res/2) # discard the chroma plane(s)
      frame
  end
  135.  
  # Transcodes +file+ to a downscaled H.264 file ('transcode.mp4' next to
  # this script) and returns its path, or nil when ffmpeg failed.
  # Honors opts[:ffmpeg], opts[:video_scaler], opts[:video_x_resolution] and
  # writes progress to opts[:debug]; missing keys are filled from Default.
  # Raises StandardError if +file+ does not exist.
  def transcode_file(file,opts={})
      raise StandardError, "no such file" unless File.exist?(file)

      opts[:debug] ||= Default[:debug]
      opts[:ffmpeg] ||= Default[:ffmpeg]
      opts[:video_scaler] ||= Default[:video_scaler]
      opts[:video_x_resolution] ||= Default[:video_x_resolution]

      duration = VideoScanner.get_duration(file).to_f
      width = VideoScanner.get_stream_part(file,'v','width')[0]['width'].to_i
      height = VideoScanner.get_stream_part(file,'v','height')[0]['height'].to_i
      down_x = opts[:video_x_resolution]
      down_y = down_x*height/width

      file = File.expand_path(file)
      tempfile = File.join(File.dirname(__FILE__),'transcode.mp4')

      # Argument vector instead of a shell string: paths containing spaces,
      # quotes or '$' reach ffmpeg unchanged, and the configured executable
      # (opts[:ffmpeg]) is actually used.
      argv = [opts[:ffmpeg].to_s,'-y','-loglevel','quiet','-i',file,
              '-c:v','libx264','-an','-vf',"scale=#{down_x}:#{down_y}",
              '-preset','ultrafast','-sws_flags',opts[:video_scaler].to_s,
              '-g','20','-movflags','faststart','-crf','12','-f','mp4',tempfile]

      opts[:debug] << "Transcoding video file (#{(duration/60.0).round(2)} min) before analysis...\n"
      opts[:debug] << "#{argv.join(' ')}\n"

      t_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      # stdin is redirected so ffmpeg cannot consume the terminal's input
      succ = system(*argv, in: File::NULL)
      t_dur = Process.clock_gettime(Process::CLOCK_MONOTONIC)-t_start
      speed = t_dur > 0 ? (duration/t_dur).round : 0
      opts[:debug] << "Transcoding took #{t_dur}s, #{speed}x real-time.\n"

      return succ ? tempfile : nil
  end
  165.  
  # Extracts +frames+ frames, +dt+ seconds apart beginning at +start+, and
  # returns only the regions given by +crop+.
  # Assumes there is only one video stream, and takes the first.
  # +crop+ is an Array of rectangles, a rectangle is an NArray
  # [a_x,a_y,b_x,b_y], a is the top-left corner, b the bottom-right corner,
  # (x,y)=(0,0) is at the top-left corner of the (downscaled) video frame.
  # Returns an Array with one NArray of shape [pixel,frame] per rectangle.
  def get_cropped_frames(file,start,frames,dt,crop,opts={})
      opts[:debug] ||= Default[:debug]
      opts[:ffmpeg] ||= Default[:ffmpeg]
      opts[:video_scaler] ||= Default[:video_scaler]
      opts[:video_x_resolution] ||= Default[:video_x_resolution]

      start = Rational(start)
      dt = Rational(dt)

      file = File.expand_path(file)
      pix_fmt = VideoScanner.get_stream_part(file,'v','pix_fmt')[0]['pix_fmt']
      width = VideoScanner.get_stream_part(file,'v','width')[0]['width'].to_i
      height = VideoScanner.get_stream_part(file,'v','height')[0]['height'].to_i
      down_x = opts[:video_x_resolution]
      down_y = down_x*height/width
      resize_needed = down_x!=width || down_y!=height
      scale_args = resize_needed ? ['-filter:v',"scale=#{down_x}:#{down_y}",'-sws_flags',opts[:video_scaler].to_s] : []
      data = NArray.int(down_y*down_x)
      regs = crop.map do |c|
          NArray.int((c[2]-c[0]+1),(c[3]-c[1]+1),frames)
      end
      frames.times do |i|
          opts[:debug] << "Reading frame #{i+1}/#{frames}...\r"
          data.reshape!(down_x*down_y)
          time = dt*i+start
          FrameCache.put_frame(time,data,[nil]) do
              # Argument vector instead of a shell string, so that file and
              # executable paths need no quoting.
              argv = [opts[:ffmpeg].to_s,'-loglevel','quiet','-accurate_seek',
                      '-ss',time.to_f.to_s,'-i',file,*scale_args,
                      '-frames:v','1','-f','rawvideo','-']
              IO.popen(argv,'rb',in: File::NULL) do |io|
                  read_gray_frame(io,pix_fmt,down_x*down_y,data,[nil])
              end
          end
          data.reshape!(down_x,down_y)
          crop.each_with_index do |c,j|
              regs[j][true,true,i] = data[c[0]..c[2],c[1]..c[3]]
          end
      end
      # flatten each region from [x,y,frame] to [pixel,frame]
      regs.each do |r|
          r.reshape!(r.shape[0]*r.shape[1],r.shape[2])
      end
      return regs
  end
  211.  
  # Extracts +frames+ full frames, +dt+ seconds apart beginning at +start+,
  # downscaled to opts[:video_x_resolution] pixels width.
  # Assumes there is only one video stream, and takes the first.
  # Returns an NArray of shape [pixel,frame] and the frame dimensions
  # [width,height].
  def get_frames(file,start,frames,dt,opts={})
      opts[:debug] ||= Default[:debug]
      opts[:ffmpeg] ||= Default[:ffmpeg]
      opts[:video_scaler] ||= Default[:video_scaler]
      opts[:video_x_resolution] ||= Default[:video_x_resolution]

      start = Rational(start)
      dt = Rational(dt)

      file = File.expand_path(file)
      pix_fmt = VideoScanner.get_stream_part(file,'v','pix_fmt')[0]['pix_fmt']
      width = VideoScanner.get_stream_part(file,'v','width')[0]['width'].to_i
      height = VideoScanner.get_stream_part(file,'v','height')[0]['height'].to_i
      down_x = opts[:video_x_resolution]
      down_y = down_x*height/width
      dims = [down_x,down_y]
      resize_needed = down_x!=width || down_y!=height
      scale_args = resize_needed ? ['-filter:v',"scale=#{down_x}:#{down_y}",'-sws_flags',opts[:video_scaler].to_s] : []
      data = NArray.int(down_x*down_y,frames)
      frames.times do |i|
          opts[:debug] << "Reading frame #{i+1}/#{frames}...\r"
          time = dt*i+start
          FrameCache.put_frame(time,data,[true,i]) do
              # Argument vector instead of a shell string: String#inspect is
              # not shell quoting ('$' and backticks would still be expanded).
              argv = [opts[:ffmpeg].to_s,'-loglevel','quiet','-accurate_seek',
                      '-ss',time.to_f.to_s,'-i',file,*scale_args,
                      '-frames:v','1','-f','rawvideo','-']
              IO.popen(argv,'rb',in: File::NULL) do |io|
                  read_gray_frame(io,pix_fmt,down_x*down_y,data,[true,i])
              end
          end
      end
      return data, dims
  end
  244.  
  # Transposes a rectangular 2d-array given as an Array of rows.
  # Returns [[]] when +array+ has no first row. Rows shorter than the first
  # one contribute nil for their missing columns.
  def transpose(array)
      head = array.first
      return [[]] if head.nil?
      (0...head.size).map do |col|
          array.map { |row| row[col] }
      end
  end
  256.  
  257.  
  258. ##########################
  259. # Getting video metadata #
  260. ##########################
  261.  
  # Queries video metadata through the external +ffprobe+ and +mediainfo+
  # command line tools.
  module VideoScanner

      # Runs +argv+ (program followed by its arguments) without a shell and
      # returns what it wrote to stdout. Returns '' when the program is not
      # installed, so callers can fall back to another tool.
      def self.capture(argv)
          IO.popen(argv, &:read).to_s
      rescue Errno::ENOENT
          ''
      end

      # Returns mediainfo's --Inform output for +param+, or nil if empty.
      def self.get_mediainfo(file,param)
          info = capture(['mediainfo',"--Inform=#{param}",file.to_s]).strip
          info.empty? ? nil : info
      end

      # Formats +t+ seconds as "h:mm:ss" (hours omitted when zero), parts
      # joined with +joiner+.
      def self.s2hms_str(t, joiner=':')
          hms = s2hms(t)
          hms[1] = hms[1].to_s.rjust(2,'0')
          hms[2] = hms[2].to_s.rjust(2,'0')
          hms.shift if hms[0] == 0
          hms.join(joiner)
      end

      # Splits +t+ seconds into [hours, minutes, remaining seconds].
      def self.s2hms(t)
          h = (t/3600).to_i
          t -= 3600*h
          m = (t/60).to_i
          t -= 60*m
          [h,m,t]
      end

      # Returns the container's start time in seconds, or nil.
      def self.get_start_time(file)
          res = get_format_part(file,'start_time')
          res ? res['start_time'].to_f : nil
      end

      # Returns an Array with the average frame rate of each video stream
      # (ffprobe), or a one-element Array with mediainfo's frame rate when
      # ffprobe gives no usable value, or nil.
      def self.get_fps(file)
          file = File.expand_path(file)
          res = get_stream_part(file,'v','avg_frame_rate')
          rates = res && res.map{|x|x['avg_frame_rate'].to_s}
          # a missing rate or a zero denominator ("0/0") is unusable
          if rates && rates.none?{|r|r.empty? || r.match(/\/0$/)}
              return rates.map{|r|Rational(r).to_f}
          end
          minfo = get_mediainfo(file,'Video;%FrameRate_Nominal%') || get_mediainfo(file,'Video;%FrameRate%')
          minfo ? [minfo.to_f] : nil
      end

      # Returns the duration in seconds, or nil when neither tool reports one.
      def self.get_duration(file)
          file = File.expand_path(file)
          res = get_format_part(file,'duration')
          return res['duration'].to_f if res
          minfo = get_mediainfo(file,'Video;%Duration%') || get_mediainfo(file,'General;%Duration%')
          # mediainfo reports %Duration% in milliseconds, ffprobe in seconds
          minfo ? minfo.to_f/1000.0 : nil
      end

      # Returns the requested entries of the format section, or nil.
      def self.get_format_part(file,part)
          get_part(file,'format',part)
      end

      # Returns an Array with the requested entries of each stream selected
      # by +streams+ (an ffprobe stream specifier), or nil.
      def self.get_stream_part(file,streams,part)
          get_part(file,'streams',part,streams)
      end

      # Runs ffprobe and returns the +category+ section ('format' => Hash,
      # 'streams' => Array of Hashes) reduced to the keys listed in +part+.
      # Returns nil when ffprobe produced no usable output.
      def self.get_part(file,category,part,streams=nil)
          conv = { 'streams' => 'stream', 'format' => 'format'}
          part = [part].flatten
          file = File.expand_path(file)
          argv = ['ffprobe','-loglevel','quiet','-i',file,'-print_format','json',
                  '-show_entries',"#{conv[category]}=#{part.join(',')}"]
          argv.push('-select_streams',streams.to_s) if streams
          begin
              parsed = JSON.parse(capture(argv))
          rescue JSON::ParserError
              return nil
          end
          return nil unless parsed.is_a?(Hash)
          res = parsed[category]
          return nil unless res
          case res
          when Hash
              res.delete_if{|key,_|!part.include?(key)}
          when Array
              res.each do |r|
                  r.delete_if{|key,_|!part.include?(key)}
              end
          end
          res.empty? ? nil : res
      end

      # Returns an Array of [time since first frame, time since previous
      # frame] pairs (seconds) for every frame after the first of video
      # stream +stream_no+. With +show_progress+ a percentage is written to
      # $stderr, scaled by +scale_progress+.
      def self.get_frame_durations(file, stream_no, show_progress, scale_progress=1.0)
          dur = get_duration(file)
          # NOTE(review): newer ffprobe releases may name this entry pts_time
          # instead of pkt_pts_time -- confirm against the installed version.
          argv = ['ffprobe','-threads','2','-loglevel','quiet','-i',file.to_s,
                  '-select_streams',"v:#{stream_no}",
                  '-show_entries','frame=pkt_pts_time','-print_format','flat']
          data = []
          begin
              IO.popen(argv) do |io|
                  data = collect_frame_durations(io.each_line,dur,show_progress,scale_progress)
              end
          rescue Errno::ENOENT
              # ffprobe not installed: no data
          end
          return data
      end

      # Turns ffprobe "flat" output +lines+ into [offset,duration] pairs.
      # The first timestamp found becomes the reference (offset zero); each
      # following timestamp yields one pair.
      def self.collect_frame_durations(lines,dur,show_progress,scale_progress)
          data = []
          prv_time = nil
          start_time = nil
          pct_last = -1
          progress = show_progress && dur && dur > 0
          lines.each do |line|
              found = line.match(/(\d+\.\d+)/)
              next unless found
              time = found[1].to_f
              if start_time
                  if progress
                      pct = (100.0*scale_progress*(time-start_time)/dur).to_i
                      if pct > pct_last
                          $stderr << "%03d%s\r" % [pct,'%']
                          pct_last = pct
                      end
                  end
                  data << [time-start_time,(time-prv_time).abs]
              else
                  start_time = time
              end
              prv_time = time
          end
          data
      end

      private_class_method :capture, :collect_frame_durations
  end
  400.  
  401. ############################################################
  402. # Different methods of filtering pixels containing a timer #
  403. ############################################################
  404.  
  # Marks pixels that change in steps compatible with a period of +per+ frames.
  # per... number of frames after which the timer changes, eg every 10 frames
  #        when sampled at 0.1 s/frame
  # +pixels+ is indexed as [pixel,frame]. Returns an integer NArray with one
  # entry per pixel: 255 for accepted pixels, 0 otherwise.
  def check_step_behaviour(pixels,per,opts={})
      opts[:digits_difference_single_pixel] ||= Default[:digits_difference_single_pixel]
      opts[:static_interval_threshold] ||= Default[:static_interval_threshold]

      period = per.to_f
      min_change = opts[:digits_difference_single_pixel]
      max_sq_dev = opts[:static_interval_threshold]**2
      pixel_count = pixels.shape[0]
      frame_count = pixels.shape[1]
      mask = NArray.int(pixel_count) # starts out all zero
      # threshold into two states: static or dynamic
      changed = (pixels[true,0..-2]-pixels[true,1..-1]).abs >= min_change
      frame_no = NArray.float(frame_count-1).indgen
      # check for static intervals
      pixel_count.times do |p|
          ticks = frame_no[changed[p,true]]
          # fewer than two changes: no interval to judge, pixel stays 0
          next if ticks.size < 2
          gaps = (ticks[1..-1]-ticks[0..-2]).abs
          # short gaps count by their remainder modulo the period
          gaps.map!{|t| t < 6*period ? t%period : t}
          mask[p] = 255 unless (gaps**2).sum > (ticks.size-1)*max_sq_dev
      end
      mask
  end
  433.  
  # Thresholds the per-pixel standard deviation over time.
  # +data+ is indexed as [pixel,frame]. Returns an integer array with 0 for
  # pixels below opts[:threshold_pixel_std] and 0xFF for all others.
  def pixel_difference_std(data,opts={})
      opts[:threshold_pixel_std] ||= Default[:threshold_pixel_std]
      limit = opts[:threshold_pixel_std]
      deviation = data.stddev(1)
      threshold_pixels!(deviation,limit)
      deviation.to_i
  end
  441.  
  442.  
  # Thresholds the mean absolute difference between consecutive frames.
  # +data+ is indexed as [pixel,frame]. Returns an integer array with 0 for
  # pixels whose mean difference is below opts[:threshold_pixel_absdiff]
  # and 0xFF for all others. With fewer than two frames there is nothing to
  # compare and every pixel is reported as 0.
  def pixel_difference_diff(data,opts={})
      opts[:threshold_pixel_absdiff] ||= Default[:threshold_pixel_absdiff]
      res = data.shape[0]
      n = data.shape[1]
      pix_sdiff = NArray.int(res)
      # avoids dividing by zero below
      return pix_sdiff if n < 2
      # the sum does not depend on the order, so iterate in sequence
      (1..n-1).each do |i|
          pix_sdiff += (data[true,i]-data[true,i-1]).abs
      end
      # normalize differences and threshold
      pix_sdiff /= (n-1)
      threshold_pixels!(pix_sdiff,opts[:threshold_pixel_absdiff])
      return pix_sdiff.to_i
  end
  457.  
  # Marks pixels whose value repeats every +period+ frames.
  # +data+ is indexed as [pixel,frame]. Frames are grouped by their phase
  # within the period; a pixel whose standard deviation within the phases,
  # averaged over all phases, is below opts[:threshold_pixel_perdiff] gets
  # 255, all others 0. Trailing frames that do not fill a whole period are
  # ignored. Returns an all-zero array if there is no complete period.
  # NOTE(review): with exactly one complete period every phase holds a single
  # frame; what stddev returns for one sample should be confirmed.
  def check_periodicity(data,period,opts={})
      opts[:threshold_pixel_perdiff] ||= Default[:threshold_pixel_perdiff]
      res = data.shape[0]
      n = data.shape[1]
      diff_avg = NArray.sint(res)
      # group frame indices into complete cycles
      cycles = n.times.each_slice(period).to_a
      cycles.pop if !cycles.empty? && cycles.last.size < period
      return diff_avg if cycles.empty?
      # one list of frame indices per phase of the cycle
      phases = cycles.transpose
      # average difference between all images of one phase
      phases.each do |idx|
          diff_avg += data[true,idx].stddev(1)
      end
      diff_avg /= phases.size
      # partition into regions compatible with +period+
      threshold_pixels!(diff_avg,opts[:threshold_pixel_perdiff],255,true)
      return diff_avg.to_i
  end
  477.  
  # Marks pixels whose autocorrelation over time shows peaks at multiples of
  # +period+.
  # period... Number of samples (frames) that constitute one period.
  # Assumes that period is an integer number of frames.
  # To get a sensible output, the number of frames should be at least ~6*period
  # +data_t+ is indexed as [pixel,frame]. Returns an integer NArray with one
  # entry per pixel, 0xFF for accepted pixels and 0 otherwise.
  def freq_analysis(data_t,period,opts={})
      opts[:period_peak_width] ||= Default[:period_peak_width]
      opts[:peak_height] ||= Default[:peak_height]
      opts[:peak_min_height] ||= Default[:peak_min_height]
      opts[:period_analyze] ||= Default[:period_analyze]

      res = data_t.shape[0]
      n = data_t.shape[1]

      # peak width in frames
      pfw = (opts[:period_peak_width]*period).to_i
      pfw = 1 if pfw < 1

      # shortcuts
      ph = opts[:peak_height]
      pal = (opts[:period_analyze]*n/period).to_i
      pal = n/period-1 if pal>=n/period
      pmn = opts[:peak_min_height]

      # stores thresholded mask
      diff = NArray.int(res)

      # de-trend by subtracting the mean
      data_d = data_t.clone
      res.times do |i|
          data_d[i,true] -= data_d[i,true].mean
      end

      # auto-correlate data to find periodic timer
      data_a = get_linear_autocorrelation(data_d)

      # remove low data
      data_a[data_a<pmn*data_a.max] = 0.0

      # evaluate autocorrelation diagram for desired periodicity;
      # idx holds the lags period, 2*period, ... at which peaks are expected
      idx = NArray.int(pal).indgen(period,period)
      res.times do |i|
          pix = data_a[i,true]
          next if pix.max!=pix[0]
          # check peak height, should be higher than the surroundings
          bool = true
          (1..pfw-1).each do |j|
              bool &&= (((pix[0..-1-j]-pix[j..-1])[idx]>0) & ((pix[j..-1]-pix[0..-1-j])[idx-j]>0)).sum == idx.size
          end
          # Both sides of each peak must have dropped below ph*peak at a
          # distance of pfw. The masks are combined element-wise with `&`;
          # `&&` would silently discard the first mask.
          next unless bool && (((ph*pix[0..-1-pfw]-pix[pfw..-1])[idx]>0) & ((ph*pix[pfw..-1]-pix[0..-1-pfw])[idx-pfw]>0)).sum == idx.size
          # check interval between two peaks, should be lower than both peaks
          bool = true
          idx.each do |j|
              lpeak = pix[j-period]
              mx = (lpeak-pix[j])/period.to_f
              jdx = NArray.float(period-1).indgen(j+1)
              bool &&= (pix[jdx-period] >= lpeak-(jdx-j)*mx).sum==0
          end
          diff[i] = 0xFF if bool
      end
      return diff
  end
  538.  
  539. #####################
  540. # Utility functions #
  541. #####################
  542.  
  # Splits +t+ seconds into [hours, minutes, seconds, milliseconds].
  # (2*3600+30*60+12.312) => [2, 30, 12, 312]
  # The value is rounded to whole milliseconds first, so that a rounded-up
  # fraction carries into the seconds (0.9996 => [0, 0, 1, 0]) instead of
  # producing 1000 milliseconds.
  def sec2hhmmssms(t)
      total_ms = (t*1000).round
      rest, ms = total_ms.divmod(1000)
      rest, s = rest.divmod(60)
      h, m = rest.divmod(60)
      [h,m,s,ms]
  end
  551.  
  # Removes short runs from a two-state sequence, in place.
  # Runs of 1s shorter than +tick+ become 0s and runs of 0s shorter than
  # +hold+ become 1s, repeated until nothing changes. A run that spans the
  # whole sequence is left alone (flipping it back and forth would never
  # end), and a threshold of 1 or less disables the respective filter.
  # Assumes +data+ holds only the single-digit values 0 and 1 -- the
  # elements are joined into a string of digits. TODO confirm with callers.
  # Returns +data+ unchanged if it has fewer than 3 elements, otherwise the
  # filtered values that were written into +data+.
  def filter_short_states(data,hold,tick)
      return data if data.size < 3
      reg1 = tick > 1 ? /(0|^)(1{1,#{tick-1}})(0|$)/ : nil
      reg2 = hold > 1 ? /(1|^)(0{1,#{hold-1}})(1|$)/ : nil
      str = data.to_a.join
      loop do
          # a uniform sequence has no neighbouring state to merge into
          break if str.squeeze.size < 2
          changed = (reg1 && str.gsub!(reg1){$1+'0'*$2.size+$3}) ||
                    (reg2 && str.gsub!(reg2){$1+'1'*$2.size+$3})
          break unless changed
      end
      data[0..-1] = NArray.to_na(str.chars.map(&:to_i))
  end
  561.  
  # Smooths the 1d array +pixels+ with a Gaussian kernel of 2*width+1 taps.
  # Near the borders the kernel is cut off and renormalized to the part that
  # overlaps the data. Returns +pixels+ itself when it has fewer than
  # 2*width elements, otherwise a new float NArray.
  def smooth_gauss(pixels,width=2)
      count = pixels.size
      return pixels if count < 2*width
      taps = 2*width+1
      kernel = NArray.float(taps)
      taps.times do |k|
          kernel[k] = Math.exp(-2.0*(k-width)**2.0/(width*width))
      end
      kernel *= 1.0/kernel.sum
      smoothed = NArray.float(count)
      count.times do |i|
          # part of the window [i-width, i+width] that lies inside the data
          first = [i-width,0].max
          last = [i+width,count-1].min
          if last-first+1 == taps
              weights = kernel
          else
              # border: use the overlapping kernel part, renormalized
              weights = kernel[first-i+width..last-i+width]
              weights *= 1.0/weights.sum
          end
          smoothed[i] = (pixels[first..last]*weights).sum
      end
      smoothed
  end
  588.  
  # Builds a Hann window of +n+ samples, repeated for each of +res+ rows.
  # Returns a float NArray indexed as [row,sample].
  def get_hanning_window(res,n)
      win = NArray.float(res,n)
      weights = Array.new(n) do |i|
          0.5*(1.0-Math.cos(2.0*Math::PI*i/(n-1.0)))
      end
      weights.each_with_index do |weight,i|
          win[0,i] = weight
          (1...res).each do |j|
              win[j,i] = weight
          end
      end
      win
  end
  599.  
  # Computes the autocorrelation of every row of +data_t+ ([row,sample]) via
  # FFT, zero-padding each row to twice its length.
  # +window+, if given, is multiplied onto the data before transforming.
  # +smooth+, if given, is the width passed to smooth_gauss for smoothing
  # each row of the result (should be ~size of a peak at the sample rate).
  # Returns a float NArray of the same shape as +data_t+.
  # NOTE(review): FFTW3.fft is called without a dimension argument and the
  # inverse transform is multiplied by its conjugate; both should be checked
  # against the fftw3 gem's documentation.
  def get_linear_autocorrelation(data_t, window=nil, smooth=nil)
      n = data_t.shape[0]
      m = data_t.shape[1]

      # pad with zeros
      data_p = NArray.float(n,2*m)
      data_p[true,0..m-1] = data_t

      # window data
      data_p[true,0..m-1] *= window if window

      # forward fft
      data_f = FFTW3.fft(data_p,-1)

      # power spectrum (|z|^2)
      data_f *= data_f.conj
      data_ps = data_f.real

      # inverse fft
      data_i = FFTW3.ifft(data_ps,-1)
      data_i *= data_i.conj
      data_i = data_i.real

      # crop to original range
      data_c = data_i[true,0..m-1]

      # Smooth the rows that are actually returned; writing into data_i at
      # this point would not affect the result any more.
      if smooth
          n.times do |i|
              data_c[i,true] = smooth_gauss(data_c[i,true],smooth)
          end
      end

      return data_c
  end
  636.  
  # Thresholds +pixels+ in place and returns it.
  # Normally values below +thresh+ become 0 and all others +max+; with
  # +invert+ values of at least +thresh+ become 0 and all others +max+.
  def threshold_pixels!(pixels,thresh,max=0xFF,invert=false)
      zeroed = invert ? lambda { |value| value >= thresh } : lambda { |value| value < thresh }
      pixels.map! { |value| zeroed.call(value) ? 0 : max }
  end
  648.  
  # Sets every pixel inside the rectangles +rects+ to +val+.
  # +pixels+ is the flattened image, +dims+ its [width,height]; a rectangle
  # is [a_x,a_y,b_x,b_y] with inclusive corners. Returns +rects+.
  def fill_rects(pixels,dims,val,rects)
      stride = dims[0]
      rects.each do |rect|
          rect[1].upto(rect[3]) do |row|
              offset = stride*row
              pixels[(rect[0]+offset)..(rect[2]+offset)] = val
          end
      end
  end
  657.  
  # Removes, in place, every rectangle whose area is not included in +t_a+,
  # whose width is below +t_w+ or whose height is below +t_h+.
  # Rectangles are [a_x,a_y,b_x,b_y] with inclusive corners. Returns +rects+.
  def rects_limit_size(rects,t_a,t_w,t_h)
      rects.delete_if do |rect|
          width = rect[2]-rect[0]+1
          height = rect[3]-rect[1]+1
          acceptable = t_a.include?(width*height) && width >= t_w && height >= t_h
          !acceptable
      end
  end
  665.  
  # Removes, in place, every rectangle in which the fraction of pixels
  # greater than zero is below +ratio+.
  # +pixels+ is the flattened image, +dims+ its [width,height]. Returns +rects+.
  def rects_limit_fillratio(rects,pixels,dims,ratio)
      grid = pixels.reshape(dims[0],dims[1])
      rects.delete_if do |rect|
          region = grid[rect[0]..rect[2],rect[1]..rect[3]]
          filled = (region>0).sum
          filled/region.size.to_f < ratio
      end
  end
  673.  
  # Get bounding boxes for each connected area.
  # +regions+ holds one label per pixel of the flattened image, +dims+ is
  # its [width,height]. Labels from 1 up to the largest label are examined;
  # labels without pixels are skipped.
  # Returns an Array of NArrays [a_x,a_y,b_x,b_y].
  def get_rects(regions,dims)
      width = dims[0]
      positions = NArray.int(width*dims[1])
      positions.indgen!(0)
      boxes = []
      (1..regions.max).each do |label|
          members = positions[regions.eq(label)]
          next if members.size == 0
          # split the flat positions into column and row
          cols = members%width
          rows = members/width
          boxes << NArray.to_na([cols.min,rows.min,cols.max,rows.max])
      end
      boxes
  end
  691.  
  # Group rectangles that are close to each other.
  # Two rectangles are close when the sum of the squared differences of
  # their four coordinates is below opts[:bounding_box_difference]. A
  # rectangle joins the first group containing a close rectangle, otherwise
  # it starts a new group. Returns an Array of groups (Arrays of rectangles).
  def group_rects(rects,opts={})
      opts[:bounding_box_difference] ||= Default[:bounding_box_difference]
      limit = opts[:bounding_box_difference]
      rects.each_with_object([]) do |rect,groups|
          # check for near-by rects
          home = groups.find do |members|
              members.any? { |other| ((other-rect)**2).sum < limit }
          end
          if home
              home << rect
          else
              groups << [rect]
          end
      end
  end
  711.  
  # Merges overlapping rectangles into their common bounding box.
  # Rectangles are [a_x,a_y,b_x,b_y] with inclusive corners; two rectangles
  # overlap when their x ranges and their y ranges intersect, which includes
  # one rectangle lying completely inside the other. Merging is repeated
  # until the grown box overlaps nothing else. The input rectangles are not
  # modified. Returns a new Array.
  def merge_overlapping_rects(rects)
      overlaps = lambda do |a,b|
          a[0] <= b[2] && b[0] <= a[2] && a[1] <= b[3] && b[1] <= a[3]
      end
      merged = []
      rects.each do |rect|
          current = rect
          loop do
              hits = merged.select { |other| overlaps.call(other,current) }
              break if hits.empty?
              # work on a copy so the caller's rectangle stays untouched
              current = current.dup if current.equal?(rect)
              hits.each do |other|
                  current[0] = other[0] if other[0] < current[0]
                  current[1] = other[1] if other[1] < current[1]
                  current[2] = other[2] if other[2] > current[2]
                  current[3] = other[3] if other[3] > current[3]
              end
              # the absorbed rectangles are replaced by the grown box
              merged.reject! { |other| hits.any? { |hit| hit.equal?(other) } }
          end
          merged << current
      end
      return merged
  end
  739.  
  740. # w>0 dilate
  741. # w<0 erode
  742. def resize_rects(rects,w)
  743.     rects.each do |rect|
  744.         rect[0] -= w
  745.         rect[2] += w
  746.         rect[1] -= w
  747.         rect[3] += w
  748.     end
  749.     rects.delete_if do |rect|
  750.         rect[2]-rect[0]<0 || rect[3]-rect[1]<0
  751.     end
  752.     return rects
  753. end
  754.  
  755. # Averages (pixel intensities) of all images grouped together.
  756. def average_images!(groups)
  757.     groups.map! do |group|
  758.         res = group[0].size
  759.         group.inject(NArray.sint(res)){|img,s|s+img}/group.size
  760.     end
  761.     return groups
  762. end
  763.  
  764. # Takes an NArray of images as input. [img1,img2,...] = NArray(resolution,frames)
  765. # Mask_t is an Array/NArray of dimension +frames+, 1 if frame is to be considerd, 0 if it is to be discarded. If empty, takes all images.
  766. # Mask_x is an NArray (type byte) of the same dimension as each of the +images+, if 0 the corresponding pixel in +images+ gets ignored for  analysis. Useful when the timer's shape is not quite rectangular.
  767. # Returns an array, where each entry is an array of similar images. [[img1,img4],[img2,img5],[img3]]
  768. # All images must be of equal size.
  769. def group_images(images,thresh,mask_t,mask_x)
  770.     raise ArgumentError, 'mask_x must be of type byte' unless mask_x.typecode == NArray::BYTE
  771.     groups = []
  772.     eff_res = mask_x.sum # effective resolution
  773.     n = images.shape[1]
  774.     # take each digit
  775.     n.times do |i|
  776.         next if mask_t[i]==0
  777.         img = images[true,i]
  778.         # check how much they fit into each group
  779.         cand = groups.select do |group|
  780.             group.inject(0.0){|s,pic|s+((img-pic)[mask_x]**2).sum**0.5} < thresh*eff_res*group.size
  781.         end
  782.         if cand.size == 0
  783.             # doesn't fit, create new group
  784.             groups << [img]
  785.         else
  786.             # add to group where it fits best
  787.             cand.min{|x,y|x[0]<=>y[0]} << img
  788.         end
  789.     end
  790.     return groups
  791. end
  792.  
  793. # Digits are expected to occur with a frequency given by occ.
  794. def trim_groups!(groups,occ)
  795.     return groups.clear if occ.empty?
  796.     gsizes = groups.map(&:size).sort
  797.     trs = gsizes[[0,gsizes.size-10].max]/2
  798.     groups.delete_if do |group|
  799.         group.size < trs
  800.     end
  801. #    while groups.size > 10 do
  802. #        min = groups.min{|x,y|x[1]<=>y[1]}
  803. #        groups.delete(min)
  804. #    end
  805.     return groups
  806. end
  807.  
  808. ##############################
  809. # Image processing functions #
  810. ##############################
  811.  
  812. def flood_fill(pixels,i,val,dirs,block=0)
  813.     res = pixels.size
  814.     queue = [i]
  815.     while !queue.empty?
  816.         idx = queue.pop
  817.         next if idx<0 || idx>=res || pixels[idx] == block || pixels[idx] == val
  818.         pixels[idx] = val
  819.         queue.concat((idx+dirs).to_a)
  820.     end  
  821. end
  822.  
  823. # shaping element, pixel offsets
  824. def se_square(dims,width)
  825.     x = dims[0]
  826.     size = 2*width+1
  827.     delta = NArray.int(size,size)
  828.     delta.indgen
  829.     delta.map! do |i|
  830.         (i%size-width)+x*(i/size-width)
  831.     end
  832.     return delta.reshape(size*size)
  833. end
  834.  
  835. # These work on b/w images with 0=black, !0=white
  836. # Faster when there aren't many white pixels.
  837. def fast_dilate(pixels,se)
  838.     res = pixels.size
  839.     idx = NArray.int(res)
  840.     idx.indgen!
  841.     idx[pixels.eq(0xFF)].each do |i|
  842.         d = se+i
  843.         pixels[d[(d>=0)&(d<res)]] = 0xFF
  844.     end
  845.     return pixels
  846. end
  847. def dilate(pixels,se,thresh=0.0)
  848.     orig = pixels[true]
  849.     res = pixels.size
  850.     idx = NArray.int(res)
  851.     idx.indgen!
  852.     idx[pixels.eq(0)].each do |i|
  853.         d = se+i
  854.         d = d[(d>=0)&(d<res)]
  855.         pixels[i]=0xFF if orig[d].ne(0).sum>d.size*thresh
  856.     end
  857.     return pixels
  858. end
  859. def erode(pixels,se,thresh=0.0)
  860.     orig = pixels[true]
  861.     res = pixels.size
  862.     idx = NArray.int(res)
  863.     idx.indgen!
  864.     idx[pixels.ne(0)].each do |i|
  865.         d = se+i
  866.         d = d[(d>=0)&(d<res)]
  867.         pixels[i]=0 if orig[d].eq(0).sum>d.size*thresh
  868.     end
  869.     return pixels
  870. end
  871. def close(pixels,se,thresh_1=0.0,thresh_2=thresh_1)
  872.     dilate(pixels,se,thresh_1)
  873.     erode(pixels,se,thresh_2)
  874. end
  875. def open(pixels,se,thresh_1=0.0,thresh_2=thresh_1)
  876.     erode(pixels,se,thresh_1)
  877.     dilate(pixels,se,thresh_2)
  878. end
  879.  
  880. # pixels should be int array black-white, ie 0 or 255
  881. def bw_label(pixels,dims)
  882.     x = dims[0]
  883.     res = dims[0]*dims[1]
  884.     dirs = NArray.to_na([1,-1,x,-x,1+x,1-x,x-1,-x-1])
  885.     val = 1
  886.     idx = NArray.int(res)
  887.     idx.indgen!
  888.     pixels = pixels/255
  889.     mask = idx[pixels>0]
  890.     pixels *= res+1
  891.     mask.each do |i|
  892.         if pixels[i] == res+1
  893.             flood_fill(pixels,i,val,dirs)
  894.             val += 1
  895.         end
  896.     end
  897.     return pixels
  898. end
  899.  
  900.  
  901. ################
  902. # Locate Timer #
  903. ################
  904.  
  905. # Analyse a time interval whether it contains a continously ticking timer.
  906. # Returns an array of all rectangular areas found.
  907. def analyse_interval(file,start,frames,dt,period,opts={})
  908.     opts[:debug] ||= Default[:debug]
  909.     opts[:location_method] ||= Default[:location_method]
  910.     opts[:dynamicity_method] ||= Default[:dynamicity_method]
  911.     opts[:bounding_box_smooth_width] ||= Default[:bounding_box_smooth_width]
  912.     opts[:bounding_box_area] ||= Default[:bounding_box_area]
  913.     opts[:bounding_box_width] ||= Default[:bounding_box_width]
  914.     opts[:bounding_box_height] ||= Default[:bounding_box_height]
  915.     opts[:bounding_box_fill_ratio] ||= Default[:bounding_box_fill_ratio]
  916.     opts[:threshold_pixel_std] ||= Default[:threshold_pixel_std]
  917.  
  918.     # read frames from video file or a sequence of images (frames)
  919.     data, dims = get_frames(file,start,frames,dt,opts)
  920.  
  921.     ################
  922.     # classify data#
  923.     ################
  924.  
  925.     # not enough changes when dt is small, dynamicty method not applicable
  926.     if dt < Rational(1,1)
  927.         opts = opts.merge(:dynamicity_method => :standard_deviation)
  928.         if dt < Rational(1,3)
  929.             opts = opts.merge(:dynamicity_method => :none)
  930.         else
  931.             opts = opts.merge(:threshold_pixel_std => opts[:threshold_pixel_std]*dt)
  932.         end
  933.     elsif dt%10<1
  934.         opts = opts.merge(:dynamicity_method => :none)
  935.     end
  936.  
  937.     case opts[:dynamicity_method]
  938.     when :random_comparison
  939.         res1 = pixel_difference_diff(data,opts)
  940.     when :standard_deviation
  941.         res1 = pixel_difference_std(data,opts)
  942.     when :none
  943.         res1 = NArray.int(dims[0]*dims[1])
  944.         res1[true] = 255
  945.     end
  946.  
  947.     case opts[:location_method]
  948.     when :simple
  949.         res2 = check_periodicity(data,period,opts)
  950.     when :fourier
  951.         if frames/period < 4
  952.             raise ArgumentError, 'not enough samples for method :fourier, frames/period should be >= 4'
  953.         end
  954.         res2 = freq_analysis(data,period,opts)
  955.     when :both
  956.         res2 = freq_analysis(data,period,opts) & check_periodicity(data,period,opts)
  957.     when :steps
  958.         dt_n = Rational(1,1)/(dt%1)
  959.         if dt_n < 3 || (dt%1).lcm(Rational(1,1)) != Rational(1,1)
  960.             raise ArgumentError,'fractional part of time step must be 1/n, n>=3 for method :steps, :fouriersteps'
  961.         end
  962.         res2 = check_step_behaviour(data,dt_n)
  963.     when :fouriersteps
  964.         dt_n = Rational(1,1)/(dt%1)
  965.         if frames/period < 4
  966.             raise ArgumentError, 'not enough samples for method :fourier, frames/period should be >= 4'
  967.         end
  968.         if dt_n < 3 || (dt%1).lcm(Rational(1,1)) != Rational(1,1)
  969.             raise ArgumentError,'fractional part of time step must be 1/n, n>=3 for method :steps, :fouriersteps'
  970.         end
  971.         res2 = freq_analysis(data,period,opts) & check_step_behaviour(data,dt_n)
  972.     end
  973.     res = res1&res2
  974.  
  975.     ################
  976.     # post-process #
  977.     ################
  978.  
  979.     post = res.clone
  980.  
  981.     # save for later
  982.     r_label = bw_label(post.clone,dims)
  983.     r_nofill = get_rects(r_label,dims)
  984.  
  985.     # filter pixel noise, fill gaps
  986.     se = se_square(dims,opts[:bounding_box_smooth_width])
  987.     case opts[:location_method]
  988.     when :simple
  989.         close(post,se,0.5)
  990.     when :fourier
  991.         close(post,se,0.5)
  992.         erode(post,se,0.5)
  993.     when :both
  994.         close(post,se,0.5)
  995.     when :steps
  996.         close(post,se,0.6)
  997.     when :fouriersteps
  998.         close(post,se,0.6)
  999.     end
  1000.  
  1001.     # label connected areas
  1002.     post = bw_label(post,dims)
  1003.     opts[:debug] << " - got #{post.max} connected area(s)\n"
  1004.  
  1005.     # get bounding boxes
  1006.     rects = get_rects(post,dims)
  1007.  
  1008.     # merge boxes that overlap
  1009.     k = rects.size
  1010.     rects = merge_overlapping_rects(rects)
  1011.     opts[:debug] << " - merged some overlapping timer area(s), #{k} -> #{rects.size}\n" if rects.size<k
  1012.  
  1013.     # sometimes morphological operations may connect too much
  1014.     # add the original bounding boxes to the image
  1015.     rects.concat(r_nofill)
  1016.  
  1017.     # sane-ify
  1018.     # remove rectangles too small/large
  1019.     k = rects.size
  1020.     rects_limit_size(rects,opts[:bounding_box_area],opts[:bounding_box_width],opts[:bounding_box_height])
  1021.     opts[:debug] << " - eliminated too small/large timer areas, #{k} -> #{rects.size}\n" if rects.size<k
  1022.  
  1023.     # remove rectangles that are mostly empty (using the original mask)
  1024.     k = rects.size
  1025.     rects_limit_fillratio(rects,res,dims,opts[:bounding_box_fill_ratio])
  1026.     opts[:debug] << " - eliminated sparsely populated timer areas, #{k} -> #{rects.size}\n" if rects.size<k
  1027.  
  1028.     # get rid of those boxes we introduces above
  1029.     rects = merge_overlapping_rects(rects)
  1030.  
  1031.     # convert mask to binary
  1032.     res = res.eq(0xFF)
  1033.     res.reshape!(*dims)
  1034.  
  1035.     return rects, res
  1036. end
  1037.  
  1038. # +locate_temporal_resolution+ is the interval at which frames are taken/scanned.
  1039. # Returns array of timers [timer1,timer2,...], first entry is the most probable one
  1040. # timer_i = [a_x, a_y, b_x, b_y, digits], a=top-left corner, b=bottom_right_corner
  1041. # digits = [image_1,...] = [NArray(pixels),...]
  1042. def locate_timers(file,opts={})
  1043.     opts[:debug] ||= Default[:debug]
  1044.     opts[:digits_difference] ||= Default[:digits_difference]
  1045.     opts[:timer_frequency_ratio] ||= Default[:timer_frequency_ratio]
  1046.     opts[:locate_temporal_resolution] ||= Default[:locate_temporal_resolution]
  1047.     opts[:period_seconds] ||= Default[:period_seconds]
  1048.     opts[:frames_per_interval] ||= Default[:frames_per_interval]
  1049.     opts[:min_different_digits] ||= Default[:min_different_digits]
  1050.  
  1051.     psc = Rational(opts[:period_seconds])
  1052.     ltr = Rational(opts[:locate_temporal_resolution])
  1053.     per = ((psc.lcm(ltr).lcm(Rational(1,1)))/ltr).to_i # period as integer number of frames
  1054.     frm = opts[:frames_per_interval]
  1055.     d_diff = opts[:digits_difference]
  1056.     mdd = opts[:min_different_digits]
  1057.  
  1058.     duration = VideoScanner.get_duration(file).to_i
  1059.     len = (duration/(frm*ltr)).to_i
  1060.     active_intervals = NArray.byte(frm*len) # frames of intervals with an active timer
  1061.  
  1062.     # how often each digits appears for a given period, eg per=2, 60 frames, '0' 30 times, '5' 30 times, note that +per+ can be rational
  1063.     occ = {}
  1064.     len.times do |i|
  1065.         frm.times do |j|
  1066.             t = i*frm*ltr+ltr*j
  1067.             d = (t%10).to_i
  1068.             occ[d] ||= 0
  1069.             occ[d] += 1
  1070.         end
  1071.     end
  1072.  
  1073.     rects = [] # rectangular region(s) containing the timer(s)
  1074.     masks = [] # the original mask of which pixel is compatible with the presence of a timer
  1075.  
  1076.     # get possible location for each interval
  1077.     len.times do |i|
  1078.         opts[:debug] << "Analyzing %0d:%02d:%02d.%03d-%0d:%02d:%02d.%03d...\n" % sec2hhmmssms(i*ltr*frm).concat(sec2hhmmssms((i+1)*ltr*frm))
  1079.         r, m = analyse_interval(file,i*frm*ltr,frm,ltr,per,opts)
  1080.         if !r.empty?
  1081.             # which intervals are containing a timer
  1082.             active_intervals[i*frm..(i+1)*frm-1] = 1
  1083.             # the original mask of pixels comptabible with the presence of a timer
  1084.             masks.push(m)
  1085.         end
  1086.         rects.concat(r)
  1087.         opts[:debug] << "Found #{r.size} candidates.\n\n"
  1088.     end
  1089.  
  1090.     # groupt rects and take those with the highest frequency
  1091.     groups = group_rects(rects,opts)
  1092.     k = groups.size
  1093.     thresh = len < 18 ? 1 : (len < 36 ? 2 : len/18)
  1094.     groups.delete_if{|g|g.size<=thresh}
  1095.     opts[:debug] << "Removed some candidates that did not occur in many scanned intervals, #{k} -> #{groups.size}.\n" if groups.size<k
  1096.     best = groups.max{|x,y|x.size<=>y.size}
  1097.     timers = groups.select{|g|g.size>=best.size*opts[:timer_frequency_ratio]}
  1098.     timers.map!{|g|g.inject(&:+).map{|x|x/g.size}.to_a}
  1099.  
  1100.     # sorry, no timers found
  1101.     return timers if timers.empty? || masks.empty?
  1102.  
  1103.     # merge all masks together
  1104.     opts[:debug] << "Storing original pixel masks...\n"    
  1105.     mask = masks.inject(NArray.byte(*masks[0].shape)){|s,m|s|m}
  1106.  
  1107.     # save the original mask for each timer region
  1108.     timers.each_with_index do |timer|
  1109.         timer[5] = mask[timer[0]..timer[2],timer[1]..timer[3]]
  1110.         timer[5].flatten!
  1111.     end
  1112.  
  1113.     # extract images of the timer's digits
  1114.     opts[:debug] << "Extracting timer digit images...\n"
  1115.     timer_images = get_cropped_frames(file,0,frm*len,ltr,timers,opts)
  1116.  
  1117.     # group images into different digits and average
  1118.     opts[:debug] << "Grouping timer digit images...\n"
  1119.     timer_images.each_with_index do |images,i|
  1120.         groups = group_images(images,d_diff,active_intervals,timers[i][5])
  1121.         trim_groups!(groups,occ)
  1122.         average_images!(groups)
  1123.         timers[i][4] = groups
  1124.     end
  1125.  
  1126.     # delete timers with not enough digits
  1127.     k = timers.size
  1128.     timers.delete_if{|t|t[4].size < mdd}
  1129.     opts[:debug] << "Removed some timers that did not contain enough digits, #{k} -> #{timers.size}."
  1130.  
  1131.     return timers
  1132. end
  1133.  
  1134. def pretty_print_timer_regions(timers,io)
  1135.     timers.each_with_index do |t,i|
  1136.         io << "Timer #{i+1}: (#{t[0]},#{t[1]}), size #{t[2]-t[0]+1}x#{t[3]-t[1]+1}\n"
  1137.     end
  1138. end
  1139.  
  1140. #############################
  1141. # Check timers for activity #
  1142. #############################
  1143.  
  1144. # Returns an array +timers+ with info on when the timer starts and stops.
  1145. # timers = [timer_1, timer_2 ,...]
  1146. # timer_i = [events, initial_state, final_state]
  1147. # events = [event_1, event_2, ...]
  1148. # event_i = [time_index, type_of_event]
  1149. # type_of_event, initial_state, final_state: 1 (starts) or 255 (stops)
  1150. # time_index: time in seconds relative to the beginning of the video file
  1151. def scan_timers(file,start,frames,dt,timers,opts={})
  1152.     return timers.map{[[]]} if timers.empty? || frames < 3
  1153.  
  1154.     opts[:digits_difference] ||= Default[:digits_difference]
  1155.     opts[:timer_ticking_hold_threshold] ||= Default[:timer_ticking_hold_threshold]
  1156.     opts[:timer_ticking_tick_threshold] ||= Default[:timer_ticking_tick_threshold]
  1157.     opts[:timer_ticking_smooth_width] ||= Default[:timer_ticking_smooth_width]
  1158.  
  1159.     thresh_tick = opts[:timer_ticking_hold_threshold]
  1160.     thresh_hold = opts[:timer_ticking_hold_threshold]
  1161.     thresh_digits = opts[:digits_difference]
  1162.     smw = opts[:timer_ticking_smooth_width]
  1163.  
  1164.     timers = [timers] unless timers[0].is_a?(Array)
  1165.  
  1166.     # get video frames
  1167.     regs = get_cropped_frames(file,start,frames,dt,timers,opts)
  1168.  
  1169.     # analyze video to determine when the timer is ticking
  1170.     timer_i = -1
  1171.     regs.map! do |r|
  1172.         timer_i += 1
  1173.         mask = timers[timer_i][5]
  1174.         eff_res = mask.sum # effective resolution
  1175.  
  1176.         # list of frames where it does look like one of the digits of the timer
  1177.         alk = NArray.byte(frames)
  1178.         x = NArray.int(frames-1)
  1179.  
  1180.         # check at which frames it looks similar to the timer, consider only pixels given my +mask+
  1181.         frames.times do |i|
  1182.             alk[i] = 1 if timers[timer_i][4].index{|img|((img-r[true,i])[mask]**2).sum<=(eff_res*thresh_digits)**2}
  1183.         end
  1184.  
  1185.         # threshold into two states: ticking / on-hold, by getting the
  1186.         # difference of adjacent pixels, considering only pixels given by +mask+
  1187.         (frames-1).times do |i|
  1188.             x[i] = ((r[true,i]-r[true,i+1])[mask]**2).sum > (thresh_digits*eff_res)**2 ? 1 : 0
  1189.         end
  1190.  
  1191.         # time is paused when it is not visible
  1192.         x &= alk[0..-2]
  1193.         x &= alk[1..-1]
  1194.  
  1195.         # remove wild fluctuations
  1196.         x = smooth_gauss(x,smw)>0.5
  1197.  
  1198.         # remove short-term changes
  1199.         filter_short_states(x,thresh_hold,thresh_tick)
  1200.  
  1201.         # get transitions between the two states
  1202.         y = x[1..-1]-x[0..-2]
  1203.  
  1204.         # get initial/final state
  1205.         init = x[0] == 0 ? -1 : 1
  1206.         fin = x[-1] == 0 ? 1 : -1
  1207.  
  1208.         # translate indices into time stamps
  1209.         idx = NArray.float(y.size).indgen[y.ne(0)]
  1210.         val = y[idx]
  1211.         idx.map!{|z|(start+(z+0.5)*dt).round}
  1212.         [idx.to_a.zip(val.to_a),init,fin]
  1213.     end
  1214.     return regs
  1215. end
  1216.  
  1217. # I suggest using an odd/prime (not 5) number of seconds for scan_interval,
  1218. # to get as many different digits as possible.
  1219. # Eg, the seconds timer will always end on 0 or 5 if you choose the interval 5s.
  1220. def timer_ticking(file,timer_regions,opts={})
  1221.     return timers.map{[[]]} if timer_regions.empty?
  1222.  
  1223.     opts[:debug] ||= Default[:debug]
  1224.     opts[:scan_interval_coarse] ||= Default[:scan_interval_coarse]
  1225.     opts[:scan_interval_fine] ||= Default[:scan_interval_fine]
  1226.     opts[:scan_fine_interval] ||= Default[:scan_fine_interval]
  1227.     opts[:timer_ticking_hold_threshold] ||= Default[:timer_ticking_hold_threshold]
  1228.     opts[:timer_ticking_tick_threshold] ||= Default[:timer_ticking_tick_threshold]
  1229.  
  1230.     thresh_tick = opts[:timer_ticking_tick_threshold]
  1231.     thresh_hold = opts[:timer_ticking_hold_threshold]
  1232.  
  1233.     dt_coarse = Rational(opts[:scan_interval_coarse])
  1234.     dt_fine = Rational(opts[:scan_interval_fine])
  1235.     duration = VideoScanner.get_duration(file).to_i
  1236.     frames = (duration/dt_coarse).to_i
  1237.  
  1238.     opts[:debug] << "Scanning timers for activity...\n"
  1239.     timer_events = scan_timers(file,0,frames,dt_coarse,timer_regions,opts)
  1240.     timer_events.each_with_index do |t,i|
  1241.         opts[:debug] << "Timer #{i}: Found #{t[0].size} event(s).\n"
  1242.     end
  1243.  
  1244.     # go over all events and temporally locate them at a higher resolution
  1245.     if opts[:scan_fine_interval] && dt_fine < dt_coarse
  1246.         timer_events.each_with_index do |timer,i|
  1247.             # check start at fine resolution
  1248.             opts[:debug] << "Checking at the beginning of the video at a higher resolution for the timer #{i}...\n"
  1249.             frm = (thresh_tick*dt_coarse*2/dt_fine).to_i
  1250.             fine_start = scan_timers(file,0,frm,dt_fine,timer_regions[i])[0]
  1251.             if !fine_start[0].empty?
  1252.                 timer[1] = fine_start[1]
  1253.                 timer[0] = fine_start[0].concat(timer[0])
  1254.             end
  1255.             # check end at fine resolution
  1256.             opts[:debug] << "Checking at the end of the video at higher a resolution for the timer #{i}...\n"
  1257.             t1 = thresh_hold*dt_coarse*2
  1258.             frm = (t1/dt_fine).to_i
  1259.             fine_start = scan_timers(file,duration-2-t1,frm,dt_fine,timer_regions[i])[0]
  1260.             if !fine_start[0].empty?
  1261.                 timer[2] = fine_start[2]
  1262.                 timer[0].concat(fine_start[0])
  1263.             end
  1264.             # check events at finer resolution
  1265.             timer[0].each_with_index do |event,j|
  1266.                 opts[:debug] << "Refining estimate for timer #{i}, event #{j}...\n"
  1267.                 time = event[0]
  1268.                 type = event[1]
  1269.                 delta = type == 1 ? thresh_tick : thresh_hold
  1270.                 t0 = time - delta*dt_coarse
  1271.                 t1 = time + delta*dt_coarse
  1272.                 t0 = 0 if t0 < 0
  1273.                 t1 = duration if t1 > duration
  1274.                 frm = ((t1-t0)/dt_fine).to_i
  1275.                 fine = scan_timers(file,t0,frm,dt_fine,timer_regions[i])[0][0]
  1276.                 fine.delete_if{|f|f[1]!=type}
  1277.                 res = fine.inject(0){|s,f|s+f[0]}
  1278.                 event[0] = (res/fine.size.to_f).round unless fine.empty?
  1279.             end
  1280.         end
  1281.     end
  1282.  
  1283.     # remove events that are the same and sort events
  1284.     timer_events.each do |timer|
  1285.         timer[0].delete_if do |x|
  1286.             timer[0].index{|y|!x.equal?(y) && x[1]==y[1] && (x[0]-y[0]).abs < 2*dt_fine}
  1287.         end
  1288.         # sort by event time, then by event type
  1289.         timer[0].sort! do |x,y|
  1290.             a = x[0]<=>y[0]
  1291.             a == 0 ? x[1]<=>y[1] : a
  1292.         end
  1293.     end
  1294.     # remove timers that are the same
  1295.     array_remove_duplicates!(timer_events) {|t1,t2|
  1296.         n = t1[0].size
  1297.         if t2[0].size == n
  1298.             diff = 0
  1299.             n.times{|i|diff += t1[0][i][1]==t2[0][i][1] ? (t1[0][i][0]-t2[0][i][0]).abs : 9E99}
  1300.             t1.size == t2.size && t1[1]==t2[1] && diff < 2*n*dt_fine
  1301.         end
  1302.     }
  1303.     return timer_events
  1304. end
  1305.  
  1306. def array_remove_duplicates!(arr,&block)
  1307.     arr.delete_if do |x|
  1308.         arr.select{|y|x.equal?(y) || yield(x,y)}.size>1
  1309.     end
  1310. end
  1311.  
  1312. def pretty_print_timer_events(timers,io)
  1313.     timers.each_with_index do |timer,i|
  1314.         io << "Timer #{i+1}:\n"
  1315.         if timer[2] == timer[3] && timer[0].empty?
  1316.             # note that this script can't detect the timer if it's paused the entire time
  1317.             io << "  - keeps #{timer[1] == 1 ? 'ticking' : 'being paused'}\n"
  1318.         else
  1319.             io << "  - starts off #{timer[1] == 1 ? 'ticking' : 'paused'}.\n"
  1320.             timer[0].each do |event|
  1321.                 hmsms = sec2hhmmssms(event[0])
  1322.                 io << "  - #{event[1]==1 ? 'starts' : 'stops'} at %0d:%02d:%02d.%03d\n" % hmsms
  1323.             end
  1324.             io << "  - ends up #{timer[2] == 1 ? 'paused' : 'ticking'}.\n"
  1325.         end
  1326.     end
  1327. end
  1328.  
  1329. ########################
  1330. # Analyze a video file #
  1331. ########################
  1332.  
  1333. def scan_file(file,opts={})
  1334.     opts[:debug] ||= Default[:debug]
  1335.     file = File.expand_path(file)
  1336.     raise StandardError, 'no such file' unless File.exist?(file) && File.readable?(file)
  1337.  
  1338.     t_init = Time.now
  1339.  
  1340.     # scan for timers
  1341.     timer_regions = locate_timers(file,opts)
  1342.  
  1343.     if timer_regions.empty?
  1344.         opts[:debug] << "\n\n"
  1345.         opts[:debug] << "Failed to find any timers.\n"
  1346.         return [],[]
  1347.     end
  1348.  
  1349.     # debug
  1350.     opts[:debug] << "\n\n"
  1351.     opts[:debug] << "Timers found:\n"
  1352.     pretty_print_timer_regions(timer_regions,opts[:debug])
  1353.     opts[:debug] << "\n"
  1354.  
  1355.     # scan for timer movement and delete non-moving timers
  1356.     timer_events = timer_ticking(file,timer_regions,opts)
  1357.     timer_events.delete_if{|t|t[0].empty? && t[1]!=1}
  1358.  
  1359.     if !t_ev.find{|t|!t[0].empty? || t[1]==1}
  1360.         opts[:debug] << "\n\n"
  1361.         opts[:debug] << "Failed to detect any events, giving up.\n"
  1362.         return [],[]
  1363.     end
  1364.  
  1365.     # debug
  1366.     opts[:debug] <<  "\n\n"
  1367.     opts[:debug] <<  "This is what they're doing:\n"
  1368.     pretty_print_timer_events(timer_events,opts[:debug])
  1369.  
  1370.     opts[:debug] <<  "\n"
  1371.     opts[:debug] <<  "Scanning took #{(Time.now-t_init).round}s.\n"    
  1372.  
  1373.     return timer_events, timer_regions
  1374. end
  1375.  
  1376. # merge timers that do the same, eg see dump_000000003.mp4
  1377.  
  1378. # With timestep 10.x, seconds digit won't change much.
  1379. # Method fourier needs some frames, at least ~ 6 * period (number of frames after which digit repeats).
  1380. # Dynamicty does not work well with 0.x timesteps, so it gets disabled.
  1381. def smart_scan_file(file,opts={})
  1382.     opts[:debug] ||= Default[:debug]
  1383.     opts[:transcode_before_analysis] ||= Default[:transcode_before_analysis]
  1384.  
  1385.     file = File.expand_path(file)
  1386.     raise StandardError, 'no such file' unless File.exist?(file) && File.readable?(file)
  1387.  
  1388.     file = transcode_file(file).to_s if opts[:transcode_before_analysis]
  1389.  
  1390.     lookup1 = [
  1391.         [0,      nil, []],
  1392.         [10,     Rational(1,5),   [:steps]],
  1393.         [70,     Rational(1,4),   [:steps]],
  1394.         [5*60,   Rational(5,4),   [:simple]],
  1395.         [10*60,  1,               [:fourier]],
  1396.         [10*60,  Rational(6,5),   [:simple]],
  1397.         [15*60,  1,               [:simple,:fourier]],
  1398.         [30*60,  1,               [:simple,:fourier]],
  1399.         [60*60,  3,               [:simple,:fourier]],
  1400.         [120*60, Rational(102,10),[:steps]],
  1401.         [120*60, 3,               [:simple,:fourier]],
  1402.         [180*60, 7,               [:simple,:fourier]],
  1403.         [240*60, 7,               [:simple,:fourier]],
  1404.         [480*60, 9,               [:simple,:fourier]],
  1405.         [600*60, 9,               [:simple,:fourier]],
  1406.         [9E99,   9,               [:simple,:fourier]]
  1407.     ]
  1408.  
  1409.     lookup2 = [
  1410.         [0,      nil, []],
  1411.         [10,     Rational(11,10), Rational(21,20)],
  1412.         [10*60,  Rational(6,5),   Rational(11,10)],
  1413.         [30*60,  Rational(1,1),   Rational(4,3)],
  1414.         [60*60,  Rational(3,1),   Rational(5,4)],
  1415.         [120*60, Rational(7,1),   Rational(5,4)],
  1416.         [180*60, Rational(7,1),   Rational(5,4)],
  1417.         [240*60, Rational(7,1),   Rational(5,4)],
  1418.         [480*60, Rational(9,1),  Rational(5,4)],
  1419.         [600*60, Rational(9,1),  Rational(5,4)],
  1420.         [9E99,   Rational(9,1),  Rational(5,4)]
  1421.     ]
  1422.  
  1423.     t_init = Time.now
  1424.     t_reg, t_ev = [], []
  1425.  
  1426.     duration = VideoScanner.get_duration(file)
  1427.     fps = VideoScanner.get_fps(file)[0].to_f
  1428.  
  1429.     # locate timers
  1430.     opts[:debug] << "Analyzing <#{file}>, " + "duration %0d:%02d:%02d.%03d...\n" % sec2hhmmssms(duration)
  1431.     opts[:debug] << "\n"
  1432.     opts[:debug] << "Scanning for timers...\n"
  1433.     idx = lookup1.index{|d|duration < d[0]}
  1434.     idx = lookup1.rindex{|d|lookup1[idx][0] == d[0]}
  1435.     jdx = 0
  1436.     while idx > 0
  1437.         opts[:debug] << "\n"
  1438.         opts[:debug] << "Trying timestep #{lookup1[idx][1].to_f}, method #{lookup1[idx][2][jdx]}...\n"
  1439.         opts = opts.merge(:locate_temporal_resolution => lookup1[idx][1], :location_method => lookup1[idx][2][jdx])
  1440.         t_reg = locate_timers(file,opts)
  1441.         if t_reg.empty?
  1442.             opts[:debug] << 'Retrying...'
  1443.             jdx += 1
  1444.             if jdx >= lookup1[idx][2].size
  1445.                 idx -= 1
  1446.                 jdx = 0
  1447.             end
  1448.         else
  1449.             break
  1450.         end
  1451.     end
  1452.  
  1453.     # check for success
  1454.     if t_reg.empty?
  1455.         opts[:debug] << "\n\n"
  1456.         opts[:debug] << "Failed to locate any timers, giving up.\n"
  1457.         return [],[]
  1458.     end
  1459.  
  1460.     # debug
  1461.     opts[:debug] << "\n\n"
  1462.     opts[:debug] << "Timer(s) found:\n"
  1463.     pretty_print_timer_regions(t_reg,opts[:debug])
  1464.     opts[:debug] << "\n"
  1465.  
  1466.     # get events
  1467.     opts[:debug] <<  "\n"
  1468.     opts[:debug] << "Analyzing timer(s) for event(s)...\n"
  1469.     idx = lookup2.index{|d|duration < d[0]}
  1470.     idx = lookup2.rindex{|d|lookup2[idx][0] == d[0]}
  1471.     while idx > 0
  1472.         opts[:debug] << "\n"
  1473.         opts[:debug] << "Trying coarse timestep #{lookup2[idx][1].to_f}, fine #{lookup2[idx][2].to_f}...\n"
  1474.         opts = opts.merge(:scan_interval_coarse => lookup2[idx][1], :scan_interval_fine => lookup2[idx][2])
  1475.         t_ev = timer_ticking(file,t_reg,opts)
  1476.         if !t_ev.find{|t|!t[0].empty? || t[1]==1}
  1477.             opts[:debug] << "Retrying...\n"
  1478.             idx -= 1
  1479.         else
  1480.             break
  1481.         end
  1482.     end
  1483.  
  1484.     # remove timer that are being paused the entire time (shouldn't be possible to detect, though)
  1485.     t_ev.delete_if{|t|t[0].empty? && t[1]!=1}
  1486.  
  1487.     # check for success
  1488.     if !t_ev.find{|t|!t[0].empty? || t[1]==1}
  1489.         opts[:debug] << "\n\n"
  1490.         opts[:debug] << "Failed to detect any events, giving up.\n"
  1491.         return [],[]
  1492.     end
  1493.  
  1494.     # debug
  1495.     opts[:debug] <<  "\n\n"
  1496.     opts[:debug] <<  "This is what they're doing:\n"
  1497.     pretty_print_timer_events(t_ev,opts[:debug])
  1498.  
  1499.     # report run time
  1500.     opts[:debug] <<  "\n"
  1501.     t_elap = Time.now-t_init
  1502.     opts[:debug] <<  "Scanning took #{t_elap.round}s, #{(duration*fps/t_elap.to_f).round} fps.\n"    
  1503.  
  1504.     return t_ev, t_reg
  1505.  
  1506. end
  1507.  
  1508. #################
  1509. # CLI interface #
  1510. #################
  1511.  
  1512. if caller.empty?
  1513.     if ARGV.empty?
  1514.         $stderr << "usage: ruby #{__FILE__} <input-video-file>\n"
  1515.         exit(1)
  1516.     end
  1517.     ARGV.each do |file|
  1518.         begin
  1519.             events, regions = smart_scan_file(file, :debug => $stderr)
  1520.         rescue StandardError=>e
  1521.             $stderr << "error scanning #{file}\n"
  1522.             $stderr << "#{e.class}, #{e.message}, #{e.backtrace}\n"
  1523.         end
  1524.             $stderr << "\n########################################\n"
  1525.             $stderr << "########################################\n\n"
  1526.     end
  1527. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement