Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #
- # ruby <this-script> <input-video-file>
- #
- # also needs ffmpeg
- require 'json'
- require 'narray'
- require 'fftw3'
# Default settings. The functions below fall back to these values for every
# option that is missing from their +opts+ hash.
Default = {
  :debug => $stderr, # stream that receives progress and debug messages
  :ffmpeg => 'ffmpeg', # ffmpeg executable
  :transcode_before_analysis => true, # some video files are broken and acquiring frames directly will produce bad results
  :period_peak_width => Rational(2,10), # width of peaks / period length
  :peak_height => 0.5, # ratio of peak base to peak height
  :peak_min_height => 1E-15, # data/data[0] below this threshold is considered equal to zero; conservative default, depends upon your choice of period_analyze
  :period_analyze => Rational(4,4), # uses only the first part of the autocorrelation diagram for analysis
  :threshold_pixel_std => 15, # maximum standard deviation for a pixel over time to be classified as static
  :threshold_pixel_absdiff => 15, # maximum absolute difference averaged over time for a pixel to be classified as static
  :threshold_pixel_perdiff => 7, # maximum difference of a pixel over one period averaged over time to be classified as periodic
  :static_interval_threshold => 0.3, # maximum distance of a static interval of tick_seconds averaged over all frames to be classified as changing every second
  :bounding_box_difference => 2*4**2, # maximum sum of squared distances of two timers' bounding boxes' top-left and bottom-right corners to be classified as equal, otherwise they get grouped together, relative to the downscaled +video_x_resolution+ frame size
  :bounding_box_area => (10..400), # min/max area of a timer's bounding box, relative to the downscaled +video_x_resolution+ frame size
  :bounding_box_width => 3, # min width of a timer's bounding box, relative to the downscaled +video_x_resolution+ frame size
  :bounding_box_height => 3, # min height of a timer's bounding box, relative to the downscaled +video_x_resolution+ frame size
  :bounding_box_fill_ratio => 0.7, # ratio of pixels of a rectangle that must have been classified as valid for the rectangle to be classified as valid
  :bounding_box_smooth_width => 2, # dilates bounding boxes with a rectangle of size smooth_width*2+1
  :location_method => :simple, # :simple, :fourier, :both, :steps
  :dynamicity_method => :random_comparison, # method to determine whether a pixel is changing or remaining constant, :random_comparison or :standard_deviation
  :timer_ticking_tick_threshold => 4, # number of consecutive frames the timer must be ticking to be classified as such, eliminating short intervals
  :timer_ticking_hold_threshold => 4, # number of consecutive frames the timer must be on hold to be classified as such, eliminating short intervals
  :timer_ticking_smooth_width => 4, # width of gaussian smooth of the thresholded timer ticking/pausing data, applied before eliminating short intervals
  :scan_interval_fine => Rational(7,4), # time in seconds between frames when scanning for timer movement (used to scan the entire video)
  :scan_interval_coarse => Rational(7,1), # time in seconds between frames when scanning for timer movement (to improve the resolution of the result)
  :scan_fine_interval => true, # for each detected event (timer starts/stops), scan again at increased resolution
  :min_different_digits => 1, # number of different digits an interval must contain to be considered a timer; the number of digits is lower than 10 for certain time steps, e.g. taking a frame every 5s gives only 2 different digits (-> don't use these time steps)
  # difference between two digits, per pixel, takes the squared difference
  # between two pixels' grey values:
  # Sum[(Image1-Image2)^2]/number_of_pixels < digits_difference^2
  :digits_difference => 1.0,
  # same as above, but not averaged over all pixels, used to locate the timer:
  # Abs[(Pixel1-Pixel2)] < :digits_difference_single_pixel
  :digits_difference_single_pixel => 6,
  :timer_frequency_ratio => 0.5, # area with highest frequency is classified as timer, used to recognize multiple timers, they must occur at least this often
  :locate_temporal_resolution => Rational(3,1), # time between two frames when scanning for the location of the timer, scans multiple time intervals at this resolution, by default 60 frames, 1 second resolution => 1min interval, scale accordingly (2 seconds=>2min interval)
  :frames_per_interval => 60, # number of frames/samples per interval, video is split into several intervals that get scanned separately for a running timer
  :period_seconds => Rational(10,1), # real-time seconds after which the seconds digit of the timer repeats, should be 10 unless the video is slowed down/sped up
  :video_x_resolution => 320, # scale video to this resolution before analyzing, preserving the aspect ratio
  :video_scaler => 'bilinear' # method to downscale the video
}
# Extends Rational with lcm/gcd so that periods and time steps given as
# fractions can be aligned with each other.
class Rational
  # Least common multiple: lcm of the numerators over gcd of the denominators.
  def lcm(you)
    top = numerator.lcm(you.numerator)
    bottom = denominator.gcd(you.denominator)
    Rational(top,bottom)
  end

  # Greatest common divisor: gcd of the numerators over lcm of the denominators.
  def gcd(you)
    top = numerator.gcd(you.numerator)
    bottom = denominator.lcm(you.denominator)
    Rational(top,bottom)
  end
end
# In-memory cache of decoded frames, keyed by the frame's time stamp.
# Frames are converted to NArrays of type +TYPE+ before they are stored.
module FrameCache
  CACHE = {}
  ORDER = [] # the order in which frames were added
  TYPE = NArray::SINT
  CLEAR_SIZE = 3000 # number of oldest entries dropped by one cleanup
  SIZE = 6000 # a cleanup runs once the cache holds this many frames

  # Stores +frame+ under +time+, evicting old frames first when the cache is full.
  def self.push(time,frame)
    clear if CACHE.size >= SIZE
    CACHE[time] = frame.typecode == TYPE ? frame : frame.to_type(TYPE)
    ORDER << time
  end

  # Returns the frame cached for +time+, or nil.
  def self.fetch(time)
    CACHE[time]
  end

  # Returns all cache entries (time => frame) whose frame equals +frame+.
  def self.search(frame)
    CACHE.select{|t,f|f==frame}
  end

  # Evicts the +CLEAR_SIZE+ oldest frames, or everything when
  # +CLEAR_SIZE+ equals +SIZE+.
  def self.clear
    if CLEAR_SIZE == SIZE
      CACHE.clear
      # the bookkeeping must be flushed as well, otherwise ORDER grows forever
      ORDER.clear
    else
      # keep only the last occurrence of frames that were added multiple times
      ORDER.replace(ORDER.reverse.uniq.reverse)
      # drop the oldest entries, keeping the most recently added frames
      ORDER.shift(CLEAR_SIZE).each do |time|
        CACHE.delete(time)
      end
    end
  end

  # Copies the cached frame for +time+ into +data+ at index +idx+. When the
  # frame is not cached, the block is called instead; it is expected to fill
  # +data+ itself and to return the frame, which is then cached.
  def self.put_frame(time,data,idx)
    if frame = self.fetch(time)
      data[*idx] = frame
    else
      push(time,yield)
    end
  end

  # Replaces the cache content with a dump created by +dump+.
  # SECURITY: Marshal.load can instantiate arbitrary objects, only feed it
  # dumps from a trusted source.
  def self.load(data)
    data = Marshal.load(data)
    CACHE.clear
    ORDER.clear
    data[0].each do |entry|
      CACHE[entry[0]] = NArray.to_na(entry[1],entry[2],*entry[3])
    end
    ORDER.concat(data[1])
  end

  # Serializes all frames (as [time, raw string, typecode, shape]) and the
  # insertion order.
  def self.dump
    Marshal.dump([CACHE.map{|time,frame|[time,frame.to_s,frame.typecode,frame.shape]},ORDER])
  end
end
# Reads the grey values of one raw video frame from +io+.
# +res+ is the number of pixels, +data+ the array to store the pixel data,
# +pix_fmt+ the pixel format of the frame (only 'yuv420p' is supported).
# +idx+ selects where the frame goes within +data+: for example, if you've got
# an array with pixels as the first and frames as the second dimension, and
# want to store the frame as the 5th frame, specify idx=[true,4].
# Returns the array inserted. Raises StandardError for an unsupported pixel
# format or when fewer than +res+ bytes could be read.
def read_gray_frame(io,pix_fmt,res,data,idx)
  raise StandardError, 'unsupported pixel format' unless 'yuv420p' == pix_fmt
  # yuv420p: 8 bit Y plane followed by 8 bit 2x2 subsampled V and U planes.
  luma = io.read(res)
  if luma.nil? || luma.bytesize < res
    raise StandardError, 'unable to acquire frame(s), check video file'
  end
  frame = NArray.to_na(luma,1)
  data[*idx] = frame
  # skip the chroma plane(s), only the grey values are of interest
  io.read(res/2)
  frame
end
# Transcodes +file+ to a downscaled H.264 video ('transcode.mp4' next to this
# script) that is used for the analysis instead of the original file.
# Uses opts[:ffmpeg], opts[:video_scaler], opts[:video_x_resolution] and
# writes progress messages to opts[:debug].
# Returns the path of the transcoded file, or nil when ffmpeg failed.
# Raises StandardError when +file+ does not exist.
def transcode_file(file,opts={})
  raise StandardError, "no such file" unless File.exist?(file)
  opts[:debug] ||= Default[:debug]
  opts[:ffmpeg] ||= Default[:ffmpeg]
  opts[:video_scaler] ||= Default[:video_scaler]
  opts[:video_x_resolution] ||= Default[:video_x_resolution]
  duration = VideoScanner.get_duration(file).to_f
  width = VideoScanner.get_stream_part(file,'v','width')[0]['width'].to_i
  height = VideoScanner.get_stream_part(file,'v','height')[0]['height'].to_i
  down_x = opts[:video_x_resolution]
  down_y = down_x*height/width
  # round up to an even height, libx264's default 4:2:0 output rejects odd sizes
  down_y += 1 if down_y.odd?
  file = File.expand_path(file)
  tempfile = File.join(File.dirname(__FILE__),'transcode.mp4')
  # argv array: ffmpeg is started without a shell, so file names containing
  # spaces, quotes, '$' or backticks need no escaping
  cmd = [opts[:ffmpeg].to_s, '-y', '-loglevel', 'quiet', '-i', file,
         '-c:v', 'libx264', '-an', '-vf', "scale=#{down_x}:#{down_y}",
         '-preset', 'ultrafast', '-sws_flags', opts[:video_scaler].to_s,
         '-g', '20', '-movflags', 'faststart', '-crf', '12', '-f', 'mp4', tempfile]
  opts[:debug] << "Transcoding video file (#{(duration/60.0).round(2)} min) before analysis...\n"
  opts[:debug] << "#{cmd.join(' ')}\n"
  t_start = Time.now
  # stdin is detached so that ffmpeg does not swallow the script's input
  succ = system(*cmd, :in => File::NULL)
  t_dur = Time.now-t_start
  opts[:debug] << "Transcoding took #{t_dur}s, #{(duration.to_f/t_dur.to_f).round}x real-time.\n"
  return succ ? tempfile : nil
end
# Reads +frames+ frames, starting at +start+ seconds and +dt+ seconds apart,
# and cuts the given regions out of each (downscaled) frame.
# Assumes there is only one video stream, and takes the first.
# +crop+ is an Array of rectangles, a rectangle is an NArray [a_x,a_y,b_x,b_y],
# a is the top-left corner, b the bottom-right corner, (x,y)=(0,0) is at the
# top-left corner of the video frame.
# Returns an Array with one NArray of shape [pixel,frame] per rectangle.
# Raises StandardError when the stream information or a frame cannot be read.
def get_cropped_frames(file,start,frames,dt,crop,opts={})
  opts[:debug] ||= Default[:debug]
  opts[:ffmpeg] ||= Default[:ffmpeg]
  opts[:video_scaler] ||= Default[:video_scaler]
  opts[:video_x_resolution] ||= Default[:video_x_resolution]
  start = Rational(start)
  dt = Rational(dt)
  file = File.expand_path(file)
  # query all stream properties with a single ffprobe call
  info = VideoScanner.get_stream_part(file,'v',['pix_fmt','width','height'])
  stream = info ? info[0] : nil
  raise StandardError, 'unable to read video stream information' unless stream.is_a?(Hash)
  pix_fmt = stream['pix_fmt']
  width = stream['width'].to_i
  height = stream['height'].to_i
  down_x = opts[:video_x_resolution]
  down_y = down_x*height/width
  resize_needed = down_x!=width || down_y!=height
  data = NArray.int(down_y*down_x)
  regs = crop.map do |c|
    NArray.int((c[2]-c[0]+1),(c[3]-c[1]+1),frames)
  end
  frames.times do |i|
    opts[:debug] << "Reading frame #{i+1}/#{frames}...\r"
    # frames are read (and cached) as a flat list of pixels
    data.reshape!(down_x*down_y)
    time = dt*i+start
    FrameCache.put_frame(time,data,[nil]) do
      # argv array: ffmpeg is started without a shell, so the file name
      # needs no quoting and may contain any character
      cmd = [opts[:ffmpeg].to_s, '-loglevel', 'quiet', '-accurate_seek', '-ss', time.to_f.to_s, '-i', file]
      if resize_needed
        cmd.concat(['-filter:v', "scale=#{down_x}:#{down_y}", '-sws_flags', opts[:video_scaler].to_s])
      end
      cmd.concat(['-frames:v', '1', '-f', 'rawvideo', '-'])
      IO.popen(cmd,'r',:in => File::NULL) do |io|
        read_gray_frame(io,pix_fmt,down_x*down_y,data,[nil])
      end
    end
    # 2d view of the frame for cropping
    data.reshape!(down_x,down_y)
    crop.each_with_index do |c,j|
      regs[j][true,true,i] = data[c[0]..c[2],c[1]..c[3]]
    end
  end
  # flatten each region to [pixel,frame]
  regs.each do |r|
    r.reshape!(r.shape[0]*r.shape[1],r.shape[2])
  end
  return regs
end
# Reads +frames+ frames, starting at +start+ seconds and +dt+ seconds apart,
# downscaled to opts[:video_x_resolution] pixels in width.
# Assumes there is only one video stream, and takes the first.
# Returns an NArray of shape [pixel,frame] and the frame size [width,height].
# Raises StandardError when the stream information or a frame cannot be read.
def get_frames(file,start,frames,dt,opts={})
  opts[:debug] ||= Default[:debug]
  opts[:ffmpeg] ||= Default[:ffmpeg]
  opts[:video_scaler] ||= Default[:video_scaler]
  opts[:video_x_resolution] ||= Default[:video_x_resolution]
  start = Rational(start)
  dt = Rational(dt)
  file = File.expand_path(file)
  # query all stream properties with a single ffprobe call
  info = VideoScanner.get_stream_part(file,'v',['pix_fmt','width','height'])
  stream = info ? info[0] : nil
  raise StandardError, 'unable to read video stream information' unless stream.is_a?(Hash)
  pix_fmt = stream['pix_fmt']
  width = stream['width'].to_i
  height = stream['height'].to_i
  down_x = opts[:video_x_resolution]
  down_y = down_x*height/width
  dims = [down_x,down_y]
  resize_needed = down_x!=width || down_y!=height
  data = NArray.int(down_x*down_y,frames)
  frames.times do |i|
    opts[:debug] << "Reading frame #{i+1}/#{frames}...\r"
    time = dt*i+start
    FrameCache.put_frame(time,data,[true,i]) do
      # argv array: ffmpeg is started without a shell, so the file name
      # needs no quoting and may contain any character
      cmd = [opts[:ffmpeg].to_s, '-loglevel', 'quiet', '-accurate_seek', '-ss', time.to_f.to_s, '-i', file]
      if resize_needed
        cmd.concat(['-filter:v', "scale=#{down_x}:#{down_y}", '-sws_flags', opts[:video_scaler].to_s])
      end
      cmd.concat(['-frames:v', '1', '-f', 'rawvideo', '-'])
      IO.popen(cmd,'r',:in => File::NULL) do |io|
        read_gray_frame(io,pix_fmt,down_x*down_y,data,[true,i])
      end
    end
  end
  return data, dims
end
# Transposes a rectangular 2d array (Array of rows).
# Returns [[]] when +array+ has no first row.
def transpose(array)
  head = array.first
  return [[]] if head.nil?
  row_count = array.size
  (0...head.size).map do |col|
    (0...row_count).map { |row| array[row][col] }
  end
end
- ##########################
- # Getting video metadata #
- ##########################
# Queries video metadata with the command line tools ffprobe and mediainfo.
# All external programs are started with an argv array (no shell), so file
# names may contain spaces, quotes, '$' or backticks.
module VideoScanner
  # Runs +cmd+ (argv array) and returns its standard output, or nil when the
  # program could not be started (e.g. it is not installed).
  def self.run_tool(cmd)
    IO.popen(cmd) { |io| io.read }
  rescue SystemCallError
    nil
  end
  private_class_method :run_tool

  # Returns the output of mediainfo for the --Inform template +param+,
  # or nil when there is no output.
  def self.get_mediainfo(file,param)
    info = run_tool(['mediainfo', "--Inform=#{param}", file.to_s]).to_s.chomp.strip
    info.empty? ? nil : info
  end

  # Formats +t+ seconds as h:mm:ss, or mm:ss when the hours are zero.
  def self.s2hms_str(t, joiner=':')
    hms = s2hms(t)
    hms[1] = hms[1].to_s.rjust(2,'0')
    hms[2] = hms[2].to_s.rjust(2,'0')
    hms.shift if hms[0] == 0
    hms.join(joiner)
  end

  # Splits +t+ seconds into [hours, minutes, remaining seconds].
  def self.s2hms(t)
    h = (t/3600).to_i
    t -= 3600*h
    m = (t/60).to_i
    t -= 60*m
    [h,m,t]
  end

  # Returns the container's start time in seconds, or nil.
  def self.get_start_time(file)
    res = get_format_part(file,'start_time')
    res ? res['start_time'].to_f : nil
  end

  # Returns an Array with the frame rate of each video stream, or nil.
  # Falls back to mediainfo when ffprobe reports a rate with denominator 0.
  def self.get_fps(file)
    file = File.expand_path(file)
    res = get_stream_part(file,'v','avg_frame_rate')
    if res && !res.any?{|x|x['avg_frame_rate'].match(/\/0$/)}
      res.map! do |x|
        Rational(x['avg_frame_rate']).to_f
      end
      return res
    end
    minfo = get_mediainfo(file,'Video;%FrameRate_Nominal%') || get_mediainfo(file,'Video;%FrameRate%')
    minfo ? [minfo.to_f].flatten : nil
  end

  # Returns the duration of +file+ in seconds, or nil.
  def self.get_duration(file)
    file = File.expand_path(file)
    res = get_format_part(file,'duration')
    return res['duration'].to_f if res
    minfo = get_mediainfo(file,'Video;%Duration%') || get_mediainfo(file,'General;%Duration%')
    # mediainfo reports %Duration% in milliseconds, ffprobe in seconds
    minfo ? minfo.to_f/1000.0 : nil
  end

  # Returns a Hash with the requested entries of the container format.
  def self.get_format_part(file,part)
    get_part(file,'format',part)
  end

  # Returns an Array with one Hash of the requested entries per selected
  # stream; +streams+ is an ffprobe stream specifier, e.g. 'v'.
  def self.get_stream_part(file,streams,part)
    get_part(file,'streams',part,streams)
  end

  # Asks ffprobe for the entries +part+ (a name or an Array of names) of
  # +category+ ('streams' or 'format').
  # Returns the parsed data reduced to the requested keys, or nil when
  # ffprobe produced no usable output.
  def self.get_part(file,category,part,streams=nil)
    conv = { 'streams' => 'stream', 'format' => 'format'}
    part = [part].flatten
    file = File.expand_path(file)
    cmd = ['ffprobe', '-loglevel', 'quiet', '-i', file, '-print_format', 'json',
           '-show_entries', "#{conv[category]}=#{part.join(',')}"]
    cmd.concat(['-select_streams', streams.to_s]) if streams
    begin
      res = JSON.parse(run_tool(cmd).to_s)
    rescue StandardError
      return nil
    end
    if res[category]
      res = res[category]
      # keep only the entries that were asked for
      case res
      when Hash
        res.delete_if{|key,_value|!part.index(key)}
      when Array
        res.each do |r|
          r.delete_if{|key,_value|!part.index(key)}
        end
      end
    end
    res.empty? ? nil : res
  end

  # Reads the presentation time of every frame of video stream +stream_no+.
  # Returns an Array of [time since the first frame, time since the previous
  # frame], one entry for each frame after the first.
  # With +show_progress+ a percentage is printed to $stderr.
  # NOTE(review): newer ffprobe releases may name this frame entry 'pts_time'
  # instead of 'pkt_pts_time' - confirm against the installed version.
  def self.get_frame_durations(file, stream_no, show_progress, scale_progress=1.0)
    data = []
    prv_time = nil
    start_time = nil
    pct_last = -1
    dur = get_duration(file)
    cmd = ['ffprobe', '-threads', '2', '-loglevel', 'quiet', '-i', file.to_s,
           '-select_streams', "v:#{stream_no}", '-show_entries', 'frame=pkt_pts_time',
           '-print_format', 'flat']
    begin
      IO.popen(cmd) do |io|
        io.each_line do |line|
          found = line.match(/(\d+\.\d+)/)
          next unless found
          time = found[1].to_f
          if start_time
            # progress needs a known, non-zero duration
            if show_progress && dur && dur > 0
              pct = (100.0*scale_progress*(time-start_time)/dur).to_i
              if pct > pct_last
                $stderr << "%03d%s\r" % [pct,'%']
                pct_last = pct
              end
            end
            data << [time-start_time,(time-prv_time).abs]
          else
            # the first frame is the reference point; the interval to the
            # second frame is already part of the result
            start_time = time
          end
          prv_time = time
        end
      end
    rescue SystemCallError
      # ffprobe could not be started, return what we have (nothing)
    end
    return data
  end
end
- ############################################################
- # Different methods of filtering pixels containing a timer #
- ############################################################
# Marks pixels that change in regular steps.
# +pixels+ is an NArray of shape [pixel,frame]; +per+ is the number of frames
# after which the timer changes, e.g. every 10 frames when sampled at
# 0.1 s/frame.
# Returns an int NArray with 255 for pixels whose changes are spaced by
# (multiples of) +per+ frames and 0 for all other pixels.
def check_step_behaviour(pixels,per,opts={})
  opts[:digits_difference_single_pixel] ||= Default[:digits_difference_single_pixel]
  opts[:static_interval_threshold] ||= Default[:static_interval_threshold]
  period = per.to_f
  min_change = opts[:digits_difference_single_pixel]
  max_sq_dev = opts[:static_interval_threshold]**2
  pixel_count = pixels.shape[0]
  frame_count = pixels.shape[1]
  mask = NArray.int(pixel_count)
  # threshold into two states: static or dynamic
  changed = (pixels[true,0..-2]-pixels[true,1..-1]).abs >= min_change
  frame_no = NArray.float(frame_count-1).indgen
  # check for static intervals
  pixel_count.times do |i|
    times = frame_no[changed[i,true]]
    mask[i] =
      if times.size < 2
        0
      else
        # distance between consecutive changes; short distances are reduced
        # to their offset from a multiple of the period
        gaps = (times[1..-1]-times[0..-2]).abs
        gaps.map!{|t| t < 6*period ? t%period : t}
        (gaps**2).sum > (times.size-1)*max_sq_dev ? 0 : 255
      end
  end
  return mask
end
# Classifies pixels by their standard deviation over time.
# +data+ is an NArray of shape [pixel,frame].
# Returns an integer NArray with 0 for pixels whose deviation is below
# opts[:threshold_pixel_std] and 255 for the others.
def pixel_difference_std(data,opts={})
  opts[:threshold_pixel_std] ||= Default[:threshold_pixel_std]
  limit = opts[:threshold_pixel_std]
  deviation = data.stddev(1)
  threshold_pixels!(deviation,limit)
  deviation.to_i
end
# Classifies pixels by their mean absolute difference between consecutive
# frames. +data+ is an NArray of shape [pixel,frame].
# Returns an integer NArray with 0 for pixels whose mean difference is below
# opts[:threshold_pixel_absdiff] and 255 for the others. With fewer than two
# frames there is nothing to compare and every pixel is reported as 0.
def pixel_difference_diff(data,opts={})
  opts[:threshold_pixel_absdiff] ||= Default[:threshold_pixel_absdiff]
  res = data.shape[0]
  n = data.shape[1]
  pix_sdiff = NArray.int(res)
  # avoids a division by zero below
  return pix_sdiff if n < 2
  # the sum does not depend on the order in which the pairs are visited
  (1..n-1).each do |i|
    pix_sdiff += (data[true,i]-data[true,i-1]).abs
  end
  # normalize differences and threshold
  pix_sdiff /= (n-1)
  threshold_pixels!(pix_sdiff,opts[:threshold_pixel_absdiff])
  return pix_sdiff.to_i
end
# Marks pixels that repeat every +period+ frames.
# +data+ is an NArray of shape [pixel,frame]. Frames with the same phase
# (frame number modulo +period+) are compared; a pixel is periodic when its
# standard deviation within each phase, averaged over all phases, stays below
# opts[:threshold_pixel_perdiff].
# Returns an integer NArray with 255 for periodic pixels and 0 otherwise
# (all 0 when there is not a single complete period).
def check_periodicity(data,period,opts={})
  opts[:threshold_pixel_perdiff] ||= Default[:threshold_pixel_perdiff]
  res = data.shape[0]
  n = data.shape[1]
  diff_avg = NArray.sint(res)
  # split the frame numbers into complete cycles
  cycles = n.times.each_slice(period).to_a
  cycles.pop if !cycles.empty? && cycles.last.size < period
  return diff_avg if cycles.empty?
  # group frames for each phase of the cycle
  cycles = transpose(cycles)
  # spread of all images of one phase; the order of the frames is irrelevant
  cycles.each do |idx|
    diff_avg += data[true,idx].stddev(1)
  end
  diff_avg /= cycles.size
  # partition into regions compatible with +period+
  threshold_pixels!(diff_avg,opts[:threshold_pixel_perdiff],255,true)
  return diff_avg.to_i
end
# Marks pixels whose autocorrelation shows peaks at multiples of +period+.
# +data_t+ is an NArray of shape [pixel,frame].
# period... Number of samples (frames) that constitute one period.
# Assumes that period is an integer number of frames.
# To get a sensible output, the number of frames should be at least ~6*period.
# Returns an int NArray with 0xFF for periodic pixels and 0 otherwise.
def freq_analysis(data_t,period,opts={})
  opts[:period_peak_width] ||= Default[:period_peak_width]
  opts[:peak_height] ||= Default[:peak_height]
  opts[:peak_min_height] ||= Default[:peak_min_height]
  opts[:period_analyze] ||= Default[:period_analyze]
  res = data_t.shape[0]
  n = data_t.shape[1]
  # peak width in frames
  pfw = (opts[:period_peak_width]*period).to_i
  pfw = 1 if pfw < 1
  # shortcuts
  ph = opts[:peak_height]
  # number of peaks to evaluate
  pal = (opts[:period_analyze]*n/period).to_i
  pal = n/period-1 if pal>=n/period
  pmn = opts[:peak_min_height]
  # stores thresholded mask
  diff = NArray.int(res)
  # de-trend by subtracting the mean
  data_d = data_t.clone
  res.times do |i|
    data_d[i,true] -= data_d[i,true].mean
  end
  # auto-correlate data to find periodic timer
  data_a = get_linear_autocorrelation(data_d)
  # remove low data
  data_a[data_a<pmn*data_a.max] = 0.0
  # evaluate autocorrelation diagram for desired periodicity;
  # idx holds the expected peak positions period, 2*period, ...
  idx = NArray.int(pal).indgen(period,period)
  res.times do |i|
    pix = data_a[i,true]
    next if pix.max!=pix[0]
    # check peak height, should be higher than the surroundings
    bool = true
    (1..pfw-1).each do |j|
      bool &&= (((pix[0..-1-j]-pix[j..-1])[idx]>0) & ((pix[j..-1]-pix[0..-1-j])[idx-j]>0)).sum == idx.size
    end
    # the base of each peak (pfw frames to either side) must stay below
    # ph*peak; both sides are combined element-wise with & (a logical &&
    # would silently drop the first condition)
    next unless bool && (((ph*pix[0..-1-pfw]-pix[pfw..-1])[idx]>0) & ((ph*pix[pfw..-1]-pix[0..-1-pfw])[idx-pfw]>0)).sum == idx.size
    # check interval between two peaks, should be lower than both peaks,
    # i.e. below the straight line connecting them
    bool = true
    idx.each do |j|
      lpeak = pix[j-period]
      mx = (lpeak-pix[j])/period.to_f
      jdx = NArray.float(period-1).indgen(j+1)
      bool &&= (pix[jdx-period] >= lpeak-(jdx-j)*mx).sum==0
    end
    diff[i] = 0xFF if bool
  end
  return diff
end
- #####################
- # Utility functions #
- #####################
# Splits a time in seconds into [hours, minutes, seconds, milliseconds].
# (2*3600+30*60+12.312) => [2, 30, 12, 312]
# The time is rounded to whole milliseconds first, so the milliseconds never
# reach 1000 (59.9996 => [0, 1, 0, 0]).
def sec2hhmmssms(t)
  total_s, ms = (t*1000).round.divmod(1000)
  total_m, s = total_s.divmod(60)
  h, m = total_m.divmod(60)
  [h,m,s,ms]
end
# Replaces every run of +target+ characters matched by +pattern+ with +fill+.
# A match that spans the whole string is left alone. Returns true when +str+
# was changed.
def replace_short_runs(str,pattern,fill)
  changed = false
  str.gsub!(pattern) do
    m = Regexp.last_match
    # a run covering the entire string has no neighbouring state to merge
    # into; flipping it would make both patterns undo each other forever
    next m[0] if m[1].empty? && m[3].empty?
    changed = true
    m[1]+fill*m[2].size+m[3]
  end
  changed
end

# Removes short states from a sequence of 0s (on hold) and 1s (ticking):
# runs of 1s shorter than +tick+ become 0s, runs of 0s shorter than +hold+
# become 1s, repeated until nothing changes.
# +data+ is expected to be an NArray of 0s and 1s (TODO confirm with callers)
# and is modified in place. Sequences with fewer than 3 entries are returned
# unchanged.
def filter_short_states(data,hold,tick)
  return data if data.size < 3
  short_ticks = /(0|^)(1{1,#{tick-1}})(0|$)/
  short_holds = /(1|^)(0{1,#{hold-1}})(1|$)/
  str = data.to_a.join
  loop do
    break unless replace_short_runs(str,short_ticks,'0') || replace_short_runs(str,short_holds,'1')
  end
  data[0..-1] = NArray.to_na(str.chars.map(&:to_i))
end
# Smooths a 1d NArray with a gaussian kernel of 2*width+1 samples.
# At both borders the kernel is cut off and re-normalized.
# Returns a new float NArray, or +pixels+ itself when it has fewer than
# 2*width entries.
def smooth_gauss(pixels,width=2)
  count = pixels.size
  return pixels if count < 2*width
  taps = 2*width+1
  kernel = NArray.float(taps)
  taps.times do |k|
    kernel[k] = Math.exp(-2.0*(k-width)**2.0/(width*width))
  end
  kernel *= 1.0/kernel.sum
  out = NArray.float(count)
  # left border: only the right part of the kernel fits
  width.times do |i|
    part = kernel[width-i..taps-1]
    part *= 1.0/part.sum
    out[i] = (pixels[0..width+i]*part).sum
  end
  # right border: only the left part of the kernel fits
  (count-width...count).each do |i|
    part = kernel[0..width+count-i-1]
    part *= 1.0/part.sum
    out[i] = (pixels[i-width..count-1]*part).sum
  end
  # interior: full kernel
  (width...count-width).each do |i|
    out[i] = (pixels[i-width..i+width]*kernel).sum
  end
  out
end
# Builds a Hann window of +n+ samples, repeated for each of +res+ pixels.
# Returns a float NArray of shape [res,n].
def get_hanning_window(res,n)
  win = NArray.float(res,n)
  n.times do |i|
    weight = 0.5*(1.0-Math.cos(2.0*Math::PI*i/(n-1.0)))
    win[0,i] = weight
    # every pixel gets the same weight for sample i
    (1...res).each do |j|
      win[j,i] = win[0,i]
    end
  end
  win
end
# Computes the linear (zero-padded) autocorrelation of each row via FFT.
# +data_t+ is an NArray of shape [pixel,frame]. +window+ (optional) is
# multiplied onto the data first, see get_hanning_window. +smooth+ (optional)
# is the width of a gaussian smooth applied to each autocorrelation; it
# should be about the size of a peak at the sample rate.
# Returns a float NArray of shape [pixel,frame] (lag 0 first).
def get_linear_autocorrelation(data_t, window=nil, smooth=nil)
  n = data_t.shape[0]
  m = data_t.shape[1]
  # pad with zeros
  data_p = NArray.float(n,2*m)
  data_p[true,0..m-1] = data_t
  # window data
  data_p[true,0..m-1] *= window if window
  # forward fft
  data_f = FFTW3.fft(data_p,-1)
  # power spectrum (|z|^2)
  data_f *= data_f.conj
  data_ps = data_f.real
  # inverse fft
  data_i = FFTW3.ifft(data_ps,-1)
  data_i *= data_i.conj
  data_i = data_i.real
  # crop to original range
  data_c = data_i[true,0..m-1]
  # smooth autocorrelation; this has to work on the cropped copy that is
  # returned, smoothing data_i at this point would be lost
  if smooth
    n.times do |i|
      data_c[i,true] = smooth_gauss(data_c[i,true],smooth)
    end
  end
  return data_c
end
# Thresholds +pixels+ in place: values below +thresh+ become 0, all others
# +max+. With +invert+ values equal to or above +thresh+ become 0 and all
# others +max+.
def threshold_pixels!(pixels,thresh,max=0xFF,invert=false)
  return pixels.map! { |pix| pix < thresh ? 0 : max } unless invert
  pixels.map! { |pix| pix >= thresh ? 0 : max }
end
# Sets all pixels inside the given rectangles to +val+.
# +pixels+ is a flat frame with rows of dims[0] pixels, a rectangle is
# [a_x,a_y,b_x,b_y] (both corners inclusive).
def fill_rects(pixels,dims,val,rects)
  row_len = dims[0]
  rects.each do |rect|
    rect[1].upto(rect[3]) do |y|
      offset = row_len*y
      pixels[(rect[0]+offset)..(rect[2]+offset)] = val
    end
  end
end
# Removes (in place) all rectangles whose area is outside the range +t_a+,
# whose width is below +t_w+ or whose height is below +t_h+.
def rects_limit_size(rects,t_a,t_w,t_h)
  rects.delete_if do |rect|
    width = rect[2]-rect[0]+1
    height = rect[3]-rect[1]+1
    acceptable = t_a.include?(width*height) && !(width<t_w) && !(height<t_h)
    !acceptable
  end
end
# Removes (in place) all rectangles in which less than +ratio+ of the pixels
# of the mask +pixels+ (flat, frame size +dims+) are set.
def rects_limit_fillratio(rects,pixels,dims,ratio)
  grid = pixels.reshape(dims[0],dims[1])
  rects.delete_if do |rect|
    window = grid[rect[0]..rect[2],rect[1]..rect[3]]
    filled = (window>0).sum
    filled/window.size.to_f < ratio
  end
end
# Get bounding boxes for each connected area.
# +regions+ is a flat label image (see bw_label), +dims+ the frame size.
# Returns an Array of NArrays [a_x,a_y,b_x,b_y], one per label that occurs.
def get_rects(regions,dims)
  row_len = dims[0]
  positions = NArray.int(row_len*dims[1])
  positions.indgen!(0)
  boxes = []
  1.upto(regions.max) do |label|
    members = positions[regions.eq(label)]
    next if members.size == 0
    # split the flat pixel index into column and row
    cols = members%row_len
    rows = members/row_len
    boxes << NArray.to_na([cols.min,rows.min,cols.max,rows.max])
  end
  boxes
end
# Groups rectangles that are close to each other. Two rectangles are close
# when the sum of the squared distances of their top-left and bottom-right
# corners is below opts[:bounding_box_difference]. A rectangle joins the
# first group that contains a close rectangle, otherwise it starts a new one.
# Returns an Array of groups (Arrays of rectangles).
def group_rects(rects,opts={})
  opts[:bounding_box_difference] ||= Default[:bounding_box_difference]
  max_sq_dist = opts[:bounding_box_difference]
  rects.each_with_object([]) do |rect,groups|
    # check for near-by rects
    home = groups.find do |members|
      members.any? { |other| ((other-rect)**2).sum < max_sq_dist }
    end
    if home
      home << rect
    else
      groups << [rect]
    end
  end
end
# Merges rectangles [a_x,a_y,b_x,b_y] (corners inclusive) that overlap into
# their common bounding box. Overlap includes one rectangle containing the
# other and rectangles crossing each other. Merging is repeated while the
# grown box touches further rectangles, so the result contains no overlapping
# rectangles. The rectangles passed in are not modified.
# Returns a new Array of rectangles.
def merge_overlapping_rects(rects)
  # two rectangles overlap when their x ranges and their y ranges intersect
  overlapping = lambda do |p,q|
    p[0] <= q[2] && q[0] <= p[2] && p[1] <= q[3] && q[1] <= p[3]
  end
  merged = []
  rects.each do |rect|
    union = rect
    loop do
      hits = merged.select { |r| overlapping.call(r,union) }
      break if hits.empty?
      # grow a copy, the caller's rectangle stays untouched
      union = union.dup if union.equal?(rect)
      hits.each do |r|
        union[0] = r[0] if r[0] < union[0]
        union[1] = r[1] if r[1] < union[1]
        union[2] = r[2] if r[2] > union[2]
        union[3] = r[3] if r[3] > union[3]
        merged.delete(r)
      end
    end
    merged << union
  end
  return merged
end
# Grows (w>0, dilate) or shrinks (w<0, erode) all rectangles by +w+ pixels on
# every side, in place. Rectangles that shrink to nothing are removed.
# Returns +rects+.
def resize_rects(rects,w)
  rects.each do |rect|
    # move the top-left corner outwards ...
    [0,1].each { |k| rect[k] -= w }
    # ... and the bottom-right corner as well
    [2,3].each { |k| rect[k] += w }
  end
  rects.delete_if { |rect| rect[2]-rect[0]<0 || rect[3]-rect[1]<0 }
  rects
end
# Replaces (in place) every group of images by the average (pixel
# intensities) of all images in that group.
# Returns +groups+.
def average_images!(groups)
  groups.map! do |group|
    total = NArray.sint(group[0].size)
    group.each { |img| total = img+total }
    total/group.size
  end
  groups
end
# Sorts similar images into groups.
# Takes an NArray of images as input. [img1,img2,...] = NArray(resolution,frames)
# Mask_t is an Array/NArray of dimension +frames+, 1 if the frame is to be
# considered, 0 if it is to be discarded. If empty, takes all images.
# Mask_x is an NArray (type byte) of the same dimension as each of the
# +images+, if 0 the corresponding pixel in +images+ gets ignored for
# analysis. Useful when the timer's shape is not quite rectangular.
# An image fits a group when its distance to the group's images, averaged
# over the group, is below thresh*(number of unmasked pixels); it is added to
# the group with the smallest average distance.
# Returns an array, where each entry is an array of similar images.
# [[img1,img4],[img2,img5],[img3]]
# All images must be of equal size.
# Raises ArgumentError when mask_x is not of type byte.
def group_images(images,thresh,mask_t,mask_x)
  raise ArgumentError, 'mask_x must be of type byte' unless mask_x.typecode == NArray::BYTE
  groups = []
  eff_res = mask_x.sum # effective resolution
  n = images.shape[1]
  # take each digit
  n.times do |i|
    next if mask_t[i]==0
    img = images[true,i]
    # check how well it fits into each group
    best = nil
    best_dist = nil
    groups.each do |group|
      total = group.inject(0.0){|s,pic|s+((img-pic)[mask_x]**2).sum**0.5}
      next unless total < thresh*eff_res*group.size
      mean = total/group.size
      if best.nil? || mean < best_dist
        best = group
        best_dist = mean
      end
    end
    if best
      # add to group where it fits best
      best << img
    else
      # doesn't fit, create new group
      groups << [img]
    end
  end
  return groups
end
# Removes (in place) groups that are too small to be a digit.
# Digits are expected to occur with a frequency given by +occ+; when +occ+ is
# empty all groups are removed. Otherwise a group is removed when it has
# fewer than half as many members as the 10th largest group (or the smallest
# group, when there are fewer than 10).
# Returns +groups+.
def trim_groups!(groups,occ)
  return groups.clear if occ.empty?
  # nothing to trim, and no group size to derive a threshold from
  return groups if groups.empty?
  gsizes = groups.map(&:size).sort
  trs = gsizes[[0,gsizes.size-10].max]/2
  groups.delete_if do |group|
    group.size < trs
  end
  # a hard limit of 10 groups used to be enforced here, it is disabled:
  # while groups.size > 10 do
  # min = groups.min{|x,y|x[1]<=>y[1]}
  # groups.delete(min)
  # end
  return groups
end
- ##############################
- # Image processing functions #
- ##############################
# Fills the area connected to pixel +i+ with +val+, in place.
# +pixels+ is a flat image, +dirs+ holds the index offsets of the neighbours
# to visit. Pixels equal to +block+ stop the fill, as do pixels that already
# have the value +val+.
def flood_fill(pixels,i,val,dirs,block=0)
  limit = pixels.size
  pending = [i]
  until pending.empty?
    pos = pending.pop
    next if pos<0 || pos>=limit || pixels[pos] == block || pixels[pos] == val
    pixels[pos] = val
    # schedule all neighbours of the pixel just filled
    neighbours = (pos+dirs).to_a
    pending.concat(neighbours)
  end
end
# Square shaping element of size 2*width+1, given as the offsets of its
# pixels within a flat image with rows of dims[0] pixels.
# Returns an int NArray with (2*width+1)**2 offsets.
def se_square(dims,width)
  row_len = dims[0]
  side = 2*width+1
  offsets = NArray.int(side,side)
  offsets.indgen
  offsets.map! do |k|
    col = k%side-width
    row = k/side-width
    col+row_len*row
  end
  offsets.reshape(side*side)
end
# Dilates a b/w image (0=black, 0xFF=white) in place: every pixel covered by
# the shaping element +se+ around a white pixel of the input becomes white.
# Faster when there aren't many white pixels.
# Returns +pixels+.
def fast_dilate(pixels,se)
  total = pixels.size
  positions = NArray.int(total)
  positions.indgen!
  positions[pixels.eq(0xFF)].each do |pos|
    covered = se+pos
    # ignore offsets that fall outside of the image
    pixels[covered[(covered>=0)&(covered<total)]] = 0xFF
  end
  pixels
end
# Dilates a b/w image (0=black, !0=white) in place: a black pixel becomes
# white (0xFF) when more than +thresh+ (as a fraction) of the pixels covered
# by the shaping element +se+ around it are white in the input image.
# Returns +pixels+.
def dilate(pixels,se,thresh=0.0)
  snapshot = pixels[true]
  total = pixels.size
  positions = NArray.int(total)
  positions.indgen!
  positions[pixels.eq(0)].each do |pos|
    covered = se+pos
    # ignore offsets that fall outside of the image
    covered = covered[(covered>=0)&(covered<total)]
    pixels[pos]=0xFF if snapshot[covered].ne(0).sum>covered.size*thresh
  end
  pixels
end
# Erodes a b/w image (0=black, !0=white) in place: a white pixel becomes
# black when more than +thresh+ (as a fraction) of the pixels covered by the
# shaping element +se+ around it are black in the input image.
# Returns +pixels+.
def erode(pixels,se,thresh=0.0)
  snapshot = pixels[true]
  total = pixels.size
  positions = NArray.int(total)
  positions.indgen!
  positions[pixels.ne(0)].each do |pos|
    covered = se+pos
    # ignore offsets that fall outside of the image
    covered = covered[(covered>=0)&(covered<total)]
    pixels[pos]=0 if snapshot[covered].eq(0).sum>covered.size*thresh
  end
  pixels
end
# Morphological closing, in place: dilate (threshold +thresh_1+) followed by
# erode (threshold +thresh_2+). Returns +pixels+.
def close(pixels,se,thresh_1=0.0,thresh_2=thresh_1)
  # dilate works in place and hands back the same image
  erode(dilate(pixels,se,thresh_1),se,thresh_2)
end
# Morphological opening, in place: erode (threshold +thresh_1+) followed by
# dilate (threshold +thresh_2+). Returns +pixels+.
# NOTE: being defined at the top level, this takes precedence over
# Kernel#open for calls without an explicit receiver.
def open(pixels,se,thresh_1=0.0,thresh_2=thresh_1)
  # erode works in place and hands back the same image
  dilate(erode(pixels,se,thresh_1),se,thresh_2)
end
# Labels the connected white areas of a b/w image.
# +pixels+ should be an int array black-white, i.e. 0 or 255, +dims+ is the
# frame size. Pixels are connected to their 8 neighbours.
# Returns a new array with 0 for black pixels and the number (1,2,...) of
# its area for each white pixel; +pixels+ is not modified.
def bw_label(pixels,dims)
  row_len = dims[0]
  total = dims[0]*dims[1]
  neighbours = NArray.to_na([1,-1,row_len,-row_len,1+row_len,1-row_len,row_len-1,-row_len-1])
  # marker for white pixels without a label, larger than any label
  unlabelled = total+1
  positions = NArray.int(total)
  positions.indgen!
  labels = pixels/255
  foreground = positions[labels>0]
  labels *= unlabelled
  next_label = 1
  foreground.each do |pos|
    next unless labels[pos] == unlabelled
    flood_fill(labels,pos,next_label,neighbours)
    next_label += 1
  end
  labels
end
- ################
- # Locate Timer #
- ################
# Analyses whether a time interval contains a continuously ticking timer.
# Reads +frames+ frames of +file+, starting at +start+ seconds and +dt+
# seconds apart; +period+ is the number of frames after which the timer is
# expected to repeat.
# Returns an array of all rectangular areas found and the mask of all pixels
# classified as timer (NArray of shape +dims+, 1 where the pixel matched).
# Raises ArgumentError for unknown or inapplicable methods.
def analyse_interval(file,start,frames,dt,period,opts={})
  opts[:debug] ||= Default[:debug]
  opts[:location_method] ||= Default[:location_method]
  opts[:dynamicity_method] ||= Default[:dynamicity_method]
  opts[:bounding_box_smooth_width] ||= Default[:bounding_box_smooth_width]
  opts[:bounding_box_area] ||= Default[:bounding_box_area]
  opts[:bounding_box_width] ||= Default[:bounding_box_width]
  opts[:bounding_box_height] ||= Default[:bounding_box_height]
  opts[:bounding_box_fill_ratio] ||= Default[:bounding_box_fill_ratio]
  opts[:threshold_pixel_std] ||= Default[:threshold_pixel_std]
  # read frames from video file or a sequence of images (frames)
  data, dims = get_frames(file,start,frames,dt,opts)
  #################
  # classify data #
  #################
  # not enough changes when dt is small, dynamicity method not applicable
  if dt < Rational(1,1)
    opts = opts.merge(:dynamicity_method => :standard_deviation)
    if dt < Rational(1,3)
      opts = opts.merge(:dynamicity_method => :none)
    else
      opts = opts.merge(:threshold_pixel_std => opts[:threshold_pixel_std]*dt)
    end
  elsif dt%10<1
    opts = opts.merge(:dynamicity_method => :none)
  end
  # res1: pixels that change over time
  res1 =
    case opts[:dynamicity_method]
    when :random_comparison
      pixel_difference_diff(data,opts)
    when :standard_deviation
      pixel_difference_std(data,opts)
    when :none
      everything = NArray.int(dims[0]*dims[1])
      everything[true] = 255
      everything
    else
      raise ArgumentError, "unknown dynamicity method #{opts[:dynamicity_method].inspect}"
    end
  # validates the time step for the step methods and returns the number of
  # frames per second; a whole-second step has no fractional part to use
  frames_per_step = lambda do
    frac = dt%1
    if frac == 0 || Rational(1,1)/frac < 3 || frac.lcm(Rational(1,1)) != Rational(1,1)
      raise ArgumentError,'fractional part of time step must be 1/n, n>=3 for method :steps, :fouriersteps'
    end
    Rational(1,1)/frac
  end
  # the fourier methods need a couple of periods to work with
  require_periods = lambda do
    if frames/period < 4
      raise ArgumentError, 'not enough samples for method :fourier, frames/period should be >= 4'
    end
  end
  # res2: pixels that change like a timer
  # (check_step_behaviour is called without opts and uses the defaults)
  res2 =
    case opts[:location_method]
    when :simple
      check_periodicity(data,period,opts)
    when :fourier
      require_periods.call
      freq_analysis(data,period,opts)
    when :both
      freq_analysis(data,period,opts) & check_periodicity(data,period,opts)
    when :steps
      dt_n = frames_per_step.call
      check_step_behaviour(data,dt_n)
    when :fouriersteps
      require_periods.call
      dt_n = frames_per_step.call
      freq_analysis(data,period,opts) & check_step_behaviour(data,dt_n)
    else
      raise ArgumentError, "unknown location method #{opts[:location_method].inspect}"
    end
  res = res1&res2
  ################
  # post-process #
  ################
  post = res.clone
  # save for later
  r_label = bw_label(post.clone,dims)
  r_nofill = get_rects(r_label,dims)
  # filter pixel noise, fill gaps
  se = se_square(dims,opts[:bounding_box_smooth_width])
  case opts[:location_method]
  when :simple, :both
    close(post,se,0.5)
  when :fourier
    close(post,se,0.5)
    erode(post,se,0.5)
  when :steps, :fouriersteps
    close(post,se,0.6)
  end
  # label connected areas
  post = bw_label(post,dims)
  opts[:debug] << " - got #{post.max} connected area(s)\n"
  # get bounding boxes
  rects = get_rects(post,dims)
  # merge boxes that overlap
  k = rects.size
  rects = merge_overlapping_rects(rects)
  opts[:debug] << " - merged some overlapping timer area(s), #{k} -> #{rects.size}\n" if rects.size<k
  # sometimes morphological operations may connect too much
  # add the original bounding boxes to the image
  rects.concat(r_nofill)
  # sane-ify
  # remove rectangles too small/large
  k = rects.size
  rects_limit_size(rects,opts[:bounding_box_area],opts[:bounding_box_width],opts[:bounding_box_height])
  opts[:debug] << " - eliminated too small/large timer areas, #{k} -> #{rects.size}\n" if rects.size<k
  # remove rectangles that are mostly empty (using the original mask)
  k = rects.size
  rects_limit_fillratio(rects,res,dims,opts[:bounding_box_fill_ratio])
  opts[:debug] << " - eliminated sparsely populated timer areas, #{k} -> #{rects.size}\n" if rects.size<k
  # get rid of those boxes we introduced above
  rects = merge_overlapping_rects(rects)
  # convert mask to binary
  res = res.eq(0xFF)
  res.reshape!(*dims)
  return rects, res
end
# Locates on-screen timers in the video +file+.
#
# The video is scanned in consecutive intervals of +frames_per_interval+ frames;
# +locate_temporal_resolution+ is the interval at which frames are taken/scanned.
# Candidate rectangles of all intervals are grouped, groups that occur in too few
# intervals are dropped, and the digit images of the remaining regions are extracted.
#
# Options not given in +opts+ are filled in from +Default+ (note: +opts+ is modified).
#
# Returns array of timers [timer1,timer2,...]
# (original author's note: first entry is the most probable one)
#  timer_i = [a_x, a_y, b_x, b_y, digits, mask], a=top-left corner, b=bottom_right_corner
#  digits  = [image_1,...] = [NArray(pixels),...]
#  mask    = flattened pixel mask of the timer region, merged over all active intervals
# Returns an empty array if no timer (or no pixel mask) was found.
def locate_timers(file,opts={})
  opts[:debug] ||= Default[:debug]
  opts[:digits_difference] ||= Default[:digits_difference]
  opts[:timer_frequency_ratio] ||= Default[:timer_frequency_ratio]
  opts[:locate_temporal_resolution] ||= Default[:locate_temporal_resolution]
  opts[:period_seconds] ||= Default[:period_seconds]
  opts[:frames_per_interval] ||= Default[:frames_per_interval]
  opts[:min_different_digits] ||= Default[:min_different_digits]

  psc = Rational(opts[:period_seconds])
  ltr = Rational(opts[:locate_temporal_resolution])
  # period as integer number of frames
  per = ((psc.lcm(ltr).lcm(Rational(1,1)))/ltr).to_i
  frm = opts[:frames_per_interval]
  d_diff = opts[:digits_difference]
  mdd = opts[:min_different_digits]
  duration = VideoScanner.get_duration(file).to_i
  # number of intervals that fit into the video
  len = (duration/(frm*ltr)).to_i
  # frames of intervals with an active timer
  active_intervals = NArray.byte(frm*len)

  # how often each digits appears for a given period, eg per=2, 60 frames,
  # '0' 30 times, '5' 30 times, note that +per+ can be rational
  occ = {}
  len.times do |i|
    frm.times do |j|
      t = i*frm*ltr+ltr*j
      d = (t%10).to_i
      occ[d] = (occ[d] || 0) + 1
    end
  end

  rects = [] # rectangular region(s) containing the timer(s)
  masks = [] # the original mask of which pixel is compatible with the presence of a timer
  # get possible location for each interval
  len.times do |i|
    opts[:debug] << "Analyzing %0d:%02d:%02d.%03d-%0d:%02d:%02d.%03d...\n" % sec2hhmmssms(i*ltr*frm).concat(sec2hhmmssms((i+1)*ltr*frm))
    r, m = analyse_interval(file,i*frm*ltr,frm,ltr,per,opts)
    unless r.empty?
      # remember which intervals contain a timer
      active_intervals[i*frm..(i+1)*frm-1] = 1
      # the original mask of pixels compatible with the presence of a timer
      masks.push(m)
    end
    rects.concat(r)
    opts[:debug] << "Found #{r.size} candidates.\n\n"
  end

  # group rects and take those with the highest frequency
  groups = group_rects(rects,opts)
  k = groups.size
  thresh =
    if len < 18
      1
    elsif len < 36
      2
    else
      len/18
    end
  groups.delete_if{|g|g.size<=thresh}
  opts[:debug] << "Removed some candidates that did not occur in many scanned intervals, #{k} -> #{groups.size}.\n" if groups.size<k
  # only the size of the largest group matters; with no groups the select block never runs
  best = groups.max_by{|g|g.size}
  timers = groups.select{|g|g.size>=best.size*opts[:timer_frequency_ratio]}
  # average the rectangles of each group
  timers.map!{|g|g.inject(&:+).map{|x|x/g.size}.to_a}
  # sorry, no timers found
  return timers if timers.empty? || masks.empty?

  # merge all masks together
  opts[:debug] << "Storing original pixel masks...\n"
  mask = masks.inject(NArray.byte(*masks[0].shape)){|s,m|s|m}
  # save the original mask for each timer region
  timers.each do |timer|
    timer[5] = mask[timer[0]..timer[2],timer[1]..timer[3]]
    timer[5].flatten!
  end

  # extract images of the timer's digits
  opts[:debug] << "Extracting timer digit images...\n"
  timer_images = get_cropped_frames(file,0,frm*len,ltr,timers,opts)
  # group images into different digits and average
  opts[:debug] << "Grouping timer digit images...\n"
  timer_images.each_with_index do |images,i|
    groups = group_images(images,d_diff,active_intervals,timers[i][5])
    trim_groups!(groups,occ)
    average_images!(groups)
    timers[i][4] = groups
  end

  # delete timers with not enough digits
  k = timers.size
  timers.delete_if{|t|t[4].size < mdd}
  # like the other messages: only report when something was removed, and end the line
  opts[:debug] << "Removed some timers that did not contain enough digits, #{k} -> #{timers.size}.\n" if timers.size<k
  return timers
end
# Writes one line per timer region to +io+ (anything responding to +<<+):
# the top-left corner and the size of the bounding box.
# +timers+ is a list of [a_x, a_y, b_x, b_y, ...]; the corners are inclusive,
# hence the +1 when computing width and height.
# Returns +timers+.
def pretty_print_timer_regions(timers,io)
  timers.each_with_index do |t,i|
    left, top, right, bottom = t[0], t[1], t[2], t[3]
    width = right-left+1
    height = bottom-top+1
    io << "Timer #{i+1}: (#{left},#{top}), size #{width}x#{height}\n"
  end
end
#############################
# Check timers for activity #
#############################
# Scans +frames+ frames of +file+, starting at +start+ seconds and spaced +dt+
# seconds apart, and determines for every timer when it is ticking or on hold.
#
# +timers+ is either a list of timers as returned by +locate_timers+ or a single
# such timer; of each timer, entry 4 (digit images) and entry 5 (pixel mask) are used.
# Options not given in +opts+ are filled in from +Default+ (note: +opts+ is modified).
#
# Returns an array +timers+ with info on when the timer starts and stops.
#  timers = [timer_1, timer_2 ,...]
#  timer_i = [events, initial_state, final_state]
#  events = [event_1, event_2, ...]
#  event_i = [time_index, type_of_event]
#  type_of_event: non-zero difference of two adjacent states; per the original notes
#    1 (starts) or 255 (stops) -- presumably -1 wrapped around in a byte array, TODO confirm
#  initial_state: 1 if the first sample is ticking, -1 otherwise
#  final_state: 1 if the last sample is on hold, -1 otherwise (note the inverted sense)
#  time_index: time in seconds relative to the beginning of the video file
# Without timers the result is empty; with fewer than 3 frames every timer is
# reported as [[]] (no events, no states).
def scan_timers(file,start,frames,dt,timers,opts={})
  return [] if timers.empty?
  # a single timer may be passed directly; normalize before anything depends on the count
  timers = [timers] unless timers[0].is_a?(Array)
  return timers.map{[[]]} if frames < 3

  opts[:digits_difference] ||= Default[:digits_difference]
  opts[:timer_ticking_hold_threshold] ||= Default[:timer_ticking_hold_threshold]
  opts[:timer_ticking_tick_threshold] ||= Default[:timer_ticking_tick_threshold]
  opts[:timer_ticking_smooth_width] ||= Default[:timer_ticking_smooth_width]
  # the tick threshold used to be read from the hold option by mistake
  thresh_tick = opts[:timer_ticking_tick_threshold]
  thresh_hold = opts[:timer_ticking_hold_threshold]
  thresh_digits = opts[:digits_difference]
  smw = opts[:timer_ticking_smooth_width]

  # get video frames
  regs = get_cropped_frames(file,start,frames,dt,timers,opts)
  # analyze video to determine when the timer is ticking
  regs.each_with_index.map do |r,timer_i|
    digits = timers[timer_i][4]
    mask = timers[timer_i][5]
    eff_res = mask.sum # effective resolution
    limit = (eff_res*thresh_digits)**2
    # list of frames where it does look like one of the digits of the timer
    alk = NArray.byte(frames)
    x = NArray.int(frames-1)
    # check at which frames it looks similar to the timer, consider only pixels given by +mask+
    frames.times do |i|
      alk[i] = 1 if digits.index{|img|((img-r[true,i])[mask]**2).sum<=limit}
    end
    # threshold into two states: ticking / on-hold, by getting the
    # difference of adjacent frames, considering only pixels given by +mask+
    (frames-1).times do |i|
      x[i] = ((r[true,i]-r[true,i+1])[mask]**2).sum > limit ? 1 : 0
    end
    # time is paused when it is not visible
    x &= alk[0..-2]
    x &= alk[1..-1]
    # remove wild fluctuations
    x = smooth_gauss(x,smw)>0.5
    # remove short-term changes
    filter_short_states(x,thresh_hold,thresh_tick)
    # get transitions between the two states
    y = x[1..-1]-x[0..-2]
    # get initial/final state
    init = x[0] == 0 ? -1 : 1
    fin = x[-1] == 0 ? 1 : -1
    # translate indices into time stamps (middle between the two frames compared)
    idx = NArray.float(y.size).indgen[y.ne(0)]
    val = y[idx]
    idx.map!{|z|(start+(z+0.5)*dt).round}
    [idx.to_a.zip(val.to_a),init,fin]
  end
end
# Determines for every timer region when the timer starts and stops.
#
# The whole video is scanned at +scan_interval_coarse+; if +scan_fine_interval+ is set
# and +scan_interval_fine+ is smaller, the beginning and end of the video as well as
# every event found are scanned again at the finer time step. Finally events that
# are the same are merged and timers that behave the same are removed.
#
# I suggest using an odd/prime (not 5) number of seconds for scan_interval,
# to get as many different digits as possible.
# Eg, the seconds timer will always end on 0 or 5 if you choose the interval 5s.
#
# Returns a list of [events, initial_state, final_state], see +scan_timers+;
# an empty list if +timer_regions+ is empty.
def timer_ticking(file,timer_regions,opts={})
  # nothing to scan (used to reference an undefined variable here)
  return [] if timer_regions.empty?

  opts[:debug] ||= Default[:debug]
  opts[:scan_interval_coarse] ||= Default[:scan_interval_coarse]
  opts[:scan_interval_fine] ||= Default[:scan_interval_fine]
  opts[:scan_fine_interval] ||= Default[:scan_fine_interval]
  opts[:timer_ticking_hold_threshold] ||= Default[:timer_ticking_hold_threshold]
  opts[:timer_ticking_tick_threshold] ||= Default[:timer_ticking_tick_threshold]
  thresh_tick = opts[:timer_ticking_tick_threshold]
  thresh_hold = opts[:timer_ticking_hold_threshold]
  dt_coarse = Rational(opts[:scan_interval_coarse])
  dt_fine = Rational(opts[:scan_interval_fine])
  duration = VideoScanner.get_duration(file).to_i
  frames = (duration/dt_coarse).to_i

  opts[:debug] << "Scanning timers for activity...\n"
  timer_events = scan_timers(file,0,frames,dt_coarse,timer_regions,opts)
  timer_events.each_with_index do |t,i|
    opts[:debug] << "Timer #{i}: Found #{t[0].size} event(s).\n"
  end

  # go over all events and temporally locate them at a higher resolution;
  # the fine scans get the same +opts+ as the coarse scan so user settings apply to both
  if opts[:scan_fine_interval] && dt_fine < dt_coarse
    timer_events.each_with_index do |timer,i|
      region = timer_regions[i]

      # check start at fine resolution
      opts[:debug] << "Checking at the beginning of the video at a higher resolution for the timer #{i}...\n"
      frm = (thresh_tick*dt_coarse*2/dt_fine).to_i
      fine_start = scan_timers(file,0,frm,dt_fine,region,opts)[0]
      unless fine_start[0].empty?
        timer[1] = fine_start[1]
        timer[0] = fine_start[0].concat(timer[0])
      end

      # check end at fine resolution
      opts[:debug] << "Checking at the end of the video at higher a resolution for the timer #{i}...\n"
      t1 = thresh_hold*dt_coarse*2
      frm = (t1/dt_fine).to_i
      # never start before the beginning of a short video
      t0 = [duration-2-t1,0].max
      fine_end = scan_timers(file,t0,frm,dt_fine,region,opts)[0]
      unless fine_end[0].empty?
        timer[2] = fine_end[2]
        timer[0].concat(fine_end[0])
      end

      # check events at finer resolution
      timer[0].each_with_index do |event,j|
        opts[:debug] << "Refining estimate for timer #{i}, event #{j}...\n"
        time = event[0]
        type = event[1]
        delta = type == 1 ? thresh_tick : thresh_hold
        t0 = time - delta*dt_coarse
        t1 = time + delta*dt_coarse
        t0 = 0 if t0 < 0
        t1 = duration if t1 > duration
        frm = ((t1-t0)/dt_fine).to_i
        # keep only events of the same type and take the mean of their time stamps
        fine = scan_timers(file,t0,frm,dt_fine,region,opts)[0][0].select{|f|f[1]==type}
        next if fine.empty?
        total = fine.inject(0){|s,f|s+f[0]}
        event[0] = (total/fine.size.to_f).round
      end
    end
  end

  # remove events that are the same and sort events
  timer_events.each do |timer|
    # keep the first event of every set of duplicates; deleting while comparing
    # against the array itself would drop all members of such a set
    unique = []
    timer[0].each do |ev|
      known = unique.any?{|kept|kept[1]==ev[1] && (kept[0]-ev[0]).abs < 2*dt_fine}
      unique << ev unless known
    end
    timer[0].replace(unique)
    # sort by event time, then by event type
    timer[0].sort! do |x,y|
      a = x[0]<=>y[0]
      a == 0 ? x[1]<=>y[1] : a
    end
  end

  # remove timers that are the same: same number and types of events, same initial
  # state and event times that differ by less than 2*dt_fine on average
  array_remove_duplicates!(timer_events) {|t1,t2|
    n = t1[0].size
    if t2[0].size == n
      diff = 0
      n.times{|i|diff += t1[0][i][1]==t2[0][i][1] ? (t1[0][i][0]-t2[0][i][0]).abs : 9E99}
      t1.size == t2.size && t1[1]==t2[1] && diff < 2*n*dt_fine
    end
  }
  return timer_events
end
# Removes duplicates from +arr+ in place and returns +arr+.
# Two elements are duplicates if they are the same object or if the block,
# called as block(x, y), returns a true value. The first element of every set
# of duplicates is kept. (Deleting while comparing against +arr+ itself removed
# every member of such a set, since the array is not compacted until the end.)
def array_remove_duplicates!(arr,&block)
  kept = []
  arr.each do |x|
    duplicate = kept.any?{|y|x.equal?(y) || yield(x,y)}
    kept << x unless duplicate
  end
  arr.replace(kept)
end
# Writes a human readable summary of the timer events to +io+ (anything
# responding to +<<+). +timers+ is a list of [events, initial_state, final_state]
# as returned by +scan_timers+ / +timer_ticking+:
#  initial_state == 1 means the timer is ticking at the beginning,
#  final_state == 1 means the timer is paused at the end,
#  an event of type 1 is a start, any other type a stop.
# Returns +timers+.
def pretty_print_timer_events(timers,io)
  timers.each_with_index do |timer,i|
    events, initial, final = timer[0], timer[1], timer[2]
    io << "Timer #{i+1}:\n"
    if events.empty?
      # no events means the state never changes (the old check compared against a
      # fourth entry that timers do not have, so this branch was never taken)
      # note that this script can't detect the timer if it's paused the entire time
      io << " - keeps #{initial == 1 ? 'ticking' : 'being paused'}\n"
    else
      io << " - starts off #{initial == 1 ? 'ticking' : 'paused'}.\n"
      events.each do |event|
        hmsms = sec2hhmmssms(event[0])
        io << " - #{event[1]==1 ? 'starts' : 'stops'} at %0d:%02d:%02d.%03d\n" % hmsms
      end
      io << " - ends up #{final == 1 ? 'paused' : 'ticking'}.\n"
    end
  end
end
########################
# Analyze a video file #
########################
# Locates the timers in +file+ and determines when they start and stop, using
# the settings given in +opts+ (missing ones are taken from +Default+ by the
# functions called). Progress is written to +opts[:debug]+.
#
# Returns [timer_events, timer_regions], see +timer_ticking+ and +locate_timers+,
# or [[],[]] if no timer or no timer activity was found.
# Raises StandardError if +file+ does not exist or is not readable.
def scan_file(file,opts={})
  opts[:debug] ||= Default[:debug]
  log = opts[:debug]
  file = File.expand_path(file)
  raise StandardError, 'no such file' unless File.exist?(file) && File.readable?(file)
  t_init = Time.now

  # scan for timers
  timer_regions = locate_timers(file,opts)
  if timer_regions.empty?
    log << "\n\n"
    log << "Failed to find any timers.\n"
    return [],[]
  end
  # debug
  log << "\n\n"
  log << "Timers found:\n"
  pretty_print_timer_regions(timer_regions,log)
  log << "\n"

  # scan for timer movement and delete non-moving timers
  timer_events = timer_ticking(file,timer_regions,opts)
  timer_events.delete_if{|t|t[0].empty? && t[1]!=1}
  # every timer left has events or keeps ticking (this check used to
  # reference an undefined variable)
  if timer_events.empty?
    log << "\n\n"
    log << "Failed to detect any events, giving up.\n"
    return [],[]
  end
  # debug
  log << "\n\n"
  log << "This is what they're doing:\n"
  pretty_print_timer_events(timer_events,log)
  log << "\n"
  log << "Scanning took #{(Time.now-t_init).round}s.\n"
  return timer_events, timer_regions
end
# Like +scan_file+, but chooses the time steps and location methods depending on
# the duration of the video and retries with the settings for shorter videos
# until timers (and then events) are found.
#
# Original author's notes:
# merge timers that do the same, eg see dump_000000003.mp4
# With timestep 10.x, seconds digit won't change much.
# Method fourier needs some frames, at least ~ 6 * period (number of frames after which digit repeats).
# Dynamicty does not work well with 0.x timesteps, so it gets disabled.
#
# +opts[:transcode_before_analysis]+ defaults to +Default[:transcode_before_analysis]+;
# pass +false+ to analyze the file as it is. Progress is written to +opts[:debug]+.
#
# Returns [timer_events, timer_regions], or [[],[]] if no timer or no timer
# activity was found. Raises StandardError if +file+ does not exist or is not readable.
def smart_scan_file(file,opts={})
  opts[:debug] ||= Default[:debug]
  # do not use ||= here, an explicit +false+ must not be replaced by the default
  opts[:transcode_before_analysis] = Default[:transcode_before_analysis] if opts[:transcode_before_analysis].nil?
  file = File.expand_path(file)
  raise StandardError, 'no such file' unless File.exist?(file) && File.readable?(file)
  file = transcode_file(file).to_s if opts[:transcode_before_analysis]

  # [duration limit in seconds, time step for locating timers, location methods to try]
  lookup1 = [
    [0, nil, []],
    [10, Rational(1,5), [:steps]],
    [70, Rational(1,4), [:steps]],
    [5*60, Rational(5,4), [:simple]],
    [10*60, 1, [:fourier]],
    [10*60, Rational(6,5), [:simple]],
    [15*60, 1, [:simple,:fourier]],
    [30*60, 1, [:simple,:fourier]],
    [60*60, 3, [:simple,:fourier]],
    [120*60, Rational(102,10),[:steps]],
    [120*60, 3, [:simple,:fourier]],
    [180*60, 7, [:simple,:fourier]],
    [240*60, 7, [:simple,:fourier]],
    [480*60, 9, [:simple,:fourier]],
    [600*60, 9, [:simple,:fourier]],
    [9E99, 9, [:simple,:fourier]]
  ]
  # [duration limit in seconds, coarse scan interval, fine scan interval]
  lookup2 = [
    [0, nil, []],
    [10, Rational(11,10), Rational(21,20)],
    [10*60, Rational(6,5), Rational(11,10)],
    [30*60, Rational(1,1), Rational(4,3)],
    [60*60, Rational(3,1), Rational(5,4)],
    [120*60, Rational(7,1), Rational(5,4)],
    [180*60, Rational(7,1), Rational(5,4)],
    [240*60, Rational(7,1), Rational(5,4)],
    [480*60, Rational(9,1), Rational(5,4)],
    [600*60, Rational(9,1), Rational(5,4)],
    [9E99, Rational(9,1), Rational(5,4)]
  ]

  t_init = Time.now
  t_reg, t_ev = [], []
  duration = VideoScanner.get_duration(file)
  fps = VideoScanner.get_fps(file)[0].to_f

  # locate timers
  opts[:debug] << "Analyzing <#{file}>, " + "duration %0d:%02d:%02d.%03d...\n" % sec2hhmmssms(duration)
  opts[:debug] << "\n"
  opts[:debug] << "Scanning for timers...\n"
  # start with the last entry for the smallest limit above the duration, then
  # fall back to the entries above it in the table; entry 0 is only a sentinel
  idx = lookup1.index{|d|duration < d[0]}
  idx = lookup1.rindex{|d|lookup1[idx][0] == d[0]}
  jdx = 0
  while idx > 0
    step = lookup1[idx][1]
    method = lookup1[idx][2][jdx]
    opts[:debug] << "\n"
    opts[:debug] << "Trying timestep #{step.to_f}, method #{method}...\n"
    opts = opts.merge(:locate_temporal_resolution => step, :location_method => method)
    t_reg = locate_timers(file,opts)
    break unless t_reg.empty?
    opts[:debug] << 'Retrying...'
    # next method of this entry, or the first method of the previous entry
    jdx += 1
    if jdx >= lookup1[idx][2].size
      idx -= 1
      jdx = 0
    end
  end
  # check for success
  if t_reg.empty?
    opts[:debug] << "\n\n"
    opts[:debug] << "Failed to locate any timers, giving up.\n"
    return [],[]
  end
  # debug
  opts[:debug] << "\n\n"
  opts[:debug] << "Timer(s) found:\n"
  pretty_print_timer_regions(t_reg,opts[:debug])
  opts[:debug] << "\n"

  # get events
  opts[:debug] << "\n"
  opts[:debug] << "Analyzing timer(s) for event(s)...\n"
  idx = lookup2.index{|d|duration < d[0]}
  idx = lookup2.rindex{|d|lookup2[idx][0] == d[0]}
  while idx > 0
    coarse = lookup2[idx][1]
    fine = lookup2[idx][2]
    opts[:debug] << "\n"
    opts[:debug] << "Trying coarse timestep #{coarse.to_f}, fine #{fine.to_f}...\n"
    opts = opts.merge(:scan_interval_coarse => coarse, :scan_interval_fine => fine)
    t_ev = timer_ticking(file,t_reg,opts)
    # done as soon as one timer has events or keeps ticking
    break if t_ev.find{|t|!t[0].empty? || t[1]==1}
    opts[:debug] << "Retrying...\n"
    idx -= 1
  end
  # remove timer that are being paused the entire time (shouldn't be possible to detect, though)
  t_ev.delete_if{|t|t[0].empty? && t[1]!=1}
  # check for success
  if !t_ev.find{|t|!t[0].empty? || t[1]==1}
    opts[:debug] << "\n\n"
    opts[:debug] << "Failed to detect any events, giving up.\n"
    return [],[]
  end
  # debug
  opts[:debug] << "\n\n"
  opts[:debug] << "This is what they're doing:\n"
  pretty_print_timer_events(t_ev,opts[:debug])
  # report run time
  opts[:debug] << "\n"
  t_elap = Time.now-t_init
  opts[:debug] << "Scanning took #{t_elap.round}s, #{(duration*fps/t_elap.to_f).round} fps.\n"
  return t_ev, t_reg
end
#################
# CLI interface #
#################
# Runs only when +caller+ is empty, i.e. at the top level of the script being
# executed. Every file given on the command line is analyzed with
# +smart_scan_file+; all output goes to $stderr. An error while scanning one
# file is reported and the remaining files are still processed.
if caller.empty?
  if ARGV.empty?
    $stderr << "usage: ruby #{__FILE__} <input-video-file>\n"
    exit(1)
  end
  ARGV.each do |file|
    begin
      # the results are reported through the debug stream, the return value is not needed
      smart_scan_file(file, :debug => $stderr)
    rescue StandardError=>e
      $stderr << "error scanning #{file}\n"
      $stderr << "#{e.class}, #{e.message}, #{e.backtrace}\n"
    end
    # separator between the reports of two files
    $stderr << "\n########################################\n"
    $stderr << "########################################\n\n"
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement