SHARE
TWEET

identiglyph.pl

a guest Oct 3rd, 2010 391 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env perl
  2.  
  3. use bytes;
  4. use warnings;
  5. use strict;
  6. use POSIX qw(ceil);
  7. use Math::FFT;
  8. use constant { FFTLEN => 2048, SAMPRATE => 44100 };
  9. use constant BINHZ => SAMPRATE / FFTLEN;
  10. use constant BUFZERO => ((0.0) x FFTLEN);
  11.  
  12. my @samp;
  13. my @fftbuf;
  14. my @symbols;
  15. my $matrix = gen_matrix();
  16. my ($prevsymstart, $numsym) = (-1, 0);
  17. my %symstats;
  18.  
  19. @ARGV == 1 or print "usage: $0 filename.wav\n" and exit 1;
  20.  
  21. open IN, "-|:raw", "sox $ARGV[0] -t raw -" or die("error invoking sox: $!");
  22. push @samp, unpack("s*", $_) while sysread(IN, $_, 1024);
  23. close IN;
  24. print "Read ${\ scalar(@samp)} samples\n";
  25. rss_used();
  26. find_symbols();
  27. rss_used();
  28. print "Processed $numsym symbols\n";
  29. print "Symbol statistics:\n";
  30. print "  $_: $symstats{$_} times\n" for (sort { length($a) <=> length($b) || $a cmp $b } keys %symstats);
  31. print "\n";
  32.  
  33. open OUT, ">", "identiglyph.out" or die("error opening identiglyph.out: $!");
  34. print OUT join('', @symbols);
  35. close OUT;
  36. exit 0;
  37.  
  38. sub find_symbols
  39. {
  40.   use constant { THRESH => ceil(0.10 * 32768), MINRUNLEN => 40 };
  41.   my ($inrun, $runlen) = (0, 0);
  42.  
  43.   for (my $i = 0; $i < @samp; $i++) {
  44.     if (abs($samp[$i]) < THRESH) {
  45.       if (!$inrun) {
  46.         $inrun = $runlen = 1;   # begin a run of zeros
  47.       } else {
  48.         $runlen++;              # continue a run
  49.       }
  50.     } else {
  51.       if ($inrun) {             # end of run -- process if long enough
  52.         do_symbol($i, $runlen) if($runlen >= MINRUNLEN);
  53.         $inrun = 0;
  54.       }
  55.     }
  56.   }
  57.   # file probably ended with a run of zeros, so get the last symbol
  58.   do_symbol($#samp+1, $runlen) if($inrun && $runlen >= MINRUNLEN);
  59. }
  60.  
  61. sub do_symbol
  62. {
  63.   # we're called when at index $i in @samp and we've just seen the end of a $runlen run of zeros
  64.   my ($i, $runlen) = @_;
  65.  
  66.   # assume file starts with a run of zeros
  67.   $prevsymstart = $i, return if ($prevsymstart == -1);
  68.  
  69.   my ($symstart, $symend) = ($prevsymstart, $i - $runlen - 1);
  70.   my $len = $symend - $symstart + 1;
  71.  
  72.   # open(OUT, "|-:raw", "sox -t raw -r 44100 -s -w -c 1 - symbol_wavs/symbol_" . sprintf("%08d", $symstart) . ".wav") or die("error invoking sox: $!");
  73.   # print OUT pack("s*", @samp[$symstart .. $symend]);
  74.   # close(OUT);
  75.  
  76.   my $bufstart = ceil((FFTLEN - $len)/2);
  77.   $numsym++;
  78.   $prevsymstart = $i;
  79.   print "Warning: symbol start=$symstart end=$symend len=$len probably incorrect, skipping.\n", return if ($len > 750 || $len < 700);
  80.  
  81.   # FFT buffer is wider than the sample, so zero it out and then copy the sample into the center,
  82.   # converting from signed integers to floating point along the way
  83.   @fftbuf = BUFZERO;
  84.   # $fftbuf[$bufstart + $_] = $samp[$symstart + $_] / 32768.0 for (0..$len-1);  # faster?
  85.   @fftbuf[$bufstart..$bufstart+$len-1] = map { $_ / 32768.0 } @samp[$symstart..$symend];
  86.   my $fft = Math::FFT->new(\@fftbuf);
  87.   my $spectrum = $fft->spctrm(window => "hamm");
  88.   # print "symbol $numsym:\n";
  89.   # printf "           DC: %8f\n", $$spectrum[0];
  90.   # my $h = BINHZ / 2;
  91.   # printf("  %8.2f Hz: %8f\n", $h, $$spectrum[$_]), $h+=BINHZ for (1..$#$spectrum);
  92.   print "Symbol $numsym top bins: ";  
  93.   # printf "%.2f Hz, power=%8f  ", (($_-1) * BINHZ + BINHZ/2), $$spectrum[$_] for (sort { $$spectrum[$b] <=> $$spectrum[$a] } (0..$#$spectrum))[0..1];
  94.   # my @topbins = sort { $a <=> $b } (sort { $$spectrum[$b] <=> $$spectrum[$a] } (0..$#$spectrum))[0..2];
  95.  
  96.   my @bins = ((sort { $$spectrum[$b] <=> $$spectrum[$a] } (28..42))[0], (sort { $$spectrum[$b] <=> $$spectrum[$a] } (50..65))[0]);
  97.   printf "%d/%.2f Hz  ", $_, (($_-1) * BINHZ + BINHZ/2) for @bins;
  98.   $symstats{$_}++ for @bins;
  99.   $symstats{join("x", @bins)}++;
  100.   push @symbols, $matrix->{$bins[0]}{$bins[1]};
  101.   print "\n";
  102. }
  103.  
  104. sub gen_matrix
  105. {
  106.   my %matrix;
  107.   my $i = 1;
  108.  
  109.   # these are bin numbers for N=2048, they need scaling if N changes
  110.   # corresponding bin frequencies are (635, 700, 764, 851) and (1109, 1217, 1346) in Hz
  111.   # actual measured frequencies are   (636, 709, 785, 862) and (1109, 1228, 1357) in Hz
  112.   for my $r (30, 33, 36) {
  113.     for my $c (52, 57, 63) {
  114.       $matrix{$r}{$c} = $i++;
  115.     }
  116.   }
  117.   $matrix{40}{57} = 0;
  118.  
  119.   return \%matrix;
  120. }
  121.  
  122. sub rss_used
  123. {
  124.   open(STAT, "<", "/proc/$$/stat") and print STDERR sprintf("Resident memory: %.2f MB\n", (((split(/ /, <STAT>))[23]) * 4096 / 1024 / 1024)) and close(STAT);
  125. }
RAW Paste Data
Top