Share Pastebin
Guest
Public paste!

Plutor

By: a guest | Aug 19th, 2007 | Syntax: Perl | Size: 2.35 KB | Hits: 68 | Expires: Never
Copy text to clipboard
  #!/usr/bin/perl

  # How I used this:
  # I got all of the MetaTalk archives at <http://metatalk.metafilter.com/archive.mefi>
  # and saved all of those pages.  Then, I ran this script with all of the filenames as
  # arguments.  I posted the results here: <http://metatalk.metafilter.com/14744/Safety-first>
  #
  # What it prints: one line of HTML for every pair of consecutive posts (across
  # all of the given archive pages) that were made at least 24 hours apart,
  # starting with the most recent pair.

  use strict;
  use warnings;

  use POSIX qw(mktime strftime);

  # Smallest gap between two consecutive posts, in seconds, that gets reported.
  use constant MIN_GAP_SECONDS => 24 * 60 * 60;

  # Month name as written in the archive heading => zero-based month number,
  # which is the form mktime() takes.
  my %month_index_of = (
      January   => 0,
      February  => 1,
      March     => 2,
      April     => 3,
      May       => 4,
      June      => 5,
      July      => 6,
      August    => 7,
      September => 8,
      October   => 9,
      November  => 10,
      December  => 11,
  );

  # Page heading that names the month and year the archive page covers.
  my $ARCHIVE_HEADING_RE
      = qr{<p class="copy">(\w+) (\d{4}) Archives \(<a href="/archive\.mefi">all archives</a>\)</p>};

  # A day heading captures the day of the month (group 1); a post byline
  # captures hour, minute, AM/PM and the post URL (groups 2-5).  Both are
  # matched by one alternation so that they are seen in document order.
  my $DAY_HEADING_RE = qr{<div class="monthday">\w+ (\d+)</div>};
  my $BYLINE_RE
      = qr{<span class="smallcopy">posted by <a[^>]*>[^<]*</a> to <a[^>]*>[^<]*</a> at (\d+):(\d+) ([AP]M) PST - <a href="([^"]*)"};
  my $ENTRY_RE = qr{$DAY_HEADING_RE|$BYLINE_RE};

  # Read a whole file into one string.  Warns and returns undef when the file
  # cannot be opened, so one bad argument does not abort the whole run.
  sub slurp_file {
      my ($filename) = @_;

      open(my $fh, "<", $filename) or do {
          warn "Could not open $filename: $!";
          return;
      };
      local $/;    # no record separator: read everything at once
      my $content = <$fh>;
      close($fh);

      return $content;
  }

  # Extract the posts listed on one archive page.
  #
  #   $html   - full text of the archive page
  #   $source - label used in warnings (typically the file name); optional
  #
  # Returns a list of [ $epoch, $url, $name ] array refs, one per post, in
  # document order.  Returns an empty list (after warning) when the page has no
  # recognizable month/year heading.
  #
  # NOTE(review): the page labels its times "PST", but they are converted with
  # mktime() and therefore interpreted in the local time zone of the machine
  # running the script -- confirm that is acceptable for your use.
  sub parse_archive {
      my ($html, $source) = @_;
      $source = 'input' unless defined $source;

      my ($month, $year) = $html =~ $ARCHIVE_HEADING_RE;
      unless (defined $month and exists $month_index_of{$month}) {
          warn "$source: no recognizable archive heading, skipping\n";
          return;
      }
      my $month_index = $month_index_of{$month};
      my $tm_year     = $year - 1900;    # mktime() counts years from 1900

      my @found;
      my $day;                           # most recent day heading seen so far
      while ($html =~ /$ENTRY_RE/g) {
          # Copy the captures right away; later matches would clobber them.
          my ($mday, $hour12, $minute, $meridiem, $url) = ($1, $2, $3, $4, $5);

          if (defined $mday) {
              $day = $mday;
              next;
          }

          unless (defined $day) {
              warn "$source: post $url appears before any day heading, skipping\n";
              next;
          }

          # 12-hour to 24-hour clock: 12 AM is hour 0, 12 PM is hour 12.
          my $hour = $hour12 % 12;
          $hour += 12 if $meridiem eq "PM";

          # mktime() rather than strftime('%s'): '%s' is a non-standard
          # format extension and is not available everywhere.
          my $epoch = mktime(0, $minute, $hour, $day, $month_index, $tm_year);
          unless (defined $epoch) {
              warn "$source: cannot convert the time of post $url, skipping\n";
              next;
          }

          # The numeric post id is the link text; fall back to the URL itself
          # so the link never ends up with empty text.
          my ($name) = $url =~ m{/mefi/(\d+)};
          $name = $url unless defined $name;

          push @found, [ $epoch, $url, $name ];
      }

      return @found;
  }

  # Build the report for a list of [ $epoch, $url, $name ] posts (any order).
  #
  # Returns one newline-terminated HTML line per pair of chronologically
  # adjacent posts that are at least MIN_GAP_SECONDS apart, newest pair first.
  # The date shown is that of the older post of the pair.
  sub gap_report_lines {
      my @posts = sort { $b->[0] <=> $a->[0] } @_;    # newest first

      my @lines;
      my $newer = shift @posts;
      for my $older (@posts) {
          my $gap = $newer->[0] - $older->[0];    # never negative: sorted above
          if ($gap >= MIN_GAP_SECONDS) {
              push @lines, sprintf(
                  qq{<a href="%s">%s</a> and <a href="%s">%s</a> - %d:%02d apart on %s\n},
                  $newer->[1], $newer->[2],
                  $older->[1], $older->[2],
                  int($gap / 3600), int($gap / 60) % 60,
                  strftime('%d %b %Y', localtime($older->[0])),
              );
          }
          $newer = $older;
      }

      return @lines;
  }

  # Main flow: gather the posts from every archive page named on the command
  # line, then print the gaps.
  my @posts;
  for my $filename (@ARGV) {
      my $html = slurp_file($filename);
      next unless defined $html;
      push @posts, parse_archive($html, $filename);
  }

  print gap_report_lines(@posts);