Advertisement
Guest User

Untitled

a guest
Jun 26th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.24 KB | None | 0 0
  1. use HTML::Tree;
  2.  
  # rutracker_parse_forum
  #
  # Parses the saved forum listing page "$html_forum_path/f-399.html" with
  # HTML::TreeBuilder and writes one record per topic row to
  # "$forum_path/f-399.html".  Each record is four lines followed by a blank
  # line:
  #
  #     topic id (or the raw href when it does not look like "t-<digits>.html")
  #     link text of the topic
  #     size column text ('-' when empty)
  #     date text ('date error' when the date cell or its <p> is missing)
  #
  # Reads the package globals $html_forum_path and $forum_path, and relies on
  # the helpers println, printc, trim and red being defined elsewhere.
  # Takes no arguments.  Returns 1 on success; dies with a descriptive message
  # when the input cannot be parsed, the expected page structure is missing,
  # or the output file cannot be written.
  sub rutracker_parse_forum
  {
      # Base name shared by the input and the output file.  It is also what
      # the size-column check below is matched against.
      my $filename        = 'f-399.html';
      my $filename_input  = "$html_forum_path/$filename";
      my $filename_output = "$forum_path/$filename";

      # Parse first and open the output only once all records are built, so
      # a failure does not leave a truncated output file behind.
      my $tree = HTML::TreeBuilder->new;
      defined $tree->parse_file($filename_input)
          or die "Cannot parse '$filename_input': $!";

      # The text of the third link inside <td class="nav"> is printed as the
      # "type" of the page.
      my $nav = $tree->look_down('_tag' => 'td', 'class' => 'nav')
          or die "No <td class=\"nav\"> found in '$filename_input'";
      my $type_link = ($nav->find('a'))[2]
          or die "Navigation cell of '$filename_input' has fewer than 3 links";
      println($type_link->as_text);

      my ($table) = $tree->look_down('_tag' => 'table', 'class' => 'forumline forum');
      die "No <table class=\"forumline forum\"> found in '$filename_input'"
          unless $table;

      my @records;
      foreach my $tr ($table->find('tr'))
      {
          my @tds = $tr->find('td');
          next if @tds < 2;

          # A row whose second cell holds no link is not a topic row.
          my $link = $tds[1]->find('a');
          next unless $link;

          my $link_title = $link->as_text;
          my $href       = $link->attr_get_i('href');
          $href = '' unless defined $href;

          # Reduce "t-<digits>.html" to the bare topic id; anything else is
          # reported and written out unchanged.
          if ($href =~ /\At-([0-9]+)\.html\z/) {
              $href = $1;
          }
          else {
              printc(red, "Topic link '$href' is wrong");
          }

          # NOTE(review): pages named f-396* are excluded from size
          # extraction, presumably because they have no size column - confirm.
          my $size = '?';
          if ($filename !~ /^f-396/) {
              $size = defined $tds[2] ? trim($tds[2]->as_text) : '';
          }
          # String comparison: a numeric '==' would also turn every size text
          # that numifies to 0 into '-'.
          $size = '-' if !defined $size || $size eq '';

          my $date_text = 'date error';
          if (defined $tds[4]) {
              my $date_p = $tds[4]->find('p');
              $date_text = trim($date_p->as_text) if $date_p;
          }

          # printlnc gray, $link_title;
          # printlnc blue, $href;
          # printlnc lime, $size;
          # printlnc cyan, $date_text;
          # println;

          push @records, join("\n", $href, $link_title, $size, $date_text) . "\n\n";
      }

      # HTML::Element trees must be deleted explicitly to release their memory.
      $tree->delete;

      open(my $out, '>', $filename_output)
          or die "Cannot open '$filename_output' for writing: $!";
      print {$out} @records
          or die "Cannot write to '$filename_output': $!";
      close($out)
          or die "Cannot close '$filename_output': $!";

      return 1;
  }
  53.  
  54. 1;  # a file loaded via require/use must end with a true value
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement