Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- use HTML::Tree;
# rutracker_parse_forum()
#
# Parses the saved forum index page "$html_forum_path/f-399.html" and writes
# one record per topic row to "$forum_path/f-399.html". A record is four
# lines -- topic id, topic title, size, date text -- followed by an empty
# line.
#
# Takes no arguments. Reads the package variables $html_forum_path,
# $forum_path and $filename, and calls trim(), println() and printc(), all of
# which must be provided elsewhere in the program.
#
# Dies when the input page cannot be read, when the topic table is not found,
# or when the output file cannot be opened, written or closed.
sub rutracker_parse_forum
{
    my $filename_input  = "$html_forum_path/f-399.html";
    my $filename_output = "$forum_path/f-399.html";

    # Parse first and write last, so that an unreadable or unexpected input
    # page does not truncate a previously generated output file.
    my $tree = HTML::TreeBuilder->new;
    defined $tree->parse_file($filename_input)
        or die "Cannot parse '$filename_input': $!";

    # The third link of the navigation cell is printed as the forum type;
    # it is purely informational, so it is skipped when absent.
    my $nav = $tree->look_down('_tag' => 'td', 'class' => 'nav');
    my $type_link = $nav ? ($nav->find('a'))[2] : undef;
    println($type_link->as_text) if $type_link;

    my ($table) = $tree->look_down('_tag' => 'table', 'class' => 'forumline forum');
    unless ($table) {
        $tree->delete;
        die "No 'forumline forum' table found in '$filename_input'";
    }

    my @records;
    for my $tr ($table->find('tr')) {
        my @tds = $tr->find('td');
        next if @tds < 2;

        # A row whose second cell holds no link carries no topic.
        my $link = $tds[1]->find('a');
        next unless $link;

        my $link_title = $link->as_text;
        my $href       = $link->attr_get_i('href');
        $href = '' unless defined $href;

        # Reduce "t-<digits>.html" to the bare topic id. A link of any other
        # shape is reported and written out unchanged.
        if ($href =~ /\At-([0-9]+)\.html\z/) {
            $href = $1;
        }
        else {
            printc(red, "Topic link '$href' is wrong");
        }

        # NOTE(review): $filename is not assigned in this sub; it is
        # presumably a package variable set by the caller -- confirm. When it
        # is undefined the size cell is always read.
        my $size = '?';
        if ($filename !~ /^f-396/) {
            $size = defined $tds[2] ? trim($tds[2]->as_text) : '';
        }
        # String comparison: only a genuinely empty size becomes '-'.
        $size = '-' if !defined $size || $size eq '';

        my $date_cell = defined $tds[4] ? $tds[4]->find('p') : undef;
        my $date_text = $date_cell ? trim($date_cell->as_text) : "date error";

        push @records, join("\n", $href, $link_title, $size, $date_text) . "\n\n";
    }

    # Release the parse tree explicitly; older HTML::Tree versions keep
    # circular parent/child references that are not freed otherwise.
    $tree->delete;

    open(my $out, '>', $filename_output)
        or die "Cannot open '$filename_output' for writing: $!";
    print {$out} join('', @records)
        or die "Cannot write to '$filename_output': $!";
    close($out)
        or die "Cannot close '$filename_output': $!";
}
- 1;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement