Guest User

Untitled

a guest
Sep 5th, 2016
273
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.68 KB | None | 0 0
  1. #!/usr/bin/perl
  2. use utf8;
  3. use LWP::UserAgent;
  4. use LWP::Debug qw(+);
  5. use JSON qw( decode_json );
  6.  
  7. use HTTP::Cookies;
  8. use Encode;
  9. use strict;
  10.  
  # Endpoints used by the script: the board catalog (a JSON document listing
  # threads) and the prefix under which each thread is fetched as "<num>.json".
  my $board_uri        = 'https://2ch.hk/wr/catalog.json';
  my $threads_base_uri = 'https://2ch.hk/wr/res/';

  # Empty in-memory cookie jar shared by every request.  The two commented-out
  # lines show how fixed cookies could be pre-seeded for the 2ch.hk domain
  # (NOTE(review): the names look like Cloudflare clearance cookies - confirm).
  my $cookie_jar = HTTP::Cookies->new;
  #$cookie_jar -> set_cookie('','cf_clearance','replace',"/","2ch.hk");
  #$cookie_jar -> set_cookie('','__cfduid','replace',"/","2ch.hk");

  # User agent that identifies itself with a desktop Firefox UA string and
  # stores cookies in the jar created above.
  my $ua = LWP::UserAgent->new(
      agent      => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
      cookie_jar => $cookie_jar,
  );

  # Send both http and https traffic through the SOCKS proxy on localhost:9050
  # (presumably a local Tor client - verify against the deployment).
  $ua->proxy([ 'http', 'https' ], 'socks://127.0.0.1:9050');

  # Clear the list of request methods that may be redirected automatically.
  $ua->requests_redirectable(undef);
  22.  
  23.  
  24. #print $ua->get($board_uri)->decoded_content;
  25.  
  # Download the board catalog and, for every thread with at least 100 posts,
  # flatten all post comments into one plain-text blob.  For each such thread
  # the script prints the thread number and the lower-cased text to STDOUT and
  # writes the text (original case) to /tmp/data/<thread number>.
  #
  # Encoding: response bodies are requested as raw octets (charset => 'none')
  # because decode_json expects UTF-8 encoded bytes and returns character
  # strings.  All text processing is done on characters and the result is
  # encoded back to UTF-8 exactly once, on output.
  my $catalog_response = $ua->get($board_uri);
  die "Cannot fetch $board_uri: " . $catalog_response->status_line . "\n"
      unless $catalog_response->is_success;

  my $catalog_bytes = $catalog_response->decoded_content(charset => 'none');
  die "Cannot decode the body of $board_uri\n"
      unless defined $catalog_bytes;

  my $board = decode_json($catalog_bytes);

  THREAD:
  for my $entry (@{ $board->{'threads'} || [] }) {

      # Short threads are not interesting; a missing count is treated as 0.
      next THREAD if ($entry->{'posts_count'} || 0) < 100;

      my $thread_uri = $threads_base_uri . $entry->{'num'} . '.json';
      my $response   = $ua->get($thread_uri);
      unless ($response->is_success) {
          warn "Skipping $thread_uri: " . $response->status_line . "\n";
          next THREAD;
      }

      # Anything that does not start with '{' is not the expected JSON object
      # (for example an HTML error or challenge page), so skip it.
      my $data = $response->decoded_content(charset => 'none');
      next THREAD unless defined $data && $data =~ m/\A\{/;

      # A single malformed thread must not abort the whole run.
      my $thread = eval { decode_json($data) };
      if (!$thread) {
          warn "Skipping $thread_uri: invalid JSON: $@";
          next THREAD;
      }

      my $all = '';
      for my $post_entry (@{ $thread->{'threads'}[0]->{'posts'} || [] }) {
          my $post = $post_entry->{'comment'};
          $post = '' unless defined $post;

          $post =~ s/<a [^>]+>[^<]+<[^>]+>/ /g;           # whole links, text included
          $post =~ s/<br>/. /g;                           # line break ends a sentence
          $post =~ s/<[^>]+>/ /g;                         # any remaining tag
          $post =~ s/>>\d+/ /g;                           # literal ">>123" post references
          $post =~ s/(&#[a-f0-9]+;|&[^;]{1,6};)/ /g;      # HTML entities

          $all .= $post . ". ";
      }

      # Collapse runs of spaces and of sentence punctuation into ". ".
      $all =~ s/ +/ /g;
      $all =~ s/[\.?!][\.?! ]+/. /g;
      $all =~ s/[\.?! ][\.?!]/. /g;

      # Keep digits only: the number becomes part of a file system path.
      my $number = $entry->{'num'};
      $number = '' unless defined $number;
      $number =~ s/[^0-9]//g;

      print "$number\n";
      print encode('UTF-8', lc($all));

      my $path = "/tmp/data/$number";
      my $out;
      unless (open($out, '>', $path)) {
          warn "Cannot write $path: $!\n";
          next THREAD;
      }
      print {$out} encode('UTF-8', $all);
      close($out) or warn "Error while closing $path: $!\n";
  }
Advertisement
Add Comment
Please, Sign In to add comment