Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/local/bin/perl
# Author: Dawid Mocek
# PP Projekt
# For educational purposes only
# All rights reserved
- use strict;
- use warnings;
- use IO::File;
- use File::Path qw( make_path );
- use LWP::Simple;
- use LWP::Parallel;
- use LWP::Parallel::UserAgent qw(:CALLBACK);
- use HTTP::Request;
- use Data::Dumper;
- use Config::IniFiles;
- use DBI;
# newdir($dir)
#
# Make sure the directory $dir exists, creating missing parent directories
# as needed (File::Path::make_path). Each directory created is reported on
# STDOUT because of the "verbose" option.
#
# Parameters:
#   $dir - path of the directory that must exist.
#
# Returns nothing useful.
# Dies with "Failed to create directory: <dir> (...)" when the directory
# cannot be created.
sub newdir {
    my ($dir) = @_;

    return if -d $dir;

    # make_path() returns the number of directories it created, which is not
    # a success flag: it is 0 when something else created the directory
    # first. Failures are therefore collected through the "error" option
    # instead of being inferred from the return value.
    make_path($dir, { verbose => 1, error => \my $errors });

    if ($errors && @{$errors}) {
        # Each entry is a single-pair hash: path => message. The path is
        # empty for general errors.
        my @reasons = map {
            my ($path, $message) = %{$_};
            $path eq '' ? $message : "$path: $message";
        } @{$errors};
        die('Failed to create directory: ' . $dir . ' (' . join('; ', @reasons) . ')');
    }

    # Final sanity check: whatever happened above, the directory must exist.
    die('Failed to create directory: ' . $dir) unless -d $dir;

    return;
}
# mynewdir($dir)
#
# Create the single directory $dir with the mkdir built-in if it does not
# exist yet. Unlike newdir() this does not create missing parents.
# A "mkdir: <dir>" line is printed to STDOUT before the attempt.
#
# Parameters:
#   $dir - path of the directory to create.
#
# Returns a true value when this call created the directory, and a false
# value when the directory already existed or could not be created. A failed
# mkdir is reported with a warning (including the system error) instead of
# being silently ignored; it does not die.
sub mynewdir {
    my ($dir) = @_;

    return q{} if -d $dir;

    print "mkdir: ", $dir, "\n";

    my $created = mkdir($dir);
    warn "mkdir failed: $dir: $!\n" unless $created;

    return $created;
}
# prepare_http_req($url, $agent)
#
# Build a GET HTTP::Request for $url whose User-Agent header is set to
# $agent, and return the request object.
sub prepare_http_req {
    my ($url, $agent) = @_;

    my $request = HTTP::Request->new('GET' => $url);
    $request->header('User-Agent' => $agent);

    # An 'Accept-Encoding' => 'gzip, deflate' header was left disabled here.
    return $request;
}
# Disabled response callback (commented out, not used by the script):
# sub handle_http_resp {
#     my($content, $response, $proto, $entry) = @_;
#     print "Handling answer from " . $response->request->url . "\n";
#     if(length($content)) {
#         $response->add_content($content);
#     }
#     else {
#
#         return C_ENDCON;
#     }
#
# }
# load_ini($inifile)
#
# Load the INI configuration file $inifile with Config::IniFiles, using
# "General" as the fallback section name.
#
# Parameters:
#   $inifile - path of the configuration file.
#
# Returns the Config::IniFiles object.
# Dies when the file does not exist or cannot be parsed, so that a broken
# configuration ends the program with a non-zero status and a message on
# STDERR.
sub load_ini {
    my ($inifile) = @_;

    die("File: $inifile does not exists\n") unless -e $inifile;

    my $cfg = Config::IniFiles->new(-file => $inifile, -fallback => "General");

    # new() returns undef when the file cannot be parsed; the reasons are
    # left in @Config::IniFiles::errors.
    unless (defined $cfg) {
        die("Failed to parse $inifile: " . join('; ', @Config::IniFiles::errors) . "\n");
    }

    return $cfg;
}
### Config ###
my $ini_file = './config.ini';
my $cfg      = load_ini($ini_file);

# Every setting is read into a scalar first. Calling $cfg->val() directly
# inside an argument list evaluates it in list context, where a multi-valued
# setting would shift the arguments that follow it.
my $db_name    = $cfg->val('db', 'name');
my $db_host    = $cfg->val('db', 'host');
my $db_user    = $cfg->val('db', 'user');
my $db_pass    = $cfg->val('db', 'pass');
my $db_table   = $cfg->val('db', 'tb_href');
my $ua_timeout = $cfg->val('ua', 'timeout');
my $ua_agent   = $cfg->val('ua', 'agent');
my $ua_max_req = $cfg->val('ua', 'parallel');
my $output_dir = $cfg->val('prop', 'output_dir');

### Database ###
my $dbh = DBI->connect(
    'DBI:mysql:database=' . $db_name . ';host=' . $db_host,
    $db_user,
    $db_pass,
    { mysql_auto_reconnect => 1, mysql_enable_utf8 => 1 },
);
die "Connection error: " . $DBI::errstr unless $dbh;

### LWP Parallel ###
my $pua = LWP::Parallel::UserAgent->new;

# Only override the user agent's own defaults for settings that are present
# in the configuration file.
$pua->timeout($ua_timeout) if defined $ua_timeout;
$pua->agent($ua_agent)     if defined $ua_agent;
$pua->max_req($ua_max_req) if defined $ua_max_req;

# Fetch every (href, ds_id) pair and register one download per row.
my $sth = $dbh->prepare('SELECT href, ds_id FROM `' . $db_table . '` ORDER BY id ASC')
    or die "Prepare error: " . $dbh->errstr;
$sth->execute()
    or die "Execute error: " . $sth->errstr;

while (my ($url, $ds_id) = $sth->fetchrow_array) {
    # A row without a URL or an id cannot produce a request or a file name.
    unless (defined $url && length $url && defined $ds_id) {
        warn "Skipping row with missing href or ds_id\n";
        next;
    }

    # The response body is saved as <output_dir>/<ds_id>/<ds_id>.html.
    my $path      = $output_dir . '/' . $ds_id;
    my $html_file = $path . '/' . $ds_id . '.html';
    newdir($path);

    print 'Registering: ' . $url . "\n";

    # register() returns undef on success and an error response otherwise.
    my $failure = $pua->register(prepare_http_req($url, $ua_agent), $html_file);
    if ($failure) {
        warn 'Could not register ' . $url . ': ' . $failure->message . "\n";
    }
}

# fetchrow_array also returns an empty list on error, so check explicitly.
die "Fetch error: " . $sth->errstr if $sth->err;

$sth->finish();
$dbh->disconnect();

# Run all registered requests, then report the outcome of each one.
my $entries = $pua->wait();
for my $entry (values %{$entries}) {
    my $res = $entry->response;
    print $res->request->url, " ", $res->message, "\n";
}
Add Comment
Please, Sign In to add comment