Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- # Author: Dawid Mocek
- # PP Projekt
- # For educational purposes only
- # All rights reserved
- # DS parallel recipes downloader
- use strict;
- use warnings;
- use LWP::Simple;
- use LWP::Parallel;
- use Data::Dumper;
- use HTML::TreeBuilder::XPath;
- use XML::Writer;
- use DBI;
- use Config::IniFiles;
# Load an INI configuration file.
#
# Parameters:
#   $inifile - path of the INI file to read.
# Returns:
#   The Config::IniFiles object for that file, created with "General" as the
#   fallback section.
# Dies:
#   When no path is given, when the path does not exist, or when
#   Config::IniFiles->new returns undef (the file could not be read/parsed).
#   Dying (instead of printing to STDOUT and exiting with status 0) makes the
#   failure visible to the shell and to any caller wrapping this in eval.
sub load_ini {
    my ($inifile) = @_;

    die "No INI file given\n"
        unless defined $inifile && length $inifile;
    die "File: $inifile does not exist\n"
        unless -e $inifile;

    my $cfg = Config::IniFiles->new(-file => $inifile, -fallback => 'General');
    unless (defined $cfg) {
        # Config::IniFiles reports the reasons for a failed new() in this
        # package array.
        no warnings 'once';
        my $details = join '; ', @Config::IniFiles::errors;
        die "Cannot load $inifile: $details\n";
    }

    return $cfg;
}
### Configuration ###
my $ini_file = './config.ini';
my $cfg = load_ini($ini_file);

# Base address of the recipe listing; the page number is appended per request.
my $base_url = $cfg->val('ds', 'url');
die "Missing 'url' in section [ds] of $ini_file\n" unless defined $base_url;
my $url = $base_url . "/przepisy/sortowanie/2/strona";

# XPath selecting the title link of every recipe tile on a listing page.
my $grids_xpath = '/html/body/div[@id="Wrapper"]/div[@id="Container"]/div[@id="Content"]/div[@class="main-wrapper"]/div[@class="content-wrapper with-sidebar"]/section[@class="content"]/div[@class="grid-wrapper"]/div[@class="grid-item"]/div[@class="grid-title"]//a';

### LWP ###
my $ua = LWP::UserAgent->new;
# Only override the user agent's defaults when the INI file provides a value.
my $ua_timeout = $cfg->val('ua', 'timeout');
my $ua_agent   = $cfg->val('ua', 'agent');
$ua->timeout($ua_timeout) if defined $ua_timeout;
$ua->agent($ua_agent)     if defined $ua_agent;

### HTML Parser ###
my $tree = HTML::TreeBuilder::XPath->new(ignore_unknown => 0);

### Database ###
my $dsn = 'DBI:mysql:database=' . $cfg->val('db', 'name') . ';host=' . $cfg->val('db', 'host');
# Stop here with the driver's message if the connection cannot be made;
# otherwise the failure would only show up as a method call on undef below.
my $dbh = DBI->connect(
    $dsn,
    $cfg->val('db', 'user'),
    $cfg->val('db', 'pass'),
    {mysql_auto_reconnect => 1, mysql_enable_utf8 => 1},
) or die "Cannot connect to database: $DBI::errstr\n";

my $stmt_href = $dbh->prepare('INSERT INTO `doradcasmaku_recipies_links`(title, href) VALUES(?, ?)')
    or die "Cannot prepare insert statement: " . $dbh->errstr . "\n";

# Progress output may contain non-ASCII characters.
binmode STDOUT, ':encoding(UTF-8)';
# Fetch listing pages 1..1000 and store the title/href of every recipe link
# found on each successfully downloaded page.
foreach my $i (1 .. 1000) {
    my $absolute_url = $url . "/" . $i;
    print "Fetching: " . $absolute_url;
    my $response = $ua->get($absolute_url);
    print " " . $response->code . "\n";
    next unless $response->is_success;

    # Parse the charset-decoded body only; as_string would also feed the
    # status line and headers to the HTML parser.
    my $html = $response->decoded_content;
    unless (defined $html) {
        warn "Could not decode content of $absolute_url\n";
        next;
    }

    # Use a fresh tree for every page. Feeding all pages into one shared tree
    # keeps the nodes of earlier pages around, so their links would be matched
    # and inserted again on every later page.
    my $page_tree = HTML::TreeBuilder::XPath->new(ignore_unknown => 0);
    $page_tree->parse($html);
    $page_tree->eof;    # signal end of document so the tree is finalised

    foreach my $grid ($page_tree->findnodes($grids_xpath)) {
        $stmt_href->execute($grid->attr('title'), $grid->attr('href'));
    }

    # Release the tree explicitly before moving on to the next page.
    $page_tree->delete;
}
$dbh->disconnect();
Add Comment
Please, Sign In to add comment