Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- # Author: Dawid Mocek
- # PP Projekt
- # For educational purposes only
- # All rights reserved
- # DS html parser - steps finder
- use strict;
- use warnings;
- use Data::Dumper;
- use HTML::TreeBuilder::XPath;
- use XML::Writer;
- use DBI;
- use Config::IniFiles;
# Strip leading and trailing whitespace from the first argument, in place.
#
# The argument is modified through the @_ alias, so the caller must pass a
# modifiable scalar (a variable, not a string literal).
#
# Returns the trimmed string. An undefined argument is left untouched and
# nothing is returned.
sub trim {
    return unless defined $_[0];

    # The replacement part must be present (and empty): with only two
    # delimiters the s/// operator is left unterminated and the file does
    # not compile.
    $_[0] =~ s/^\s+|\s+$//g;

    return $_[0];
}
# Load an INI configuration file.
#
# Parameters:
#   $inifile - path to the INI file.
#
# Returns the Config::IniFiles object built from the file. The object is
# created with -fallback => "General", the section Config::IniFiles uses for
# parameters that appear outside any section.
#
# Dies when no path is given, when the file does not exist, or when
# Config::IniFiles cannot parse it. Previously a missing file was reported on
# STDOUT and the script exited with status 0, which looks like success to the
# calling shell.
sub load_ini {
    my ($inifile) = @_;

    die "No INI file name given\n" unless defined $inifile;
    die "File: $inifile does not exist\n" unless -e $inifile;

    my $cfg = Config::IniFiles->new(-file => $inifile, -fallback => "General");

    # new() returns undef when the file cannot be parsed; the reasons are
    # collected in @Config::IniFiles::errors.
    unless (defined $cfg) {
        my $reasons = join('; ', @Config::IniFiles::errors);
        die "Could not parse $inifile: $reasons\n";
    }

    return $cfg;
}
### Configuration ###
# Settings are read from config.ini in the current working directory.
my $ini_file = './config.ini';
my $cfg      = load_ini($ini_file);

# XPath selecting the h4.step-title heading(s) inside div.step-description.
my $steps_xpath = '/html/body//div[@class="step-description"]/h4[@class="step-title"]';

### Database ###
# Each setting is read into its own scalar. Config::IniFiles::val() is
# context-sensitive: called directly inside an argument list it runs in list
# context, where a missing key yields an empty list and silently shifts the
# remaining arguments of the call.
my $db_name  = $cfg->val('db', 'name');
my $db_host  = $cfg->val('db', 'host');
my $db_user  = $cfg->val('db', 'user');
my $db_pass  = $cfg->val('db', 'pass');
my $db_table = $cfg->val('db', 'tb_href');

die "Missing [db] name in $ini_file\n"    unless defined $db_name;
die "Missing [db] tb_href in $ini_file\n" unless defined $db_table;

# RaiseError turns every failed DBI call (including connect itself) into an
# exception, so a failure cannot go unnoticed inside the transaction opened
# below. PrintError is switched off to avoid reporting each error twice.
my $dbh = DBI->connect(
    'DBI:mysql:database=' . $db_name . ';host=' . $db_host,
    $db_user,
    $db_pass,
    {
        RaiseError           => 1,
        PrintError           => 0,
        mysql_auto_reconnect => 1,
        mysql_enable_utf8    => 1,
    },
);

# Let the driver quote the table name instead of wrapping it in backticks by
# hand, so that a backtick inside the configured name cannot break the SQL.
my $sth = $dbh->prepare(
    'UPDATE ' . $dbh->quote_identifier($db_table) . ' SET steps = ? WHERE ds_id = ?'
);

# All updates performed by the script belong to a single transaction.
$dbh->begin_work();
### Main loop ###
# The file named by [prop] html_files lists one HTML file path per line. Each
# path must contain /share/przepisy/<digits>/; the digits are the ds_id of
# the row to update.
my $html_file = $cfg->val('prop', 'html_files');
die "Missing [prop] html_files setting\n" unless defined $html_file;

# Three-argument open: the mode is fixed, so characters in the configured
# file name can never be interpreted as an open mode or a pipe.
open my $html_fh, '<', $html_file or die "Could not open $html_file: $!";

# The whole pass runs inside eval so that any failure can be followed by an
# explicit rollback instead of leaving the transaction in an undefined state.
my $ok = eval {
    LINE:
    while (my $line = <$html_fh>) {
        $line =~ s/^\s+|\s+$//g;
        next LINE if $line eq '';

        # Without a ds_id there is no row to update, so skip the line
        # instead of parsing the file and executing with an undefined key.
        unless ($line =~ m{/share/przepisy/(?<ds_id>\d+)/}) {
            warn "Skipping line without ds_id: $line\n";
            next LINE;
        }
        my $ds_id = $+{ds_id};

        ### HTML Parser ###
        my $tree = HTML::TreeBuilder::XPath->new(ignore_unknown => 0);

        # parse_file returns undef when the file cannot be opened.
        unless (defined $tree->parse_file($line)) {
            warn "Could not parse $line: $!\n";
            $tree->delete;
            next LINE;
        }

        my $step = $tree->findvalue($steps_xpath);

        # The tree is no longer needed once the text has been extracted.
        $tree->delete;

        # The heading text is expected to contain "Krok 1 / N" (Polish for
        # "Step 1 / N"); N is stored in the steps column.
        if ($step =~ m{Krok\s1\s/\s(?<cnt>\d+)}) {
            $sth->execute($+{cnt}, $ds_id)
                or die "Update failed for ds_id $ds_id: " . $sth->errstr . "\n";
        }
    }
    1;
};
my $err = $@;    # saved immediately: later evals would overwrite $@

close $html_fh;

if ($ok) {
    $dbh->commit() or die "Commit failed: " . $dbh->errstr . "\n";
}
else {
    eval { $dbh->rollback(); 1 } or warn "Rollback failed: $@";
}

$sth->finish();
$dbh->disconnect();

die "Aborted, changes rolled back: $err" unless $ok;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement