Advertisement
m4ly

DS_HTML_STEPS_PARSER.pl

Oct 8th, 2015
179
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.74 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. # Author: Dawid Mocek
  4. # PP Projekt
  5. # For educational purpose only
  6. # All rights reserved
  7.  
  8. # DS html parser - steps finder
  9.  
  10. use strict;
  11. use warnings;
  12.  
  13. use Data::Dumper;
  14. use HTML::TreeBuilder::XPath;
  15. use XML::Writer;
  16. use DBI;
  17. use Config::IniFiles;
  18.  
  19. sub trim {
  20.  $_[0] =~ s/^\s+|\s+$/g;
  21. }
  22.  
  23. sub load_ini {
  24.  
  25.     my $inifile = $_[0];
  26.    
  27.     unless(-e $inifile) {
  28.          print("File: $inifile does not exists\n");
  29.          exit;
  30.          }
  31.  
  32.     my $cfg =  Config::IniFiles->new(-file => $inifile, -fallback => "General");
  33.     return $cfg;
  34. }
  35.  
  36.  
  37. my $ini_file = './config.ini';
  38. my $cfg = load_ini($ini_file);
  39.  
  40.  
  41.  
  42. my $steps_xpath = '/html/body//div[@class="step-description"]/h4[@class="step-title"]';
  43.  
  44. ### Database ###
  45. my $dbh;
  46. my $sth;
  47. $dbh = DBI->connect('DBI:mysql:database=' . $cfg->val('db', 'name') . ';host=' . $cfg->val('db', 'host'), $cfg->val('db', 'user'), $cfg->val('db', 'pass'), {mysql_auto_reconnect => 1, mysql_enable_utf8 => 1});
  48. $sth = $dbh->prepare('UPDATE `' . $cfg->val('db', 'tb_href') . '` SET steps = ? WHERE ds_id = ?');
  49.  
  50. $dbh->begin_work();
  51.  
  52. my $html_file = $cfg->val('prop', 'html_files');
  53.  
  54. open my $html_fh, $html_file or die "Coulnd not open $html_file: $!";
  55.  
  56. while(my $line = <$html_fh>) {
  57.     $line =~ s/^\s+|\s+$//g;
  58.     $line =~ /\/share\/przepisy\/(?<ds_id>\d+)\//m;
  59.     my $ds_id = $+{ds_id};
  60.  
  61.     ### HTML Parser ###
  62.     my $tree = HTML::TreeBuilder::XPath->new(ignore_unknown => 0);
  63.     $tree->parse_file($line);
  64.  
  65.     my $step = $tree->findvalue($steps_xpath);
  66.  
  67.     if($step =~ /Krok\s1\s\/\s(?<cnt>\d+)/m ) {
  68.     my $cnt = $+{cnt};
  69.     $sth->execute($cnt, $ds_id);
  70.     }
  71.     $tree->delete;
  72.  
  73. }
  74.  
  75. close $html_fh;
  76. $dbh->commit();
  77. $sth->finish();
  78. $dbh->disconnect();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement