Advertisement
mikelieman

Voting File Parser

Feb 24th, 2016
288
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 2.67 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. use v5.12;
  4. use strict;
  5. use warnings;
  6.  
  7. use DBI;
  8. use File::Slurp;
  9. use Text::CSV_XS;
  10. use Tie::File;
  11. use Data::Dumper;
  12.  
  13. #  Start logging
  14. use Log::Log4perl qw(:easy);
  15. binmode( STDOUT, ":utf8" );
  16. use Log::Log4perl::CommandLine
  17.     qw(:all :logconfig logging.cfg);
  18.  
  19. my $time_start = time;
  20. INFO qq{Begin Program: $time_start};
  21. DEBUG("Debug flag turned on");
  22. TRACE("Trace flag turned on");
  23.  
  24. my $commit_frequency = 500000;
  25. my $memory_limit = q{0};
  26.  
  27. #my $data_file = q{ShortVoters.txt};
  28. my $data_file = q{AllVoters_01122016.txt};
  29.  
  30. # grant all on tmp_voters.* to testuser identified by 'testpassword';
  31. my $database = q{tmp_voters};
  32. my $hostname = q{localhost};
  33. my $port     = q{3306};
  34. my $user = q{testuser};
  35. my $password = q{testpassword};
  36.  
  37. ###  Get Column Names
  38. my @column_names = read_file("record.cfg", chomp => 1) or LOGDIE;
  39.  
  40. ###  Setup Database
  41. my $dsn = qq{DBI:mysql:database=$database;host=$hostname;port=$port};
  42. my $dbh = DBI->connect($dsn, $user, $password);
  43. $dbh->{AutoCommit} = 0;  # enable transactions, if possible
  44. $dbh->{RaiseError} = 1;
  45.  
  46. my $create_stmt = q{create or replace table voters (} . join(q{ TEXT, }, @column_names) . q{ TEXT);};
  47. INFO $create_stmt;
  48. $dbh->do($create_stmt) or LOGDIE;
  49.  
  50. my $insert_stmt = q{insert into voters values (} . "?" . ", ?" x ( (scalar @column_names) - 1) . q{)};
  51. INFO $insert_stmt;
  52. my $sth_insert = $dbh->prepare($insert_stmt);
  53.  
  54.  
  55. ###  Setup Data File
  56. use Fcntl 'O_RDONLY';
  57. tie my @data_file, 'Tie::File', $data_file , autochomp => 0 , mode => O_RDONLY , memory => $memory_limit or LOGDIE;
  58.  
  59. ###  Setup CSV Parser
  60. my $csv = Text::CSV_XS->new ( { binary => 1, auto_diag => 1 } )  # should set binary attribute.
  61.     or LOGDIE "Cannot use CSV: ".Text::CSV_XS->error_diag ();
  62.  
  63. my $nbr_rows = 0;
  64.  
  65. INFO q{Entering foreach loop at } . (time - $time_start) . q{ second(s)};
  66. foreach my $line (@data_file) {
  67.  
  68.     TRACE $line;
  69.     $csv->parse( $line ) or LOGDIE( qq{CSV Error: } . Text::CSV_XS->error_diag () );
  70.              
  71.     if ( not $nbr_rows ) {
  72.         INFO q{First Record Parsed at } . (time - $time_start) . q{ seconds};
  73.     }
  74.  
  75.     $sth_insert->execute($csv->fields) or LOGDIE;
  76.     $nbr_rows++;
  77.  
  78.     if ( $nbr_rows % $commit_frequency == 0 ) {
  79.         $dbh->commit or LOGDIE;
  80.         dump_elapsed_time();
  81.     }
  82. }
  83.  
  84. $dbh->commit or LOGDIE;
  85. dump_elapsed_time();
  86.  
  87. $csv->eof or $csv->error_diag();
  88.  
  89. exit;
  90.  
  91.  
  92. #############################################################################
  93. #
  94. #
  95. #
  96. #############################################################################
  97. sub dump_elapsed_time {
  98.  
  99.     my $time_elapsed = time - $time_start;
  100.  
  101.     INFO sprintf(q{% 5.1fM rows processed in %3d minutes | %5d rows/second | %.1e seconds/row},
  102.                     $nbr_rows/1000000, $time_elapsed/60, $nbr_rows/$time_elapsed , $time_elapsed/$nbr_rows);
  103.  
  104. }
  105.  
  106. __END__
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement