Advertisement
Mekkkkkk

perl datetime parsing

Jan 30th, 2014
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 4.75 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2. #
  3. # Benchmarking various parsing methods for my specific case of parsing
  4. # format YYYY-MM-DD HH24:MI:SS.FF (like 2014-01-29 15:23:22.721485)
  5.  
  6. use strict;
  7. use feature 'state';
  8.  
  9. use DateTime::Format::Strptime;
  10. use DateTime::Format::Natural;
  11. use DateTime::Format::DateParse;
  12. use DateTime::Format::Builder;
  13. use DateTime::Format::Oracle;
  14. use DateTime::Format::Pg;
  15. use DateTime::Format::ISO8601;
  16. use DateTime::Format::Flexible;
  17. use Benchmark qw(cmpthese);
  18.  
  19. my $value = '2014-01-29 15:23:22.721485';
  20.  
  21. sub _create_builder {
  22.     my $parser = DateTime::Format::Builder->new();
  23.     $parser->parser(
  24.         length => 26,
  25.         params => [ qw( year month day hour minute second nanosecond ) ],
  26.         regex  => qr/^(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)\.(\d{6})$/,
  27.         postprocess => sub {
  28.             my %args = @_;
  29.             $args{parsed}{nanosecond} *= 1000;
  30.             return 1;
  31.         },
  32.        );
  33.     return $parser;
  34. };
  35.  
  36. sub _create_builder_alt {
  37.     my $parser = DateTime::Format::Builder->new();
  38.     $parser->parser(
  39.         params => [ qw( year month day hour minute second nanosecond ) ],
  40.         regex  => qr/^(\d{4})-(\d\d?)-(\d\d?)(?: (\d\d?):(\d\d?):(\d\d?)(?:\.(\d+))?)?$/,
  41.         postprocess => sub {
  42.             my %args = @_;
  43.             my $nano = $args{parsed}{nanosecond};
  44.             $args{parsed}{nanosecond} = $nano . '0' x (9 - length($nano));
  45.             return 1;
  46.         },
  47.        );
  48.     return $parser;
  49. };
  50.  
  51. our %parsers = (
  52.     'strptime' => sub {
  53.         state $parser = DateTime::Format::Strptime->new(
  54.             pattern => '%F %T.%N',
  55.             locale => 'C',
  56.             time_zone => 'Europe/Warsaw',
  57.             on_error => 'croak',
  58.            );
  59.         return $parser->parse_datetime(@_);
  60.     },
  61.     'natural' => sub {
  62.         state $parser = DateTime::Format::Natural->new(
  63.             format => 'yyyy-mm-dd',
  64.             prefer_future => 0,
  65.             time_zone => 'floating',
  66.            );
  67.         return $parser->parse_datetime(@_);
  68.     },
  69.     'dateparse' => sub {
  70.         return DateTime::Format::DateParse->parse_datetime(@_);
  71.     },
  72.     'oracle' => sub {
  73.         BEGIN{ $ENV{'NLS_DATE_FORMAT'} = 'YYYY-MM-DD'; };
  74.         BEGIN{ $ENV{'NLS_TIMESTAMP_FORMAT'} = 'YYYY-MM-DD HH24:MI:SS.FF'; };
  75.         return DateTime::Format::Oracle->parse_timestamp(@_); # parse_datetime
  76.     },
  77.     'pg' => sub {
  78.         return DateTime::Format::Pg->parse_datetime(@_);
  79.     },
  80.     'iso8601' => sub {
  81.         #return DateTime::Format::ISO8601->parse_datetime(@_);
  82.         my $item = shift;
  83.         $item =~ s/ /T/;
  84.         return DateTime::Format::ISO8601->parse_datetime($item);
  85.      },
  86.     'flexible' => sub {
  87.         DateTime::Format::Flexible->parse_datetime(@_);
  88.      },
  89.     'builder_f' => sub {
  90.          state $parser = _create_builder();
  91.          return $parser->parse_datetime(@_);
  92.      },
  93.     'builder_l' => sub {
  94.          state $parser = _create_builder_alt();
  95.          return $parser->parse_datetime(@_);
  96.      },
  97.    );
  98.  
  99. ###########################################################################
  100. # Rozbiegówka
  101. ###########################################################################
  102.  
  103. our %valid;
  104.  
  105. my $printer = DateTime::Format::Strptime->new(pattern => '%F %T.%N', locale => 'C');
  106.  
  107. foreach my $name (sort keys %parsers) {
  108.     my $parser = $parsers{$name};
  109.  
  110.     my $obj;
  111.     eval {
  112.         $obj = $parser->($value);
  113.     }; if($@) {
  114.         $@ =~ s/at datetime_parsing_benchmark\.pl line \d+.*//s;
  115.         printf "%10s: [invalid] %s\n", $name, "failed to parse: $@";
  116.         next;
  117.     }
  118.  
  119.     my $is_valid = $obj->year == 2014
  120.                    && $obj->month == 1
  121.                    && $obj->day  == 29
  122.                    && $obj->hour  == 15
  123.                    && $obj->minute == 23
  124.                    && $obj->second  == 22
  125.                    && $obj->nanosecond == 721485000;
  126.  
  127.     printf "%10s: [%7s] %s\n", $name, $is_valid ? "ok" : "wrong", $printer->format_datetime($obj);
  128.  
  129.     if($is_valid) {
  130.         $valid{$name} = 1;
  131.     }
  132. }
  133.  
  134. ###########################################################################
  135. # Właściwy benchmark
  136. ###########################################################################
  137.  
  138. print "Spawning benchmark\n";
  139.  
  140. my %compared;
  141. #foreach my $name (keys %valid) {
  142. foreach my $name (keys %parsers) {
  143.     my $parser = $parsers{$name};
  144.     my @values = (
  145.        '2014-01-29 15:23:22.721485',
  146.        '2013-11-09 05:23:22.121385',
  147.        '2013-03-01 15:02:44.000000',
  148.        '2013-12-06 18:18:35.317669',
  149.       );
  150.     unless($valid{$name}) {
  151.         $name .= "[bad]";
  152.     }
  153.     $compared{$name} = sub {
  154.         my @parsed = map { $parser->($_)  } @values;
  155.         @parsed = ();
  156.     };
  157. }
  158.  
  159. cmpthese(1000, \%compared);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement