Advertisement
SDSWanderer

Script for fecth data from excel files

Nov 26th, 2014
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 3.89 KB | None | 0 0
  1. #!/usr/bin/env perl
  2.  
  3. use v5.14;
  4. use utf8;
  5. use strict;
  6. use warnings;
  7.  
  8. use FindBin qw/$Bin/;
  9.  
  10. use Docopt;
  11. use JSON;
  12. use File::Slurp qw/read_file write_file/;
  13.  
  14. use Spreadsheet::XLSX;
  15. use Spreadsheet::ParseExcel;
  16. use Spreadsheet::ParseExcel::Utility qw/sheetRef/;
  17.  
  18. sub read_json_file {
  19.     my $filepatch = shift;
  20.     die "File not exists" unless -e $filepatch;
  21.  
  22.     my $json = JSON->new()->utf8()->relaxed();
  23.     my $data = eval { $json->decode( scalar read_file($filepatch) ) };
  24.  
  25.     die "Wrong JSON file format" unless $data;
  26.     return $data;
  27. }
  28.  
  29. sub write_json_file {
  30.     my ($filepatch, $data, $pretty) = @_;
  31.  
  32.     my $json = JSON->new()->utf8();
  33.     $json->pretty() if $pretty;
  34.  
  35.     return write_file( $filepatch, $json->encode($data) );
  36. }
  37.  
  38. sub mute {
  39.     my $sub = shift;
  40.     return eval {
  41.         local *STDERR;
  42.         open STDERR, ">>", undef;
  43.         $sub->();
  44.     };
  45. }
  46.  
  47. sub read_excel_file {
  48.     my $filepatch = shift;
  49.     die "File not exists" unless -e $filepatch;
  50.  
  51.     my $extension = lc [split '\.', $filepatch]->[-1];
  52.     my $workbook;
  53.  
  54.     if ($extension eq 'xlsx' or $extension eq 'xlsm') {
  55.         $workbook = mute(sub{ Spreadsheet::XLSX->new( $filepatch ) });
  56.     } elsif ( $extension eq 'xls' ) {
  57.         $workbook = mute(sub{ Spreadsheet::ParseExcel->new()->parse( $filepatch ) });
  58.     } else {
  59.         die "Wrong excel file extension";
  60.     }
  61.  
  62.     die "Not valid excel file" unless $workbook;
  63.     return $workbook;
  64. }
  65.  
  66. sub fetch_result {
  67.     my ($map, $workbook) = @_;
  68.  
  69.     my %worksheets = map {$_->get_name() => $_} $workbook->worksheets();
  70.  
  71.     walk_recursive(
  72.         $map,
  73.         sub {
  74.             my $re = qr/^(?<worksheet>.+)!((?<cell>\w+\d+)|(?<range>\w+\d+:\w+\d+))$/;
  75.             return "WRONG_ADDRESS_FORMAT" unless shift =~ $re;
  76.  
  77.             my $worksheet = $worksheets{ $+{worksheet} };
  78.             return "WRONG_WORKSHEET_NAME" unless $worksheet;
  79.  
  80.             if ($+{cell}) {
  81.                 my $cell = $worksheet->get_cell(sheetRef( $+{cell} ));
  82.                 return "WRONG_CELL_ADDRESS" unless $cell;
  83.  
  84.                 return $cell->unformatted();
  85.             }
  86.             if ($+{range}) {
  87.                 my ($start, $end) = map { [sheetRef($_)] } split ':', $+{range};
  88.                 my $isFlat = $start->[0] == $end->[0] || $start->[1] == $end->[1];
  89.  
  90.                 my @range;
  91.                 foreach my $col ($start->[0]..$end->[0]) {
  92.                     my @row = map {
  93.                         my $cell = $worksheet->get_cell($col, $_);
  94.                         $cell ? $cell->unformatted() : "WRONG_CELL_ADDRESS";
  95.                     } ($start->[1]..$end->[1]);
  96.                     push @range, $isFlat ? @row : \@row;
  97.                 }
  98.  
  99.                 return \@range;
  100.             }
  101.  
  102.         }
  103.     );
  104.  
  105.     return $map;
  106. }
  107.  
  108. sub walk_recursive {
  109.     my ($data, $sub) = @_;
  110.     my $ref_type = ref($data);
  111.  
  112.     if (!$ref_type) {
  113.         $data = $sub->($data);
  114.     } elsif ( $ref_type eq 'HASH' ) {
  115.         foreach my $key ( keys %$data ) {
  116.             $data->{$key} = walk_recursive( $data->{$key}, $sub );
  117.         }
  118.     } elsif ( $ref_type eq 'ARRAY' ) {
  119.         for (my $i = 0; $i < @$data; $i++) {
  120.             $data->[$i] = walk_recursive( $data->[$i], $sub );
  121.         }
  122.     }
  123.  
  124.     return $data;
  125. }
  126.  
  127. my $doc = <<"EOF";
  128. Script for fetch data from excel files
  129.  
  130. Usage:
  131.     $FindBin::Script <excel> <map> <out> [--pretty-output]
  132.  
  133. Options:
  134.     <excel>             Path to excel file
  135.     <map>               Path to JSON map file
  136.     <out>               Path to output JSON file
  137.     -p --pretty-output  Human-readable output JSON formating
  138. EOF
  139.  
  140. my $opts     = docopt( doc => $doc );
  141. my $map      = read_json_file($opts->{'<map>'});
  142. my $workbook = read_excel_file($opts->{'<excel>'});
  143. my $result   = fetch_result($map, $workbook);
  144.  
  145. write_json_file($opts->{'<out>'}, $result, $opts->{'--pretty-output'});
  146.  
  147. 1;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement