Advertisement
Guest User

Rasterize and upload CUPS filter

a guest
Apr 28th, 2014
175
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 3.65 KB | None | 0 0
  1. #!/usr/bin/env perl
  2. # -----------------------------------------------------------------------------------------
  3. # rasterize, parse and upload print filter for CUPS
  4. # -----------------------------------------------------------------------------------------
  5. # if it's fed a PDF file, it queries a web service with the MD5 hash to see if it's been
  6. # converted already, and if not:
  7. # 1. it rasterizes the files and uploads the raster
  8. # 2. it extracts text from it, and uploads the text
  9. # 3. it extracts the color histogram from the raster and uploads it
  10. # -----------------------------------------------------------------------------------------
  11. # TODO:
  12. # - error handling
  13. # - histograms may be best extracted from a non-antialiased raster
  14. # - probably more
  15. # -----------------------------------------------------------------------------------------
  16.  
  17.  
  18.  
  use strict;
  use warnings;

  # Output record separator: every print() in this script gets a trailing
  # newline appended automatically.
  $\ = "\n";

  # Scratch directory for the per-page PNG / text / histogram files.
  my $tempdir = '/var/tmp';           # we need a world writeable temp dir

  # NOTE(review): as written this statement has no effect -- the lexical starts
  # out undefined, so the guarded assignment never runs, and nothing else reads
  # the variable. It presumably was meant to touch $ENV{PERL_BADLANG}; confirm
  # the intent before changing it.
  my $PERL_BADLANG; $PERL_BADLANG = 0 if ($PERL_BADLANG);     # Suppress warnings about locale problems

  # External tools. Each value is a program path optionally followed by fixed
  # options, separated by spaces.
  my $PDFINFO  = '/opt/local/bin/pdfinfo';
  my $PDFTOPPM = '/opt/local/bin/pdftoppm -r 150 -png';
  my $PDFTOTXT = '/opt/local/bin/pdftotext -layout';
  my $CONVERT  = '/opt/local/bin/convert';
  my $MD5      = '/sbin/md5 -q';

  # Base URL of the upload service; "$UPLURL/f/<md5>" is queried for status.
  my $UPLURL   = 'http://127.0.0.1:3000/upload';


  # Process exit codes.
  my $CUPS_BACKEND_OK = 0;
  my $CUPS_BACKEND_FAILED = 1;

  # Positional command-line arguments, mapped onto named slots in order.
  # Missing trailing arguments leave their slot undefined.
  my %args;
  my $arg_count = scalar @ARGV;

  @args{qw/jobid username title copies options file_to_print/} = @ARGV;

  # No file argument given: fall back to '-'.
  $args{file_to_print} = '-' if (! defined $args{file_to_print});
  42.  
  # -----------------------------------------------------------------------------
  # Helpers. Every external program is started from an argument list, never
  # through a shell, so a file name containing quotes, spaces or shell
  # metacharacters cannot break or hijack a command line.
  # -----------------------------------------------------------------------------

  # Split a configured command string such as '/sbin/md5 -q' into
  # (program, option, ...).
  sub _argv {
      my ($command) = @_;
      return split ' ', $command;
  }

  # Report a problem on STDERR. The "ERROR: " prefix follows the CUPS
  # convention for filter/backend log messages.
  sub _complain {
      my ($message) = @_;
      warn "ERROR: $message\n";
      return;
  }

  # Run @cmd and return everything it wrote to stdout as one string.
  # Returns undef only when the program could not be started; like backticks,
  # the exit status of the program is not examined.
  sub _capture {
      my @cmd = @_;
      open(my $pipe, '-|', @cmd) or return;
      binmode $pipe;
      my $out = do { local $/; <$pipe> };
      close $pipe;
      return defined $out ? $out : '';
  }

  # Run @cmd and store its stdout, byte for byte, in $path.
  # Returns true only if the program started, exited with status 0 and the
  # output file was written completely.
  sub _capture_to_file {
      my ($path, @cmd) = @_;

      open(my $pipe, '-|', @cmd) or return 0;
      binmode $pipe;

      my $out;
      unless (open($out, '>', $path)) {
          close $pipe;
          return 0;
      }
      binmode $out;

      # The script sets $\ to "\n" globally; that must not be appended to
      # every chunk of (possibly binary) data copied here.
      local $\;

      my $ok = 1;
      while (read($pipe, my $chunk, 65536)) {
          print {$out} $chunk or $ok = 0;
      }
      close $out  or $ok = 0;
      close $pipe or $ok = 0;         # false when the program exited non-zero
      return $ok;
  }

  # POST one file to the upload service as multipart form field "file".
  # curl's own output goes to this script's stdout/stderr. Returns true when
  # curl exited with status 0.
  sub _upload {
      my ($path) = @_;
      return system('curl', '-F', "file=\@$path", $UPLURL) == 0;
  }

  # -----------------------------------------------------------------------------
  # Main
  # -----------------------------------------------------------------------------

  # Called without arguments: print the one-line self-description and stop.
  # NOTE(review): this looks like the CUPS backend device-discovery line
  # (class, URI scheme, make/model, info) -- confirm against the CUPS docs.
  if ($arg_count == 0) {
      print qq/file rasterize "Unknown" "rasterize pdf and upload it"/;
      exit $CUPS_BACKEND_OK;
  }

  my $file = $args{file_to_print};

  #-------------------------
  # get file type and exit
  # if it isn't a PDF
  #-------------------------
  my $type = _capture('file', '-b', $file);
  unless (defined $type && $type =~ /^PDF/) {
      unlink $file;
      exit $CUPS_BACKEND_OK;
  }

  #-----------------------
  # get md5 hash of file
  #-----------------------
  my $md5 = _capture(_argv($MD5), $file);
  $md5 = '' unless defined $md5;
  $md5 =~ s/\s+\z//;

  # The hash becomes part of a URL and of every temp file name, so refuse to
  # continue with anything but a plain hex digest. The input file is left
  # untouched in this case.
  unless ($md5 =~ /\A[0-9a-fA-F]+\z/) {
      _complain("could not compute MD5 of $file");
      exit $CUPS_BACKEND_FAILED;
  }

  #--------------------------------------
  # ask the service about this document
  #--------------------------------------
  # The reply is compared numerically with the page count below.
  # NOTE(review): presumably it is the number of pages already stored --
  # verify against the web service.
  my $reply = _capture('curl', "$UPLURL/f/$md5");
  my $done  = (defined $reply && $reply =~ /\A\s*(\d+)/) ? $1 : 0;

  #-------------------------------------
  # get file info, extract page count
  #-------------------------------------
  my $info  = _capture(_argv($PDFINFO), $file);
  my $pages = (defined $info && $info =~ /Pages:\s*(\d+)/) ? $1 : 0;

  # Nothing left to do when the service's number equals the page count.
  # As before, the input file is NOT removed on this path.
  exit $CUPS_BACKEND_OK if $done == $pages;

  #---------------------
  # print stuff to log
  #---------------------
  # Logging is best effort: a log file that cannot be opened is reported but
  # does not stop the conversion.
  my $logfile = '/var/tmp/foo.txt';
  if (open(my $log, '>>', $logfile)) {
      print {$log} join "\n", @ARGV;
      print {$log} '-' x 80;
      close $log;
      chmod 0666, $logfile;           # once is enough; no need to repeat per page
  }
  else {
      _complain("cannot append to $logfile: $!");
  }

  #---------------------------
  # convert pages, make them
  # readable and upload
  #---------------------------
  for my $page (1 .. $pages) {
      my $base = sprintf '%s/%s-%03d', $tempdir, $md5, $page;
      my ($png, $txt, $hst) = map { "$base.$_" } qw(png txt hst);
      my @range = ('-f', $page, '-l', $page);

      #---------------------
      # convert to png
      #---------------------
      my $have_png = _capture_to_file($png, _argv($PDFTOPPM), @range, $file);
      if ($have_png) {
          _upload($png) or _complain("upload of $png failed");
      }
      else {
          _complain("rasterizing page $page of $file failed");
      }

      #---------------------
      # convert to text
      #---------------------
      if (_capture_to_file($txt, _argv($PDFTOTXT), @range, $file, '-')) {
          _upload($txt) or _complain("upload of $txt failed");
      }
      else {
          _complain("text extraction for page $page of $file failed");
      }

      #---------------------
      # create histogram
      #---------------------
      # The histogram is computed from the PNG, so it is skipped when the
      # rasterization step did not produce one.
      if ($have_png) {
          my @histogram = (_argv($CONVERT), $png, '-format', '%c', "histogram:info:$hst");
          if (system(@histogram) == 0) {
              _upload($hst) or _complain("upload of $hst failed");
          }
          else {
              _complain("histogram for page $page of $file failed");
          }
      }

      # Kept from the original: loosen permissions before removing the files
      # (presumably so that a leftover can be cleaned up by another user if
      # the unlink fails). Files that were never created are silently ignored.
      chmod 0666, $png, $txt, $hst;
      unlink $png, $txt, $hst;
  }

  unlink $file;

  exit $CUPS_BACKEND_OK;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement