Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env perl
# -----------------------------------------------------------------------------------------
# rasterize, parse and upload print filter for CUPS
# -----------------------------------------------------------------------------------------
# if it's fed a PDF file, it queries a web service with the MD5 hash to see if it's been
# converted already, and if not, for every page:
# 1. it rasterizes the page and uploads the raster
# 2. it extracts the text of the page and uploads the text
# 3. it extracts the color histogram from the raster and uploads it
#
# Invocation:
#   no arguments -> print the device discovery line and exit
#   otherwise    -> jobid username title copies options [file_to_print]
#                   (without file_to_print, or with "-", the document is read from STDIN
#                   and spooled to a temporary file first, because several tools need
#                   to read it)
#
# All external commands are run in list form, i.e. without a shell, so that file names
# are never interpreted as shell syntax.
#
# The script always exits with status 0; problems are only reported on STDERR.
#
# Locale warnings printed by perl at startup cannot be silenced from inside the script;
# set PERL_BADLANG=0 in the environment this script is started with instead.
# -----------------------------------------------------------------------------------------
# TODO:
# - report failures through the exit status
# - histograms may be best extracted from a non-aliased raster
# - probably more
# -----------------------------------------------------------------------------------------
use strict;
use warnings;
use File::Temp ();

my $tempdir  = '/var/tmp';            # we need a world writeable temp dir
my $logfile  = '/var/tmp/foo.txt';    # job arguments are appended here
my @PDFINFO  = ('/opt/local/bin/pdfinfo');
my @PDFTOPPM = ('/opt/local/bin/pdftoppm', '-r', '150', '-png');
my @PDFTOTXT = ('/opt/local/bin/pdftotext', '-layout');
my @CONVERT  = ('/opt/local/bin/convert');
my @MD5      = ('/sbin/md5', '-q');
my $UPLURL   = 'http://127.0.0.1:3000/upload';
my $CUPS_BACKEND_OK = 0;

# Path of the temporary copy of a document received on STDIN (undef if none).
my $spooled_path;

# Whatever way the script ends, do not leave the spooled copy behind.
END { unlink $spooled_path if defined $spooled_path }

# Run a command without a shell and return everything it wrote to STDOUT.
# Returns undef if the command could not be started.
sub capture {
    my @cmd = @_;
    my $pid = open my $pipe, '-|', @cmd;
    unless ($pid) {
        warn "ERROR: cannot run $cmd[0]: $!\n";
        return;
    }
    binmode $pipe;
    my $out = do { local $/; <$pipe> };
    close $pipe;
    return defined $out ? $out : '';
}

# Run a command without a shell and store its STDOUT in $path.
# Returns true if the file was written.
sub capture_to_file {
    my ($path, @cmd) = @_;
    my $data = capture(@cmd);
    return 0 unless defined $data;
    open my $out, '>', $path or do {
        warn "ERROR: cannot write $path: $!\n";
        return 0;
    };
    binmode $out;
    print {$out} $data;
    close $out or do {
        warn "ERROR: cannot write $path: $!\n";
        return 0;
    };
    return 1;
}

# Upload one file to the web service as multipart form field "file".
# Returns true if curl exited successfully.
sub upload {
    my ($path) = @_;
    return system('curl', '-F', "file=\@$path", $UPLURL) == 0;
}

# Copy STDIN to a fresh temporary file in $tempdir and return its path
# (undef on failure). The file is removed again when the script exits.
sub spool_stdin {
    my ($fh, $path) = eval {
        File::Temp::tempfile('rasterize-XXXXXX', DIR => $tempdir, SUFFIX => '.pdf');
    };
    unless (defined $path) {
        warn "ERROR: cannot create spool file in $tempdir: $@";
        return;
    }
    $spooled_path = $path;
    binmode STDIN;
    binmode $fh;
    my $buf;
    while (read STDIN, $buf, 65536) {
        print {$fh} $buf;
    }
    close $fh or do {
        warn "ERROR: cannot write $path: $!\n";
        return;
    };
    return $path;
}

#-------------------------
# device discovery
#-------------------------
if (@ARGV == 0) {
    print qq/file rasterize "Unknown" "rasterize pdf and upload it"\n/;
    exit $CUPS_BACKEND_OK;
}

my %args;
@args{qw/jobid username title copies options file_to_print/} = @ARGV;

#-------------------------
# locate the document
#-------------------------
my $file = $args{file_to_print};
if (!defined $file || $file eq '-') {
    $file = spool_stdin();
    exit $CUPS_BACKEND_OK unless defined $file;
}

#-------------------------
# get file type and exit
# if it isn't a PDF
#-------------------------
my $type = capture('file', '-b', $file);
unless (defined $type && $type =~ /^PDF/) {
    unlink $file;
    exit $CUPS_BACKEND_OK;
}

#-----------------------
# get md5 hash of file
#-----------------------
my $md5 = capture(@MD5, $file);
$md5 = '' unless defined $md5;
$md5 =~ s/\s+\z//;
# The hash ends up in file names and in the query URL, so insist on plain hex.
unless ($md5 =~ /\A[0-9a-fA-F]{32}\z/) {
    warn "ERROR: cannot compute MD5 of $file\n";
    exit $CUPS_BACKEND_OK;
}

#---------------------------------
# ask the service what it has
#---------------------------------
my $uploaded = capture('curl', "$UPLURL/f/$md5");
$uploaded = '' unless defined $uploaded;

#---------------------
# extract page count
#---------------------
my $info  = capture(@PDFINFO, $file);
my $pages = (defined $info && $info =~ /Pages:\s*(\d+)/) ? $1 : 0;
warn "ERROR: cannot determine page count of $file\n" unless $pages;

# Nothing to do if the service's answer equals the page count. The answer is
# compared numerically on purpose; a non-numeric answer counts as 0.
# The input file is left in place in this case.
my $already_done = do { no warnings 'numeric'; $uploaded == $pages };
exit $CUPS_BACKEND_OK if $already_done;

#---------------------
# print stuff to log
#---------------------
if (open my $log, '>>', $logfile) {
    print {$log} join("\n", @ARGV), "\n", '-' x 80, "\n";
    close $log;
    chmod 0666, $logfile;
}
else {
    warn "ERROR: cannot append to $logfile: $!\n";
}

#---------------------------
# convert pages, make them
# readable and upload
#---------------------------
for my $p (1 .. $pages) {
    my $base = sprintf '%s/%s-%03d', $tempdir, $md5, $p;
    my ($png, $txt, $hst) = map { "$base.$_" } qw/png txt hst/;

    #---------------------
    # convert to png
    #---------------------
    my $have_png = capture_to_file($png, @PDFTOPPM, '-f', $p, '-l', $p, $file);
    upload($png) if $have_png;

    #---------------------
    # convert to text
    #---------------------
    upload($txt) if capture_to_file($txt, @PDFTOTXT, '-f', $p, '-l', $p, $file, '-');

    #---------------------
    # create histogram
    #---------------------
    if ($have_png) {
        system(@CONVERT, $png, '-format', '%c', "histogram:info:$hst");
        upload($hst) if -e $hst;
    }

    unlink $png, $txt, $hst;
}

unlink $file;
exit $CUPS_BACKEND_OK;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement