Advertisement
jobseeker

parsePDF.php

Mar 5th, 2015
236
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 0.63 KB | None | 0 0
  1. <?php
  2. // Downloads a docket-sheet PDF, extracts its text with smalot/pdfparser and
  3. // echoes that text with the "Printed: <date> <time> AM/PM" stamp blanked out.
  4.  
  5. // require (not include): nothing below can work without the Composer
  6. // autoloader, so fail right here instead of on a later "class not found".
  7. require 'vendor/autoload.php';
  8.  
  9. $url_pdf = "https://ujsportal.pacourts.us/DocketSheets/MDJReport.ashx?docketNumber=MJ-19311-CR-0000113-2015";
  10.  
  11. try {
  12.     // The URL is handed straight to parseFile().
  13.     // NOTE(review): presumably this relies on allow_url_fopen being enabled - confirm.
  14.     $parser = new \Smalot\PdfParser\Parser();
  15.     $pdf    = $parser->parseFile($url_pdf);
  16.     $text   = (string) $pdf->getText();
  17. } catch (\Exception $e) {
  18.     // Report the failure and stop rather than echoing an empty/partial result.
  19.     error_log('Could not parse ' . $url_pdf . ': ' . $e->getMessage());
  20.     exit(1);
  21. }
  22.  
  23. // Replace everything from "Printed:" up to and including the FIRST following
  24. // AM/PM marker with a single space. The lazy ".*?" stops a second am/pm later
  25. // on the same line from swallowing the text in between, and "\b" stops the
  26. // marker from matching the start of a longer word.
  27. // A stricter alternative that only matches the exact date-time layout is
  28. //   /Printed: \d+\/\d+\/\d{4} \d+:\d+ [aApP][mM]/
  29. // (see also Gordo's regex: https://www.codersclan.net/ticket/896).
  30. $cleaned = preg_replace('/Printed:.*? [aApP][mM]\b/', " ", $text);
  31.  
  32. // preg_replace() returns null when PCRE itself fails (e.g. backtrack limit).
  33. if ($cleaned === null) {
  34.     error_log('Stripping the Printed stamp failed, PCRE error code ' . preg_last_error());
  35.     exit(1);
  36. }
  37.  
  38. echo $cleaned;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement