Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /**
- * convert pdf file to csv.
- *
- * Shell command to convert pdf to text file:
- * pdftotext -layout file.pdf
- *
- * The resulting file will be parsed and converted to a csv file
- *
- * Example row of interest (header and footer lines are ignored).
- * Columns must be separated by two or more spaces, as produced by "-layout":
- * 98458995  21.02.2016  10.06.2016/DOE/JOHN  160,96
- * Each such line ends with a trailing linefeed.
- *
- */
/**
 * Emit the opening part of the HTML page.
 *
 * Prints the doctype, the <head> section, the page heading and the
 * multipart upload form (file field named "pdf"). The <body> and <html>
 * elements are left open; printFooter() closes them.
 */
function printHeader() {
    $markup = array(
        '<!doctype html>',
        '<html>',
        '<head>',
        '<meta charset="utf-8">',
        '<title>PDF2CSV-Converter</title>',
        '</head>',
        '<body>',
        '<h1>PDF file upload</h1>',
        '<form enctype="multipart/form-data" method="POST">',
        '<input type="file" name="pdf" /><br />',
        '<input type="submit" value="Upload now" />',
        '</form>',
    );

    // Lines are joined with "\n" and no trailing newline is emitted.
    echo implode("\n", $markup);
}
/**
 * Emit the closing part of the HTML page.
 *
 * Prints the closing </body> and </html> tags that match the elements
 * opened by printHeader().
 */
function printFooter() {
    // Two lines, separated by a newline, without a trailing newline.
    echo "</body>\n</html>";
}
// ---------------------------------------------------------------------------
// Request handling: show the upload form, and if a PDF was posted convert it
// to text with pdftotext, extract the tabular rows and offer them as a CSV.
// ---------------------------------------------------------------------------
printHeader();

// Fixed working file names. The upload, the pdftotext output and the CSV all
// use the same base name and are overwritten on every request.
// NOTE(review): because the names are fixed, concurrent requests would
// overwrite each other's files - confirm this is acceptable for the deployment.
$baseName = "file";
$pdfFile = $baseName.'.pdf';
$inFile = $baseName.'.txt';
$outFile = $baseName.'.csv';

// Remove leftovers from a previous run so a stale result is never offered.
foreach (array($pdfFile, $inFile, $outFile) as $staleFile) {
    if (is_file($staleFile)) {
        unlink($staleFile);
    }
}

if (array_key_exists('pdf', $_FILES)) {
    $upload = $_FILES['pdf'];
    $error = null;
    $content = array();
    $fh = false;

    // Check the upload status before touching tmp_name; a malformed request
    // (e.g. an array-style field) is treated as a failed upload.
    $uploadError = (isset($upload['error']) && is_int($upload['error'])) ? $upload['error'] : -1;
    if ($uploadError !== UPLOAD_ERR_OK) {
        $error = 'Upload failed (error code '.$uploadError.').';
    } elseif (!move_uploaded_file($upload['tmp_name'], $pdfFile)) {
        $error = 'The uploaded file could not be stored.';
    }

    if ($error === null) {
        // Convert pdf to txt. Arguments are shell-escaped so the command stays
        // safe even if the file names are ever derived from something else.
        shell_exec(sprintf(
            'pdftotext -layout %s %s',
            escapeshellarg($pdfFile),
            escapeshellarg($inFile)
        ));

        // pdftotext reports problems only on stderr; the presence of a
        // readable text file is the success signal used here.
        $content = is_file($inFile) ? file($inFile) : false;
        if ($content === false) {
            $error = 'The PDF could not be converted to text.';
        }
    }

    if ($error === null) {
        $fh = fopen($outFile, 'w');
        if ($fh === false) {
            $error = 'The CSV file could not be created.';
        }
    }

    if ($error === null) {
        // Rows of interest start with an 8-digit number and a dd.mm.yyyy date,
        // each followed by at least two whitespace characters,
        // e.g. "98458995  21.02.2016  ".
        $pattern = '/^(\d{8})\s{2,}(\d{2}\.\d{2}\.\d{4})\s{2,}/';

        foreach ($content as $line) {
            if (!preg_match($pattern, $line)) {
                continue;
            }

            // Trim first: with CRLF endings or trailing blanks the line end
            // would otherwise be split off as an empty extra column and the
            // row terminator would be lost.
            $cols = preg_split('/\s{2,}/', trim($line));
            if (count($cols) < 4) {
                // Not a complete data row (fewer than four columns).
                continue;
            }

            // Write the first four columns, terminated explicitly.
            fputs($fh, sprintf("%s;%s;%s;%s\n", $cols[0], $cols[1], $cols[2], $cols[3]));
        }
        fclose($fh);

        echo '<hr /><a href="'.$outFile.'">Download '.$outFile.'</a><hr />';
    } else {
        echo '<hr /><p>'.htmlspecialchars($error, ENT_QUOTES, 'UTF-8').'</p><hr />';
    }
}

printFooter();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement