Advertisement
gaelikun

PDF to docx conversion using Apache POI 3.11 and iText 5.5.4

Jan 17th, 2015
494
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.47 KB | None | 0 0
  1. /**
  2.  * Test PDF to docx conversion using Apache POI and itextpdf libraries
  3.  * by @gaelikun (elcodedocle)
  4.  *
  5.  * This test was performed using poi-ooxml 3.11 and itextpdf 5.5.4
  6.  *
  7.  * Check out http://pastebin.com/6TXUiuQP for a basic pom.xml maven
  8.  * project file including these dependencies
  9.  */
  10. package info.synapp.tests.poi;
  11.  
  12. import com.itextpdf.text.pdf.PdfReader;
  13. import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
  14. import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
  15. import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
  16. import org.apache.poi.xwpf.usermodel.BreakType;
  17. import org.apache.poi.xwpf.usermodel.XWPFDocument;
  18. import org.apache.poi.xwpf.usermodel.XWPFParagraph;
  19. import org.apache.poi.xwpf.usermodel.XWPFRun;
  20.  
  21. import java.io.FileOutputStream;
  22. import java.io.IOException;
  23.  
  24. /**
  25.  * Test PDF to docx conversion using Apache POI and itextpdf libraries
  26.  */
  27. public class PdfToDocXConversion {
  28.     public static void main(String[] args){
  29.         String pdf = "http://www.energy.umich.edu/sites/default/files/pdf-sample.pdf";
  30.         String destinationDocxFilename = "javadomain1.docx";
  31.         if (args.length > 0){
  32.             pdf = args[0];
  33.         }
  34.         if (args.length > 1){
  35.             destinationDocxFilename = args[1];
  36.         }
  37.         if (args.length > 2) {
  38.             System.out.println("Usage: java route/to/PdfToDocXConversion [sourcepdffilename] [destinationdocxfilename] \n");
  39.             System.exit(1);
  40.         }
  41.         XWPFDocument doc = new XWPFDocument();
  42.         try {
  43.             PdfReader reader = new PdfReader(pdf);
  44.             PdfReaderContentParser parser = new PdfReaderContentParser(reader);
  45.             for (int i = 1; i <= reader.getNumberOfPages(); i++) {
  46.                 TextExtractionStrategy strategy = parser.processContent(i,new SimpleTextExtractionStrategy());
  47.                 String text = strategy.getResultantText();
  48.                 XWPFParagraph p = doc.createParagraph();
  49.                 XWPFRun run = p.createRun();
  50.                 run.setText(text);
  51.                 run.addBreak(BreakType.PAGE);
  52.             }
  53.             FileOutputStream out1 = new FileOutputStream(destinationDocxFilename);
  54.             doc.write(out1);
  55.             out1.close();
  56.             reader.close();
  57.             System.out.println("Document converted successfully\n");
  58.         } catch (IOException e) {
  59.             System.out.println("Something went wrong\n");
  60.             e.printStackTrace();
  61.         }
  62.     }
  63. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement