SHARE
TWEET

Embed PDF into Word document

kiwiwings Dec 29th, 2019 121 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2. import java.io.ByteArrayInputStream;
  3. import java.io.ByteArrayOutputStream;
  4. import java.io.FileInputStream;
  5. import java.io.FileOutputStream;
  6. import java.io.IOException;
  7. import java.io.InputStream;
  8. import java.io.OutputStream;
  9. import java.lang.reflect.InvocationTargetException;
  10. import java.math.BigInteger;
  11.  
  12. import javax.xml.namespace.QName;
  13.  
  14. import com.microsoft.schemas.office.office.CTLock;
  15. import com.microsoft.schemas.office.office.CTOLEObject;
  16. import com.microsoft.schemas.office.office.OLEObjectDocument;
  17. import com.microsoft.schemas.office.office.STConnectType;
  18. import com.microsoft.schemas.office.office.STOLEDrawAspect;
  19. import com.microsoft.schemas.office.office.STOLEType;
  20. import com.microsoft.schemas.office.office.STTrueFalseBlank;
  21. import com.microsoft.schemas.vml.CTFormulas;
  22. import com.microsoft.schemas.vml.CTGroup;
  23. import com.microsoft.schemas.vml.CTImageData;
  24. import com.microsoft.schemas.vml.CTPath;
  25. import com.microsoft.schemas.vml.CTShape;
  26. import com.microsoft.schemas.vml.CTShapetype;
  27. import com.microsoft.schemas.vml.STExt;
  28. import com.microsoft.schemas.vml.STStrokeJoinStyle;
  29. import com.microsoft.schemas.vml.STTrueFalse;
  30. import org.apache.poi.hpsf.ClassID;
  31. import org.apache.poi.hpsf.ClassIDPredefined;
  32. import org.apache.poi.ooxml.POIXMLDocumentPart;
  33. import org.apache.poi.ooxml.POIXMLDocumentPart.RelationPart;
  34. import org.apache.poi.ooxml.POIXMLFactory;
  35. import org.apache.poi.ooxml.POIXMLRelation;
  36. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  37. import org.apache.poi.openxml4j.opc.PackagePart;
  38. import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
  39. import org.apache.poi.poifs.filesystem.Ole10Native;
  40. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  41. import org.apache.poi.util.IOUtils;
  42. import org.apache.poi.util.Units;
  43. import org.apache.poi.xslf.usermodel.XSLFRelation;
  44. import org.apache.poi.xwpf.usermodel.Document;
  45. import org.apache.poi.xwpf.usermodel.XWPFDocument;
  46. import org.apache.poi.xwpf.usermodel.XWPFPicture;
  47. import org.apache.poi.xwpf.usermodel.XWPFRun;
  48. import org.apache.xmlbeans.XmlCursor;
  49. import org.junit.Test;
  50. import org.openxmlformats.schemas.drawingml.x2006.main.CTOfficeArtExtension;
  51. import org.openxmlformats.schemas.drawingml.x2006.main.CTOfficeArtExtensionList;
  52. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTObject;
  53. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
  54.  
  55. public class TestEmbed {
  56.     @Test
  57.     public void embed() throws IOException, InvalidFormatException {
  58.         try (XWPFDocument doc = new XWPFDocument();
  59.             InputStream imgIS = new FileInputStream("test-data/document/GaiaTestImg.png");
  60.             InputStream pdfIS = new FileInputStream("test-data/hmef/quick-contents/quick.pdf")) {
  61.  
  62.             // add a pdf as an OLEContainer object
  63.             final MyRelations oleRelDef = MyRelations.OLE_OBJECT;
  64.             int oleNumber = doc.getPackage().getUnusedPartIndex(oleRelDef.getDefaultFileName());
  65.  
  66.             RelationPart oleRel = doc.createRelationship(oleRelDef, MyFactory.inst, oleNumber, false);
  67.             try (OutputStream os = oleRel.getDocumentPart().getPackagePart().getOutputStream();
  68.                  POIFSFileSystem poifs = new POIFSFileSystem();) {
  69.                 // embeds the contents -> double click and return changes the image to a preview
  70.                 // embedPDF(poifs, pdfIS);
  71.                 // add content via OLE package
  72.                 embedPDFasPackage(poifs, pdfIS);
  73.                 poifs.writeFilesystem(os);
  74.             }
  75.  
  76.             // some dummy text
  77.             doc.createParagraph().createRun().setText("foobaa");
  78.             // add run containing the embedded image
  79.             XWPFRun r = doc.createParagraph().createRun();
  80.  
  81.             String imgRel = doc.addPictureData(imgIS, Document.PICTURE_TYPE_PNG);
  82.             addOleShape1(r, imgRel, oleRel.getRelationship().getId());
  83.  
  84.             // working with a picture shape doesn't work
  85.             // XWPFPicture xpic = r.addPicture(imgIS, Document.PICTURE_TYPE_PNG, "dummy.png", Units.pixelToEMU(100), Units.pixelToEMU(100));
  86.             // addOleShape2(r, xpic, oleRel.getRelationship().getId());
  87.  
  88.             try (FileOutputStream fos = new FileOutputStream("blub.docx")) {
  89.                 doc.write(fos);
  90.             }
  91.         }
  92.     }
  93.  
  94.     static void embedPDF(POIFSFileSystem poifs, InputStream pdfIS) throws IOException {
  95.         poifs.createDocument(pdfIS, "Contents");
  96.         poifs.getRoot().setStorageClsid(new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}"));
  97.     }
  98.  
  99.     static void embedPDFasPackage(POIFSFileSystem poifs, InputStream pdfIS) throws IOException {
  100.         Ole10Native ole10Native = new Ole10Native("MyDummy PDF", "dummy.pdf", "dummy.pdf", IOUtils.toByteArray(pdfIS));
  101.         poifs.getRoot().setStorageClsid(ClassIDPredefined.OLE_V1_PACKAGE.getClassID());
  102.         ByteArrayOutputStream bos = new ByteArrayOutputStream();
  103.         ole10Native.writeOut(bos);
  104.         poifs.createDocument(new ByteArrayInputStream(bos.toByteArray()), Ole10Native.OLE10_NATIVE);
  105.     }
  106.  
  107.  
  108.     static void addOleShape1(XWPFRun run, String imgRel, String oleRel) {
  109.         // TODO: increase with every call
  110.         int typeCounter = 1;
  111.         int shapeCounter = 1025;
  112.  
  113.         CTR ctr = run.getCTR();
  114.  
  115.         // move image into object container - this is valid regarding Ecma 376 - fifth edition ... POI uses still first edition :(
  116.         CTObject obj = ctr.addNewObject();
  117.         obj.setDxaOrig(BigInteger.valueOf(2190));
  118.         obj.setDyaOrig(BigInteger.valueOf(1440));
  119.  
  120.         CTGroup grp = CTGroup.Factory.newInstance();
  121.         CTShapetype st = grp.addNewShapetype();
  122.         st.setCoordsize("21600,21600");
  123.         st.setFilled(STTrueFalse.F);
  124.         st.setId("_x0000_t"+typeCounter);
  125.         st.setPreferrelative(com.microsoft.schemas.office.office.STTrueFalse.T);
  126.         st.setPath2("m@4@5l@4@11@9@11@9@5xe");
  127.         st.setStroked(STTrueFalse.F);
  128.         st.addNewStroke().setJoinstyle(STStrokeJoinStyle.MITER);
  129.  
  130.         CTFormulas form = st.addNewFormulas();
  131.         String[] clumsyRect = {
  132.             "if lineDrawn pixelLineWidth 0",
  133.             "sum @0 1 0",
  134.             "sum 0 0 @1",
  135.             "prod @2 1 2",
  136.             "prod @3 21600 pixelWidth",
  137.             "prod @3 21600 pixelHeight",
  138.             "sum @0 0 1",
  139.             "prod @6 1 2",
  140.             "prod @7 21600 pixelWidth",
  141.             "sum @8 21600 0",
  142.             "prod @7 21600 pixelHeight",
  143.             "sum @10 21600 0"
  144.         };
  145.         for (String cr : clumsyRect) {
  146.             form.addNewF().setEqn(cr);
  147.         }
  148.  
  149.         CTPath path = st.addNewPath();
  150.         path.setGradientshapeok(STTrueFalse.T);
  151.         path.setConnecttype(STConnectType.RECT);
  152.         path.setExtrusionok(com.microsoft.schemas.office.office.STTrueFalse.F);
  153.  
  154.         CTLock lock = st.addNewLock();
  155.         lock.setAspectratio(com.microsoft.schemas.office.office.STTrueFalse.T);
  156.         lock.setExt(STExt.EDIT);
  157.  
  158.         CTShape shape = grp.addNewShape();
  159.         shape.setId("_x0000_i"+shapeCounter);
  160.         shape.setStyle("width:109.45pt;height:1in");
  161.         shape.setType("#_x0000_t"+typeCounter);
  162.         shape.setOle(STTrueFalseBlank.X);
  163.  
  164.         CTImageData imgDat = shape.addNewImagedata();
  165.         imgDat.setId2(imgRel);
  166.         imgDat.setTitle("");
  167.  
  168.         OLEObjectDocument oleParent = OLEObjectDocument.Factory.newInstance();
  169.         CTOLEObject ole = oleParent.addNewOLEObject();
  170.         ole.setDrawAspect(STOLEDrawAspect.CONTENT);
  171.         ole.setObjectID("_foobaa"+shapeCounter);
  172.         ole.setProgID("AcroExch.Document");
  173.         ole.setShapeID("_x0000_i"+shapeCounter);
  174.         ole.setType(STOLEType.EMBED);
  175.         ole.setId(oleRel);
  176.  
  177.         XmlCursor objCur = obj.newCursor();
  178.         objCur.toFirstContentToken();
  179.  
  180.         XmlCursor grpCur = grp.newCursor();
  181.         grpCur.copyXmlContents(objCur);
  182.         grpCur.dispose();
  183.  
  184.         XmlCursor oleCur = oleParent.newCursor();
  185.         oleCur.copyXmlContents(objCur);
  186.         oleCur.dispose();
  187.  
  188.         objCur.dispose();
  189.  
  190.     }
  191.  
  192.  
  193.     static void addOleShape2(XWPFRun run, XWPFPicture xpic, String oleRel) {
  194.         final String drawNS = "http://schemas.microsoft.com/office/drawing/2010/main";
  195.         final String wordNS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
  196.  
  197.         // TODO: increase with every call
  198.         int typeCounter = 1;
  199.         int shapeCounter = 1025;
  200.         CTOfficeArtExtensionList extLst = xpic.getCTPicture().getNvPicPr().getCNvPr().addNewExtLst();
  201.  
  202.         CTOfficeArtExtension ext = extLst.addNewExt();
  203.         ext.setUri("{63B3BB69-23CF-44E3-9099-C40C66FF867C}");
  204.         XmlCursor cur = ext.newCursor();
  205.         cur.toEndToken();
  206.         cur.beginElement(new QName(drawNS, "compatExt", "a14"));
  207.         cur.insertNamespace("a14", drawNS);
  208.         cur.insertAttributeWithValue("spid", "_x0000_i"+shapeCounter);
  209.         cur.dispose();
  210.  
  211.  
  212.         CTR ctr = run.getCTR();
  213.  
  214.         // move image into object container - this is valid regarding Ecma 376 - fifth edition ... POI uses still first edition :(
  215.         CTObject obj = ctr.addNewObject();
  216.         obj.setDxaOrig(BigInteger.valueOf(2190));
  217.         obj.setDyaOrig(BigInteger.valueOf(1440));
  218.  
  219.         XmlCursor objCur = obj.newCursor();
  220.         objCur.toFirstContentToken();
  221.  
  222.         XmlCursor drawCur = ctr.getDrawingArray(0).newCursor();
  223.         drawCur.moveXml(objCur);
  224.         drawCur.dispose();
  225.  
  226.         objCur.beginElement("objectEmbed", wordNS);
  227.         objCur.insertAttributeWithValue("id", PackageRelationshipTypes.CORE_PROPERTIES_ECMA376_NS, oleRel);
  228. //        objCur.insertAttributeWithValue("progId", wordNS, "AcroExch.Document");
  229. //        objCur.insertAttributeWithValue("shapeId", wordNS, "_x0000_i"+shapeCounter);
  230. //        objCur.insertAttributeWithValue("drawAspect", wordNS, "content");
  231.  
  232. //        OLEObjectDocument oleParent = OLEObjectDocument.Factory.newInstance();
  233. //        CTOLEObject ole = oleParent.addNewOLEObject();
  234. //        ole.setDrawAspect(STOLEDrawAspect.CONTENT);
  235. //        ole.setObjectID("_1639082351");
  236. //        ole.setProgID("AcroExch.Document");
  237. //        ole.setShapeID("_x0000_i"+shapeCounter);
  238. //        ole.setType(STOLEType.EMBED);
  239. //        ole.setId(oleRel);
  240. //
  241. //        XmlCursor oleCur = oleParent.newCursor();
  242. //        oleCur.copyXmlContents(objCur);
  243. //        oleCur.dispose();
  244.  
  245.         objCur.dispose();
  246.  
  247.     }
  248.  
  249.     static class MyRelations extends POIXMLRelation {
  250.  
  251.         static final MyRelations OLE_OBJECT = new MyRelations(
  252.             XSLFRelation.OLE_OBJECT.getContentType(),
  253.             XSLFRelation.OLE_OBJECT.getRelation(),
  254.             "/word/embeddings/oleObject#.bin",
  255.             null
  256.         );
  257.  
  258.         private MyRelations(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
  259.             super(type, rel, defaultName, cls);
  260.         }
  261.     }
  262.  
  263.     static class MyFactory extends POIXMLFactory {
  264.         static final MyFactory inst = new MyFactory();
  265.  
  266.         protected POIXMLRelation getDescriptor(String relationshipType) {
  267.             return MyRelations.OLE_OBJECT;
  268.         }
  269.  
  270.         protected POIXMLDocumentPart createDocumentPart
  271.         (Class<? extends POIXMLDocumentPart> cls, Class<?>[] classes, Object[] values)
  272.                 throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
  273.             return new MyObjectData();
  274.         }
  275.     }
  276.  
  277.     static class MyObjectData extends POIXMLDocumentPart {
  278.  
  279.         /**
  280.          * Create a new XSLFObjectData node
  281.          */
  282.         MyObjectData() { }
  283.  
  284.         public MyObjectData(final PackagePart part) {
  285.             super(part);
  286.         }
  287.  
  288.         /**
  289.          * XSLFObjectData objects store the actual content in the part directly without keeping a
  290.          * copy like all others therefore we need to handle them differently.
  291.          */
  292.         @Override
  293.         protected void prepareForCommit() {
  294.             // do not clear the part here
  295.         }
  296.  
  297.  
  298.         public void setData(final byte[] data) throws IOException {
  299.             try (final OutputStream os = getPackagePart().getOutputStream()) {
  300.                 os.write(data);
  301.             }
  302.         }
  303.     }
  304. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top