
Untitled
By: a guest on
Apr 25th, 2012 | syntax:
None | size: 0.72 KB | hits: 4 | expires: Never
Extracting the text inside a docx file
I am line 1
I am line 2 I am some other text
I am line 1I am line 2I am some other text.
open System
open System.IO
open System.IO.Packaging
open System.Xml
/// Returns the concatenated text of a .docx file's main document part.
///
/// Opens the Open Packaging Conventions package at `path`, loads the
/// `/word/document.xml` part into an `XmlDocument`, and returns the
/// `InnerText` of its root element.
///
/// `InnerText` is the concatenation of every text node under the root, so
/// text from separate elements is joined with no separator between them.
///
/// Nothing is caught here: failures from `Package.Open`, `GetPart`, or
/// `XmlDocument.Load` (missing file, missing part, malformed XML) propagate
/// to the caller.
let getDocxContent (path: string) =
    // Read-only access: the function never writes to the package, and the
    // two-argument overload would request read/write access instead.
    use package = Package.Open(path, FileMode.Open, FileAccess.Read)
    let documentUri = Uri("/word/document.xml", UriKind.Relative)
    // `use` disposes the part stream when the function returns. A freshly
    // opened stream already starts at position 0, so no Seek is needed.
    use stream = package.GetPart(documentUri).GetStream(FileMode.Open, FileAccess.Read)
    let xmlDoc = XmlDocument()
    xmlDoc.Load(stream)
    xmlDoc.DocumentElement.InnerText
// Script entry: extract the text of the sample document and print it to stdout.
// NOTE(review): "C:a1.docx" is a drive-relative path (resolved against the
// current directory on drive C:); a backslash may have been lost when this
// was pasted. Confirm the intended location.
let docData = @"C:a1.docx" |> getDocxContent
docData |> printfn "%s"
// Detached fragment: repeats the XmlDocument construction and load that
// appear inside getDocxContent. `stream` is only bound inside that function,
// so these two lines do not stand alone at this level.
// NOTE(review): presumably the "before" half of a before/after snippet - confirm.
let xmlDoc = new XmlDocument()
xmlDoc.Load(stream)
// Detached fragment: the same XmlDocument construction and load used inside
// getDocxContent, but with PreserveWhitespace set to true before loading.
// `stream` is only bound inside getDocxContent, so these lines do not stand
// alone at this level.
// NOTE(review): presumably the suggested replacement for the load inside
// getDocxContent - confirm, and verify it changes the extracted text as intended.
let xmlDoc = new XmlDocument()
xmlDoc.PreserveWhitespace <- true
xmlDoc.Load(stream)