Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on Apr 25th, 2012  |  syntax: None  |  size: 0.72 KB  |  hits: 4  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Extracting the text inside a docx file
  2. I am line 1
  3.  
  4. I am line 2      I am some other text
  5.        
  6. I am line 1I am line 2I am some other text.
  7.        
  8. open System
  9.  open System.IO
  10.  open System.IO.Packaging
  11.  open System.Xml
  12.  
  13.  let getDocxContent (path: string) =
  14.  use package = Package.Open(path, FileMode.Open)
  15.  let stream = package.GetPart(new Uri("/word/document.xml",UriKind.Relative)).GetStream()
  16.  stream.Seek(0L, SeekOrigin.Begin) |> ignore
  17.  let xmlDoc = new XmlDocument()
  18.  xmlDoc.Load(stream)
  19.  xmlDoc.DocumentElement.InnerText
  20.  let docData = getDocxContent @"C:a1.docx"
  21.  printfn "%s" docData
  22.        
  23. let xmlDoc = new XmlDocument()
  24. xmlDoc.Load(stream)
  25.        
  26. let xmlDoc = new XmlDocument()
  27. xmlDoc.PreserveWhitespace <- true
  28. xmlDoc.Load(stream)