using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Data;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml;
namespace Kacit.EBoard.CommonControls.Reporting
{
/// <summary>
/// Helper class for filling in data forms based on Word 2007 documents.
/// </summary>
public static class FormFiller
{
/// <summary>
/// Regex used to parse MERGEFIELDs in the provided document.
/// Named groups: <c>name</c> (field name, including any '#'-separated switches),
/// <c>Format</c> (argument of the \* switch), <c>PreText</c> (argument of the \b switch)
/// and <c>PostText</c> (argument of the \f switch).
/// </summary>
private static readonly Regex instructionRegEx =
    new Regex(
        // The '#' comments below are part of the pattern; they are legal because of
        // RegexOptions.IgnorePatternWhitespace.
        @"^[\s]*MERGEFIELD[\s]+(?<name>[#\w]*){1} # This retrieves the field's name (Named Capture Group -> name)
[\s]*(\\\*[\s]+(?<Format>[\w]*){1})? # Retrieves field's format flag (Named Capture Group -> Format)
[\s]*(\\b[\s]+[""]?(?<PreText>[^\\]*){1})? # Retrieves text to display before field data (Named Capture Group -> PreText)
# Retrieves text to display after field data (Named Capture Group -> PostText)
[\s]*(\\f[\s]+[""]?(?<PostText>[^\\]*){1})?",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
/// <summary>
/// Fills in a .docx file with the provided data.
/// </summary>
/// <param name="filename">Path to the template that must be used.</param>
/// <param name="dataset">Dataset with the datatables to use to fill the document tables with. Table names in the dataset should match the table names in the document.</param>
/// <param name="values">Values to fill the document. Keys should match the MERGEFIELD names. A null dictionary is treated as empty.</param>
/// <returns>The filled-in document.</returns>
/// <exception cref="Exception">Thrown when a table field in the template names a column that the matching data table does not contain.</exception>
public static byte[] GetWordReport(string filename, DataSet dataset, Dictionary<string, string> values)
{
    if (values == null)
    {
        values = new Dictionary<string, string>();
    }

    // Work on an in-memory copy so the template file itself is never modified.
    byte[] original = File.ReadAllBytes(filename);
    using (var stream = new MemoryStream())
    {
        stream.Write(original, 0, original.Length);
        // Create a Wordprocessing document object.
        using (var docx = WordprocessingDocument.Open(stream, true))
        {
            var document = docx.MainDocumentPart.Document;

            // 2010/08/01: addition
            ConvertFieldCodes(document);

            // first: process all tables.
            // The field list is snapshotted because rows are added and removed inside the loop.
            foreach (var field in document.Descendants<SimpleField>().ToArray())
            {
                string[] switches;
                var fieldname = GetFieldName(field, out switches);
                if (!IsTableFieldName(fieldname))
                {
                    continue;
                }
                var wrow = GetFirstParent<TableRow>(field);
                if (wrow == null)
                {
                    continue;
                }
                var wtable = GetFirstParent<Table>(wrow);
                if (wtable == null)
                {
                    continue; // can happen: table contains multiple fields, and after 1 pass the template row is already detached
                }
                var tablename = GetTableNameFromFieldName(fieldname);
                if (IsMissingOrEmptyTable(dataset, tablename))
                {
                    continue; // don't remove table here: will be done in next pass
                }
                ExpandTemplateRow(wtable, wrow, dataset.Tables[tablename]);
            }

            // clean empty tables
            foreach (var field in document.Descendants<SimpleField>().ToArray())
            {
                string[] switches;
                var fieldname = GetFieldName(field, out switches);
                if (!IsTableFieldName(fieldname))
                {
                    continue;
                }
                var wrow = GetFirstParent<TableRow>(field);
                if (wrow == null)
                {
                    continue;
                }
                var wtable = GetFirstParent<Table>(wrow);
                if (wtable == null || wtable.Parent == null)
                {
                    continue; // no table, or the table was already removed for an earlier field
                }
                // if there's a 'dt' switch: delete Word-table
                if (IsMissingOrEmptyTable(dataset, GetTableNameFromFieldName(fieldname)) && switches.Contains("dt"))
                {
                    wtable.Remove();
                }
            }

            // next : process all remaining fields in the main document
            FillWordFieldsInElement(values, document);
            document.Save(); // save main document back in package

            // process header(s)
            foreach (HeaderPart hpart in docx.MainDocumentPart.HeaderParts)
            {
                // 2010/08/01: addition
                ConvertFieldCodes(hpart.Header);
                FillWordFieldsInElement(values, hpart.Header);
                hpart.Header.Save(); // save header back in package
            }

            // process footer(s)
            foreach (FooterPart fpart in docx.MainDocumentPart.FooterParts)
            {
                // 2010/08/01: addition
                ConvertFieldCodes(fpart.Footer);
                FillWordFieldsInElement(values, fpart.Footer);
                fpart.Footer.Save(); // save footer back in package
            }
        }

        // The package has been disposed (and thereby flushed) at this point; return its bytes.
        return stream.ToArray();
    }
}

/// <summary>Tells whether a field name denotes a table field, i.e. starts with "TBL_".</summary>
private static bool IsTableFieldName(string fieldname)
{
    return !string.IsNullOrEmpty(fieldname) && fieldname.StartsWith("TBL_", StringComparison.Ordinal);
}

/// <summary>Tells whether the dataset lacks the named table or the table has no rows.</summary>
private static bool IsMissingOrEmptyTable(DataSet dataset, string tablename)
{
    return dataset == null
        || !dataset.Tables.Contains(tablename)
        || dataset.Tables[tablename].Rows.Count == 0;
}

/// <summary>
/// Appends one Word table row per data row, modelled on the template row, and then removes the template row.
/// </summary>
private static void ExpandTemplateRow(Table wtable, TableRow wrow, DataTable table)
{
    // Per template cell: its properties, the paragraph properties of its first paragraph,
    // the merge fields it contains and the column names those fields refer to.
    var props = new List<TableCellProperties>();
    var paragraphInfo = new List<string>();
    var cellfields = new List<List<SimpleField>>();
    var cellcolumnnames = new List<List<string>>();
    foreach (var cell in wrow.Descendants<TableCell>())
    {
        props.Add(cell.GetFirstChild<TableCellProperties>());
        var firstParagraph = cell.GetFirstChild<Paragraph>();
        var pp = firstParagraph != null ? firstParagraph.GetFirstChild<ParagraphProperties>() : null;
        paragraphInfo.Add(pp != null ? pp.OuterXml : null);

        var subCellFields = new List<SimpleField>();
        var subColumnNames = new List<string>();
        foreach (var cellfield in cell.Descendants<SimpleField>())
        {
            string[] ignoredSwitches;
            subColumnNames.Add(GetColumnNameFromFieldName(GetFieldName(cellfield, out ignoredSwitches)));
            subCellFields.Add(cellfield);
        }
        cellfields.Add(subCellFields);
        cellcolumnnames.Add(subColumnNames);
    }

    // Validate every referenced column before touching the document.
    foreach (var columnNames in cellcolumnnames)
    {
        foreach (var cn in columnNames)
        {
            if (!table.Columns.Contains(cn))
            {
                throw new Exception(
                    string.Format(
                        "Unable to complete template: column name '{0}' is unknown in parameter tables !",
                        cn));
            }
        }
    }

    // keep reference to row properties
    var rprops = wrow.GetFirstChild<TableRowProperties>();
    foreach (DataRow row in table.Rows)
    {
        var nrow = new TableRow();
        if (rprops != null)
        {
            nrow.Append(new TableRowProperties(rprops.OuterXml));
        }
        for (var i = 0; i < props.Count; i++)
        {
            var cell = new TableCell();
            if (props[i] != null)
            {
                cell.Append(new TableCellProperties(props[i].OuterXml));
            }
            var hasParagraph = false;
            foreach (var cn in cellcolumnnames[i])
            {
                var val = row[cn].ToString();
                var marker = string.Format("TBL_{0}_{1}", table.TableName, cn);
                foreach (var cellfield in cellfields[i])
                {
                    if (!cellfield.Instruction.Value.Contains(marker))
                    {
                        continue;
                    }
                    var p = new Paragraph(new ParagraphProperties(paragraphInfo[i]));
                    p.Append(GetRunElementForText(val, cellfield));
                    cell.Append(p);
                    hasParagraph = true;
                }
            }
            if (!hasParagraph)
            {
                // cell must contain at minimum a paragraph !
                cell.Append(new Paragraph(new ParagraphProperties(paragraphInfo[i])));
            }
            nrow.Append(cell);
        }
        wtable.Append(nrow);
    }

    // finally : delete template-row (and thus also the mergefields in the table)
    wrow.Remove();
}
/// <summary>
/// Applies any formatting specified to the pre and post text as
/// well as to fieldValue.
/// </summary>
/// <param name="format">The format flag to apply: Upper, Lower, FirstCap or Caps (matched case-insensitively). Any other value leaves the texts unchanged.</param>
/// <param name="fieldValue">The data value being inserted.</param>
/// <param name="preText">The text to appear before fieldValue, if any.</param>
/// <param name="postText">The text to appear after fieldValue, if any.</param>
/// <returns>The formatted text; [0] = fieldValue, [1] = preText, [2] = postText.</returns>
/// <exception cref="NullReferenceException">Thrown for the Upper and Lower formats if fieldValue, preText, or postText are null.</exception>
internal static string[] ApplyFormatting(string format, string fieldValue, string preText, string postText)
{
    string[] valuesToReturn = new string[3];
    if (string.Equals(format, "UPPER", StringComparison.OrdinalIgnoreCase))
    {
        // Convert everything to uppercase.
        valuesToReturn[0] = fieldValue.ToUpper(CultureInfo.CurrentCulture);
        valuesToReturn[1] = preText.ToUpper(CultureInfo.CurrentCulture);
        valuesToReturn[2] = postText.ToUpper(CultureInfo.CurrentCulture);
    }
    else if (string.Equals(format, "LOWER", StringComparison.OrdinalIgnoreCase))
    {
        // Convert everything to lowercase.
        valuesToReturn[0] = fieldValue.ToLower(CultureInfo.CurrentCulture);
        valuesToReturn[1] = preText.ToLower(CultureInfo.CurrentCulture);
        valuesToReturn[2] = postText.ToLower(CultureInfo.CurrentCulture);
    }
    else if (string.Equals(format, "FirstCap", StringComparison.OrdinalIgnoreCase))
    {
        // Capitalize the first letter, everything else is lowercase.
        // Each text is handled on its own length, independent of the other two.
        valuesToReturn[0] = CapitalizeFirstLetter(fieldValue);
        valuesToReturn[1] = CapitalizeFirstLetter(preText);
        valuesToReturn[2] = CapitalizeFirstLetter(postText);
    }
    else if (string.Equals(format, "Caps", StringComparison.OrdinalIgnoreCase))
    {
        // Title casing: the first letter of every word should be capitalized.
        valuesToReturn[0] = ToTitleCase(fieldValue);
        valuesToReturn[1] = ToTitleCase(preText);
        valuesToReturn[2] = ToTitleCase(postText);
    }
    else
    {
        valuesToReturn[0] = fieldValue;
        valuesToReturn[1] = preText;
        valuesToReturn[2] = postText;
    }
    return valuesToReturn;
}

/// <summary>Upper-cases the first character and lower-cases the rest; null or empty input is returned unchanged.</summary>
private static string CapitalizeFirstLetter(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }
    return text.Substring(0, 1).ToUpper(CultureInfo.CurrentCulture)
        + text.Substring(1).ToLower(CultureInfo.CurrentCulture);
}
/// <summary>
/// Executes the field switches on a given element.
/// The possible switches are:
/// <list type="bullet">
/// <item><description>dt : delete table</description></item>
/// <item><description>dr : delete row</description></item>
/// <item><description>dp : delete paragraph</description></item>
/// </list>
/// Only one switch is executed; dp takes precedence over dr, which takes precedence over dt.
/// </summary>
/// <param name="element">The element being operated on.</param>
/// <param name="switches">The switches to be executed.</param>
internal static void ExecuteSwitches(OpenXmlElement element, string[] switches)
{
    if (element == null || switches == null || switches.Length == 0)
    {
        return;
    }

    // check switches (switches are always lowercase)
    OpenXmlElement target = null;
    if (switches.Contains("dp"))
    {
        target = GetFirstParent<Paragraph>(element);
    }
    else if (switches.Contains("dr"))
    {
        target = GetFirstParent<TableRow>(element);
    }
    else if (switches.Contains("dt"))
    {
        target = GetFirstParent<Table>(element);
    }

    // The ancestor may already have been detached by a switch on a sibling field;
    // removing an element that has no parent is an error, so skip it in that case.
    if (target != null && target.Parent != null)
    {
        target.Remove();
    }
}
/// <summary>
/// Fills all the <see cref="SimpleField"/>s that are found in a given <see cref="OpenXmlElement"/>.
/// </summary>
/// <param name="values">The values to insert; keys should match the placeholder names, values are the data to insert. A null dictionary is treated as empty.</param>
/// <param name="element">The document element that will contain the new values.</param>
internal static void FillWordFieldsInElement(Dictionary<string, string> values, OpenXmlElement element)
{
    var emptyfields = new Dictionary<SimpleField, string[]>();

    // First pass: fill in data, but do not delete empty fields. Deletions silently break the loop,
    // which is also why the field list is snapshotted up front.
    foreach (var field in element.Descendants<SimpleField>().ToArray())
    {
        string[] switches;
        string[] options;
        string fieldname = GetFieldNameWithOptions(field, out switches, out options);
        if (string.IsNullOrEmpty(fieldname))
        {
            continue;
        }

        string value;
        if (values != null && values.TryGetValue(fieldname, out value) && !string.IsNullOrEmpty(value))
        {
            string[] formattedText = ApplyFormatting(options[0], value, options[1], options[2]);

            // NOTE(review): pre/post text is inserted as a sibling paragraph of the field's parent;
            // when that parent is a run this nests a paragraph inside a paragraph - confirm this is intended.
            // Prepend any text specified to appear before the data in the MergeField
            if (!string.IsNullOrEmpty(options[1]))
            {
                field.Parent.InsertBeforeSelf(GetPreOrPostParagraphToInsert(formattedText[1], field));
            }
            // Append any text specified to appear after the data in the MergeField
            if (!string.IsNullOrEmpty(options[2]))
            {
                field.Parent.InsertAfterSelf(GetPreOrPostParagraphToInsert(formattedText[2], field));
            }
            // replace mergefield with text
            field.Parent.ReplaceChild(GetRunElementForText(formattedText[0], field), field);
        }
        else
        {
            // keep track of unknown or empty fields
            emptyfields[field] = switches;
        }
    }

    // second pass : clear empty fields
    foreach (KeyValuePair<SimpleField, string[]> kvp in emptyfields)
    {
        // if field is unknown or empty: execute switches and remove it from document !
        ExecuteSwitches(kvp.Key, kvp.Value);
        if (kvp.Key.Parent != null)
        {
            kvp.Key.Remove();
        }
    }
}
/// <summary>
/// Extracts the column part of a table merge field name.
/// Table merge fields are named TBL_tablename_columnname; everything after the
/// second underscore is the column name.
/// </summary>
/// <param name="fieldname">The field name.</param>
/// <returns>The column name.</returns>
/// <exception cref="ArgumentException">Thrown when fieldname is not formatted as TBL_tablename_columname.</exception>
internal static string GetColumnNameFromFieldName(string fieldname)
{
    int firstUnderscore = fieldname.IndexOf('_');

    // Only look for the second underscore when a first one exists past position 0.
    int secondUnderscore = firstUnderscore > 0
        ? fieldname.IndexOf('_', firstUnderscore + 1)
        : -1;

    if (secondUnderscore <= 0)
    {
        throw new ArgumentException("Error: table-MERGEFIELD should be formatted as follows: TBL_tablename_columnname.");
    }

    return fieldname.Substring(secondUnderscore + 1);
}
/// <summary>
/// Parses the instruction of a merge field and returns its field name, with any
/// '#'-separated switches split off into <paramref name="switches"/>.
/// Note: the switches are always returned lowercase !
/// </summary>
/// <param name="field">The field being examined.</param>
/// <param name="switches">An array of switches to apply to the field; empty when the field has none.</param>
/// <returns>The name of the field, or an empty string when the instruction is empty or is not a MERGEFIELD.</returns>
internal static string GetFieldName(SimpleField field, out string[] switches)
{
    string instr = field.GetAttribute("instr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main").Value;
    switches = new string[0];

    if (string.IsNullOrEmpty(instr))
    {
        return string.Empty;
    }

    Match parsed = instructionRegEx.Match(instr);
    if (!parsed.Success)
    {
        return string.Empty;
    }

    string nameWithSwitches = parsed.Groups["name"].Value.Trim();
    int hashIndex = nameWithSwitches.IndexOf('#');
    if (hashIndex <= 0)
    {
        // No switches (or the name itself starts with '#'): the captured text is the field name.
        return nameWithSwitches;
    }

    // Everything after the first '#' is a '#'-separated list of switches.
    switches = nameWithSwitches.Substring(hashIndex + 1).ToLower().Split(new char[] { '#' }, StringSplitOptions.RemoveEmptyEntries);
    return nameWithSwitches.Substring(0, hashIndex);
}
/// <summary>
/// Returns the fieldname and switches from the given mergefield-instruction
/// Note: the switches are always returned lowercase !
/// Note 2: options holds values for formatting and text to insert before and/or after the field value.
/// options[0] = Formatting (Upper, Lower, Caps a.k.a. title case, FirstCap)
/// options[1] = Text to insert before data
/// options[2] = Text to insert after data
/// </summary>
/// <param name="field">The field being examined.</param>
/// <param name="switches">An array of switches to apply to the field; empty when the field has none.</param>
/// <param name="options">Formatting options to apply; always a three-element array. The elements stay null when the instruction is empty or is not a MERGEFIELD.</param>
/// <returns>The name of the field, or an empty string when the instruction is empty or is not a MERGEFIELD.</returns>
internal static string GetFieldNameWithOptions(SimpleField field, out string[] switches, out string[] options)
{
    var a = field.GetAttribute("instr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
    switches = new string[0];
    options = new string[3];
    string fieldname = string.Empty;
    string instruction = a.Value;
    if (!string.IsNullOrEmpty(instruction))
    {
        Match m = instructionRegEx.Match(instruction);
        if (m.Success)
        {
            fieldname = m.Groups["name"].Value.Trim();
            options[0] = m.Groups["Format"].Value.Trim();
            options[1] = GetSwitchText(instruction, m.Groups["PreText"]);
            options[2] = GetSwitchText(instruction, m.Groups["PostText"]);
            int pos = fieldname.IndexOf('#');
            if (pos > 0)
            {
                // Process the switches, correct the fieldname.
                switches = fieldname.Substring(pos + 1).ToLowerInvariant().Split(new char[] { '#' }, StringSplitOptions.RemoveEmptyEntries);
                fieldname = fieldname.Substring(0, pos);
            }
        }
    }
    return fieldname;
}

/// <summary>
/// Returns the trimmed text captured for a \b or \f switch. The pattern consumes an opening
/// quote but captures up to the next backslash, so the closing quote of a quoted argument
/// ends up in the capture; it is stripped here.
/// </summary>
private static string GetSwitchText(string instruction, Group group)
{
    if (!group.Success)
    {
        return string.Empty;
    }

    string text = group.Value.Trim();
    bool openedWithQuote = group.Index > 0 && instruction[group.Index - 1] == '"';
    if (openedWithQuote && text.EndsWith("\"", StringComparison.Ordinal))
    {
        text = text.Substring(0, text.Length - 1);
    }
    return text;
}
/// <summary>
/// Returns the first parent of a given <see cref="OpenXmlElement"/> that corresponds
/// to the given type.
/// This methods is different from the Ancestors-method on the OpenXmlElement in the sense that
/// this method will return only the first-parent in direct line (closest to the given element).
/// The match is on the exact runtime type; derived types do not count.
/// </summary>
/// <typeparam name="T">The type of element being searched for.</typeparam>
/// <param name="element">The element being examined.</param>
/// <returns>The first parent of the element of the specified type, or null when there is none (or when element is null).</returns>
internal static T GetFirstParent<T>(OpenXmlElement element)
    where T : OpenXmlElement
{
    if (element == null)
    {
        return null;
    }

    // Walk up the parent chain; the element itself is never a candidate.
    for (OpenXmlElement ancestor = element.Parent; ancestor != null; ancestor = ancestor.Parent)
    {
        if (ancestor.GetType() == typeof(T))
        {
            return (T)ancestor;
        }
    }
    return null;
}
/// <summary>
/// Builds a paragraph holding a single run with the given text, for text that should
/// appear before or after a MergeField.
/// </summary>
/// <param name="text">The text to display.</param>
/// <param name="fieldToMimic">The MergeField passed on to <see cref="GetRunElementForText"/> as formatting placeholder.</param>
/// <returns>An OpenXml Paragraph ready to insert.</returns>
internal static Paragraph GetPreOrPostParagraphToInsert(string text, SimpleField fieldToMimic)
{
    var wrapper = new Paragraph();
    wrapper.Append(GetRunElementForText(text, fieldToMimic));
    return wrapper;
}
/// <summary>
/// Returns a <see cref="Run"/>-openxml element for the given text.
/// Specific about this run-element is that it can describe multiple-line and tabbed-text.
/// The placeholder can be provided too, to allow duplicating the formatting.
/// </summary>
/// <param name="text">The text to be inserted. Line feeds (including CR LF and lone CR) become breaks, tabs become tab characters.</param>
/// <param name="placeHolder">The placeholder where the text will be inserted; the run properties of its parent, if any, are copied. May be null.</param>
/// <returns>A new <see cref="Run"/>-openxml element containing the specified text.</returns>
internal static Run GetRunElementForText(string text, SimpleField placeHolder)
{
    var r = new Run();

    // Copy the run properties (an 'rPr' child) of the placeholder's parent, when there is one.
    if (placeHolder != null && placeHolder.Parent != null)
    {
        OpenXmlElement parentRunProperties =
            placeHolder.Parent.Elements().FirstOrDefault(child => child.LocalName == "rPr");
        if (parentRunProperties != null)
        {
            r.AppendChild(new RunProperties(parentRunProperties.OuterXml));
        }
    }

    if (!string.IsNullOrEmpty(text))
    {
        // Normalize line endings so that no stray carriage return ends up in the text.
        string normalized = text.Replace("\r\n", "\n").Replace('\r', '\n');

        // first process line breaks
        bool first = true;
        foreach (string line in normalized.Split(new[] { "\n" }, StringSplitOptions.None))
        {
            if (!first)
            {
                r.Append(new Break());
            }
            first = false;

            // then process tabs
            bool firsttab = true;
            foreach (string tabtext in line.Split(new[] { "\t" }, StringSplitOptions.None))
            {
                if (!firsttab)
                {
                    r.Append(new TabChar());
                }
                // xml:space="preserve" keeps leading and trailing blanks of the value.
                r.AppendChild(new Text(tabtext) { Space = SpaceProcessingModeValues.Preserve });
                firsttab = false;
            }
        }
    }
    return r;
}
/// <summary>
/// Extracts the table part of a table merge field name.
/// Table merge fields are named TBL_tablename_columnname; the text between the
/// first and the second underscore is the table name.
/// </summary>
/// <param name="fieldname">The field name.</param>
/// <returns>The table name.</returns>
/// <exception cref="ArgumentException">Thrown when fieldname is not formatted as TBL_tablename_columname.</exception>
internal static string GetTableNameFromFieldName(string fieldname)
{
    int firstUnderscore = fieldname.IndexOf('_');

    // Only look for the second underscore when a first one exists past position 0.
    int secondUnderscore = firstUnderscore > 0
        ? fieldname.IndexOf('_', firstUnderscore + 1)
        : -1;

    if (secondUnderscore <= 0)
    {
        throw new ArgumentException("Error: table-MERGEFIELD should be formatted as follows: TBL_tablename_columnname.");
    }

    int tableNameStart = firstUnderscore + 1;
    return fieldname.Substring(tableNameStart, secondUnderscore - tableNameStart);
}
/// <summary>
/// Title-cases a string, capitalizing the first letter of every word and lower-casing the rest.
/// Words are separated by spaces; runs of spaces collapse to a single space.
/// </summary>
/// <param name="toConvert">The string to convert.</param>
/// <returns>The string after title-casing; an empty string for null or empty input.</returns>
internal static string ToTitleCase(string toConvert)
{
    return ToTitleCaseHelper(toConvert, string.Empty);
}

/// <summary>
/// Title-cases a string, capitalizing the first letter of every word, and appends the
/// result to the part that was already converted.
/// </summary>
/// <param name="toConvert">The string to convert.</param>
/// <param name="alreadyConverted">The part of the string already converted. Seed with an empty string.</param>
/// <returns>
/// alreadyConverted followed by the title-cased words of toConvert, separated by single spaces.
/// When toConvert is null or empty, alreadyConverted is returned as is.
/// </returns>
internal static string ToTitleCaseHelper(string toConvert, string alreadyConverted)
{
    if (string.IsNullOrEmpty(toConvert))
    {
        return alreadyConverted;
    }

    // Iterative on purpose: the stack depth does not grow with the number of words.
    var sb = new StringBuilder(alreadyConverted);
    foreach (string piece in toConvert.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        string word = piece.Trim();
        if (word.Length == 0)
        {
            continue;
        }

        // Separate words with one space, without a leading space before the first word.
        if (sb.Length > 0)
        {
            sb.Append(' ');
        }
        sb.Append(word.Substring(0, 1).ToUpper(CultureInfo.CurrentCulture));
        sb.Append(word.Substring(1).ToLower(CultureInfo.CurrentCulture));
    }
    return sb.ToString();
}
/// <summary>
/// Since MS Word 2010 the SimpleField element is not longer used. It has been replaced by a combination of
/// Run elements and a FieldCode element. This method will convert the new format to the old SimpleField-compliant
/// format.
/// Each Begin..End sequence of runs is replaced by one run holding a <see cref="SimpleField"/> whose
/// instruction is the concatenated field code text. Nested fields are not treated specially.
/// </summary>
/// <param name="mainElement">The element whose runs are converted. Nothing happens when it is null.</param>
/// <exception cref="Exception">Thrown when a Begin field character has no matching End.</exception>
internal static void ConvertFieldCodes(OpenXmlElement mainElement)
{
    if (mainElement == null)
    {
        return;
    }

    // search for all the Run elements
    Run[] runs = mainElement.Descendants<Run>().ToArray();

    // New run (key) replacing the original sequence of runs (value) that made up the field.
    var newfields = new Dictionary<Run, Run[]>();

    int cursor = 0;
    while (cursor < runs.Length)
    {
        Run run = runs[cursor];
        if (HasFieldCharOfType(run, FieldCharValues.Begin))
        {
            var innerRuns = new List<Run>();
            innerRuns.Add(run);

            // loop until we find the 'end' FieldChar; the bound is checked before indexing
            bool found = false;
            var instruction = new StringBuilder();
            RunProperties runprop = null;
            while (!found && ++cursor < runs.Length)
            {
                run = runs[cursor];
                innerRuns.Add(run);

                FieldCode code = run.Descendants<FieldCode>().FirstOrDefault();
                if (code != null)
                {
                    instruction.Append(code.Text);
                }
                if (HasFieldCharOfType(run, FieldCharValues.End))
                {
                    found = true;
                }
                // Remember the most recent run properties seen inside the field.
                RunProperties candidate = run.GetFirstChild<RunProperties>();
                if (candidate != null)
                {
                    runprop = candidate;
                }
            }

            // something went wrong : found Begin but no End. Throw exception
            if (!found)
            {
                throw new Exception("Found a Begin FieldChar but no End !");
            }

            if (instruction.Length > 0)
            {
                // build new Run containing a SimpleField
                Run newrun = new Run();
                if (runprop != null)
                {
                    newrun.AppendChild(runprop.CloneNode(true));
                }
                SimpleField simplefield = new SimpleField();
                simplefield.Instruction = instruction.ToString();
                newrun.AppendChild(simplefield);
                newfields.Add(newrun, innerRuns.ToArray());
            }
        }
        cursor++;
    }

    // replace all FieldCodes by old-style SimpleFields
    foreach (KeyValuePair<Run, Run[]> kvp in newfields)
    {
        kvp.Value[0].Parent.ReplaceChild(kvp.Key, kvp.Value[0]);
        for (int i = 1; i < kvp.Value.Length; i++)
        {
            if (kvp.Value[i].Parent != null)
            {
                kvp.Value[i].Remove();
            }
        }
    }
}

/// <summary>
/// Tells whether the first field character found in the run is of the given type.
/// The type is compared by equality: the field character types are distinct values, not bit flags.
/// </summary>
private static bool HasFieldCharOfType(Run run, FieldCharValues type)
{
    FieldChar fieldChar = run.Descendants<FieldChar>().FirstOrDefault();
    return fieldChar != null
        && fieldChar.FieldCharType != null
        && fieldChar.FieldCharType.Value == type;
}
}
}