using System; using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.IO; using System.Data; using System.Xml.Linq; using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; using DocumentFormat.OpenXml; namespace Kacit.EBoard.CommonControls.Reporting { ///

/// Helper class for filling in data forms based on Word 2007 documents. ///

public static class FormFiller { ///

/// <summary>
/// Regex used to parse MERGEFIELDs in the provided document.
/// </summary>
/// <remarks>
/// Named capture groups: <c>name</c> (field name, including any '#'-separated switches),
/// <c>Format</c> (the \* format flag), <c>PreText</c> (the \b text) and <c>PostText</c> (the \f text).
/// The pattern is compiled with <see cref="RegexOptions.IgnorePatternWhitespace"/>: every inline
/// '#' comment runs to the end of its line, so the line breaks inside the literal are significant.
/// </remarks>
private static readonly Regex instructionRegEx = new Regex(
    @"^[\s]*MERGEFIELD[\s]+(?<name>[#\w]*){1}           # This retrieves the field's name (Named Capture Group -> name)
      [\s]*(\\\*[\s]+(?<Format>[\w]*){1})?              # Retrieves field's format flag (Named Capture Group -> Format)
      [\s]*(\\b[\s]+[""]?(?<PreText>[^\\]*){1})?       # Retrieves text to display before field data (Named Capture Group -> PreText)
                                                        # Retrieves text to display after field data (Named Capture Group -> PostText)
      [\s]*(\\f[\s]+[""]?(?<PostText>[^\\]*){1})?",
    RegexOptions.Compiled
        | RegexOptions.CultureInvariant
        | RegexOptions.ExplicitCapture
        | RegexOptions.IgnoreCase
        | RegexOptions.IgnorePatternWhitespace
        | RegexOptions.Singleline);

/// <summary>
/// Fills in a .docx file with the provided data.
/// </summary>
/// <param name="filename">Path to the template that must be used.</param>
/// <param name="dataset">Dataset with the datatables to use to fill the document tables with. Table names in the dataset should match the table names in the document.</param>
/// <param name="values">Values to fill the document. Keys should match the MERGEFIELD names.</param>
/// <returns>The filled-in document.</returns>
/// <exception cref="Exception">Thrown when a table field refers to a column that does not exist in the matching data table.</exception>
public static byte[] GetWordReport(string filename, DataSet dataset, Dictionary<string, string> values)
{
    // Work on an in-memory copy of the template; the file on disk is only read.
    byte[] original = File.ReadAllBytes(filename);
    string[] switches;

    using (var stream = new MemoryStream())
    {
        stream.Write(original, 0, original.Length);

        // Create a Wordprocessing document object.
        using (var docx = WordprocessingDocument.Open(stream, true))
        {
            var document = docx.MainDocumentPart.Document;

            // 2010/08/01: addition
            ConvertFieldCodes(document);

            // First pass: fill every table that has data.
            // The loop removes template rows, so iterate over a snapshot: mutating the tree
            // while lazily enumerating Descendants ends the enumeration early.
            foreach (var field in document.Descendants<SimpleField>().ToList())
            {
                var fieldname = GetFieldName(field, out switches);
                if (string.IsNullOrEmpty(fieldname) || !fieldname.StartsWith("TBL_", StringComparison.Ordinal))
                {
                    continue;
                }

                var wrow = GetFirstParent<TableRow>(field);
                if (wrow == null)
                {
                    continue; // field is not inside a table row
                }

                var wtable = GetFirstParent<Table>(wrow);
                if (wtable == null)
                {
                    continue; // can happen: the table contains multiple fields, and the template row was already removed by an earlier field
                }

                var tablename = GetTableNameFromFieldName(fieldname);
                if (dataset == null
                    || !dataset.Tables.Contains(tablename)
                    || dataset.Tables[tablename].Rows.Count == 0)
                {
                    continue; // don't remove table here: will be done in next pass
                }

                var table = dataset.Tables[tablename];

                // Capture the layout of the template row, one entry per cell.
                var props = new List<TableCellProperties>();
                var cellcolumnnames = new List<List<string>>();   // null entry = cell without fields
                var paragraphInfo = new List<string>();           // outer XML of the first paragraph's properties, or null
                var cellfields = new List<List<SimpleField>>();

                foreach (var templateCell in wrow.Descendants<TableCell>())
                {
                    props.Add(templateCell.GetFirstChild<TableCellProperties>());

                    var firstParagraph = templateCell.GetFirstChild<Paragraph>();
                    var firstParagraphProperties = firstParagraph != null
                        ? firstParagraph.GetFirstChild<ParagraphProperties>()
                        : null;
                    paragraphInfo.Add(firstParagraphProperties != null ? firstParagraphProperties.OuterXml : null);

                    var subCellFields = templateCell.Descendants<SimpleField>().ToList();
                    var subColumnNames = new List<string>();
                    foreach (var cellfield in subCellFields)
                    {
                        string[] ignoredSwitches;
                        subColumnNames.Add(GetColumnNameFromFieldName(GetFieldName(cellfield, out ignoredSwitches)));
                    }

                    cellfields.Add(subCellFields);
                    cellcolumnnames.Add(subColumnNames.Count == 0 ? null : subColumnNames);
                }

                // keep reference to row properties
                var rprops = wrow.GetFirstChild<TableRowProperties>();

                foreach (DataRow row in table.Rows)
                {
                    var nrow = new TableRow();
                    if (rprops != null)
                    {
                        nrow.Append(new TableRowProperties(rprops.OuterXml));
                    }

                    for (var i = 0; i < props.Count; i++)
                    {
                        var cell = new TableCell();
                        if (props[i] != null)
                        {
                            // A template cell may lack explicit properties; only copy them when present.
                            cell.Append(new TableCellProperties(props[i].OuterXml));
                        }

                        var columnNames = cellcolumnnames[i];
                        var paragraphAdded = false;
                        if (columnNames != null)
                        {
                            var firstColumnName = columnNames[0];
                            if (!table.Columns.Contains(firstColumnName))
                            {
                                throw new Exception(
                                    string.Format(
                                        "Unable to complete template: column name '{0}' is unknown in parameter tables !",
                                        firstColumnName));
                            }

                            foreach (var cn in columnNames)
                            {
                                var val = row[cn].ToString();
                                var marker = string.Format("TBL_{0}_{1}", table.TableName, cn);
                                foreach (var cellfield in cellfields[i])
                                {
                                    if (!cellfield.Instruction.Value.Contains(marker))
                                    {
                                        continue;
                                    }

                                    var p = CreateParagraphWithProperties(paragraphInfo[i]);
                                    p.Append(GetRunElementForText(val, cellfield));
                                    cell.Append(p);
                                    paragraphAdded = true;
                                }
                            }
                        }

                        if (!paragraphAdded)
                        {
                            // cell must contain at minimum a paragraph !
                            cell.Append(CreateParagraphWithProperties(paragraphInfo[i]));
                        }

                        nrow.Append(cell);
                    }

                    wtable.Append(nrow);
                }

                // finally : delete template-row (and thus also the mergefields in the table)
                wrow.Remove();
            }

            // Second pass: clean empty tables (again over a snapshot, because tables are removed).
            foreach (var field in document.Descendants<SimpleField>().ToList())
            {
                var fieldname = GetFieldName(field, out switches);
                if (string.IsNullOrEmpty(fieldname) || !fieldname.StartsWith("TBL_", StringComparison.Ordinal))
                {
                    continue;
                }

                var wrow = GetFirstParent<TableRow>(field);
                if (wrow == null)
                {
                    continue;
                }

                var wtable = GetFirstParent<Table>(wrow);
                if (wtable == null || wtable.Parent == null)
                {
                    continue; // no table, or the table was already removed by an earlier field of the same table
                }

                var tablename = GetTableNameFromFieldName(fieldname);
                var hasData = dataset != null
                    && dataset.Tables.Contains(tablename)
                    && dataset.Tables[tablename].Rows.Count > 0;

                // if there's a 'dt' switch: delete Word-table
                if (!hasData && switches.Contains("dt"))
                {
                    wtable.Remove();
                }
            }

            // next : process all remaining fields in the main document
            FillWordFieldsInElement(values, document);
            document.Save(); // save main document back in package

            // process header(s)
            foreach (HeaderPart hpart in docx.MainDocumentPart.HeaderParts)
            {
                // 2010/08/01: addition
                ConvertFieldCodes(hpart.Header);
                FillWordFieldsInElement(values, hpart.Header);
                hpart.Header.Save(); // save header back in package
            }

            // process footer(s)
            foreach (FooterPart fpart in docx.MainDocumentPart.FooterParts)
            {
                // 2010/08/01: addition
                ConvertFieldCodes(fpart.Footer);
                FillWordFieldsInElement(values, fpart.Footer);
                fpart.Footer.Save(); // save footer back in package
            }
        }

        // get package bytes (ToArray copies the whole buffer regardless of the stream position)
        return stream.ToArray();
    }
}

/// <summary>
/// Creates an empty paragraph, copying the given paragraph properties when there are any.
/// </summary>
/// <param name="paragraphPropertiesXml">Outer XML of the properties to copy, or null/empty for none.</param>
/// <returns>A new paragraph.</returns>
private static Paragraph CreateParagraphWithProperties(string paragraphPropertiesXml)
{
    return string.IsNullOrEmpty(paragraphPropertiesXml)
        ? new Paragraph()
        : new Paragraph(new ParagraphProperties(paragraphPropertiesXml));
}

/// <summary>
/// Applies any formatting specified to the pre and post text as
/// well as to fieldValue.
/// </summary>
/// <param name="format">The format flag to apply (Upper, Lower, FirstCap or Caps; compared case-insensitively). Any other value leaves the texts unchanged.</param>
/// <param name="fieldValue">The data value being inserted.</param>
/// <param name="preText">The text to appear before fieldValue, if any.</param>
/// <param name="postText">The text to appear after fieldValue, if any.</param>
/// <returns>The formatted text; [0] = fieldValue, [1] = preText, [2] = postText.</returns>
/// <exception cref="NullReferenceException">Thrown for the Upper and Lower formats if fieldValue, preText, or postText are null.</exception>
internal static string[] ApplyFormatting(string format, string fieldValue, string preText, string postText)
{
    string[] valuesToReturn = new string[3];

    if (string.Equals("UPPER", format, StringComparison.OrdinalIgnoreCase))
    {
        // Convert everything to uppercase.
        valuesToReturn[0] = fieldValue.ToUpper(CultureInfo.CurrentCulture);
        valuesToReturn[1] = preText.ToUpper(CultureInfo.CurrentCulture);
        valuesToReturn[2] = postText.ToUpper(CultureInfo.CurrentCulture);
    }
    else if (string.Equals("LOWER", format, StringComparison.OrdinalIgnoreCase))
    {
        // Convert everything to lowercase.
        valuesToReturn[0] = fieldValue.ToLower(CultureInfo.CurrentCulture);
        valuesToReturn[1] = preText.ToLower(CultureInfo.CurrentCulture);
        valuesToReturn[2] = postText.ToLower(CultureInfo.CurrentCulture);
    }
    else if (string.Equals("FirstCap", format, StringComparison.OrdinalIgnoreCase))
    {
        // Capitalize the first letter, everything else is lowercase.
        // Each text is handled on its own; its length must not be derived from fieldValue.
        valuesToReturn[0] = CapitalizeFirstLetter(fieldValue);
        valuesToReturn[1] = CapitalizeFirstLetter(preText);
        valuesToReturn[2] = CapitalizeFirstLetter(postText);
    }
    else if (string.Equals("Caps", format, StringComparison.OrdinalIgnoreCase))
    {
        // Title casing: the first letter of every word should be capitalized.
        valuesToReturn[0] = ToTitleCase(fieldValue);
        valuesToReturn[1] = ToTitleCase(preText);
        valuesToReturn[2] = ToTitleCase(postText);
    }
    else
    {
        valuesToReturn[0] = fieldValue;
        valuesToReturn[1] = preText;
        valuesToReturn[2] = postText;
    }

    return valuesToReturn;
}

/// <summary>
/// Upper-cases the first character of a text and lower-cases the rest.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <returns>The converted text, or null when <paramref name="text"/> is null or empty.</returns>
private static string CapitalizeFirstLetter(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    return text.Substring(0, 1).ToUpper(CultureInfo.CurrentCulture)
        + text.Substring(1).ToLower(CultureInfo.CurrentCulture);
}

/// <summary>
/// Executes the field switches on a given element.
/// The possible switches are:
/// <list type="bullet">
/// <item><description>dt : delete table</description></item>
/// <item><description>dr : delete row</description></item>
/// <item><description>dp : delete paragraph</description></item>
/// </list>
/// Only one switch is executed; when several are present the precedence is dp, dr, dt.
/// </summary>
/// <param name="element">The element being operated on.</param>
/// <param name="switches">The switches to be executed.</param>
internal static void ExecuteSwitches(OpenXmlElement element, string[] switches)
{
    if (switches == null || switches.Length == 0)
    {
        return;
    }

    // check switches (switches are always lowercase)
    OpenXmlElement target = null;
    if (switches.Contains("dp"))
    {
        target = GetFirstParent<Paragraph>(element);
    }
    else if (switches.Contains("dr"))
    {
        target = GetFirstParent<TableRow>(element);
    }
    else if (switches.Contains("dt"))
    {
        target = GetFirstParent<Table>(element);
    }

    // Another field in the same paragraph/row/table may already have detached the target;
    // removing a parentless element is pointless (and presumably rejected by the SDK), so skip it.
    if (target != null && target.Parent != null)
    {
        target.Remove();
    }
}

/// <summary>
/// Fills all the <see cref="SimpleField"/> elements that are found in a given <see cref="OpenXmlElement"/>.
/// </summary>
/// <param name="values">The values to insert; keys should match the placeholder names, values are the data to insert.</param>
/// <param name="element">The document element that will contain the new values.</param>
internal static void FillWordFieldsInElement(Dictionary<string, string> values, OpenXmlElement element)
{
    string[] switches;
    string[] options;
    var emptyfields = new Dictionary<SimpleField, string[]>();

    // First pass: fill in data, but do not delete empty fields. Deletions silently break the loop,
    // which is also why the fields are materialized into an array before iterating.
    var fields = element.Descendants<SimpleField>().ToArray();
    foreach (var field in fields)
    {
        string fieldname = GetFieldNameWithOptions(field, out switches, out options);
        if (string.IsNullOrEmpty(fieldname))
        {
            continue;
        }

        string value;
        if (values.TryGetValue(fieldname, out value) && !string.IsNullOrEmpty(value))
        {
            string[] formattedText = ApplyFormatting(options[0], value, options[1], options[2]);

            // Prepend any text specified to appear before the data in the MergeField
            if (!string.IsNullOrEmpty(options[1]))
            {
                field.Parent.InsertBeforeSelf(GetPreOrPostParagraphToInsert(formattedText[1], field));
            }

            // Append any text specified to appear after the data in the MergeField
            if (!string.IsNullOrEmpty(options[2]))
            {
                field.Parent.InsertAfterSelf(GetPreOrPostParagraphToInsert(formattedText[2], field));
            }

            // replace mergefield with text
            field.Parent.ReplaceChild(GetRunElementForText(formattedText[0], field), field);
        }
        else
        {
            // keep track of unknown or empty fields
            emptyfields[field] = switches;
        }
    }

    // second pass : clear empty fields
    foreach (KeyValuePair<SimpleField, string[]> kvp in emptyfields)
    {
        // if field is unknown or empty: execute switches and remove it from document !
        ExecuteSwitches(kvp.Key, kvp.Value);
        if (kvp.Key.Parent != null)
        {
            kvp.Key.Remove();
        }
    }
}

/// <summary>
/// Extracts the column part of a table merge field name.
/// Table merge fields are named TBL_tablename_columnname; everything after the
/// second underscore is the column name (it may itself contain underscores).
/// </summary>
/// <param name="fieldname">The field name.</param>
/// <returns>The column name.</returns>
/// <exception cref="ArgumentException">Thrown when fieldname is not formatted as TBL_tablename_columname.</exception>
internal static string GetColumnNameFromFieldName(string fieldname)
{
    int firstUnderscore = fieldname.IndexOf('_');
    if (firstUnderscore < 1)
    {
        throw new ArgumentException("Error: table-MERGEFIELD should be formatted as follows: TBL_tablename_columnname.");
    }

    int secondUnderscore = fieldname.IndexOf('_', firstUnderscore + 1);
    if (secondUnderscore < 1)
    {
        throw new ArgumentException("Error: table-MERGEFIELD should be formatted as follows: TBL_tablename_columnname.");
    }

    int columnStart = secondUnderscore + 1;
    return fieldname.Substring(columnStart);
}

/// <summary>
/// Reads the merge field name and its '#'-separated switches from a field's instruction.
/// Note: the switches are always returned lowercase !
/// </summary>
/// <param name="field">The field being examined.</param>
/// <param name="switches">Receives the switches to apply to the field; empty when there are none.</param>
/// <returns>The name of the field, or an empty string when the instruction is not a MERGEFIELD.</returns>
internal static string GetFieldName(SimpleField field, out string[] switches)
{
    var instrAttribute = field.GetAttribute("instr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
    switches = new string[0];

    string instruction = instrAttribute.Value;
    if (string.IsNullOrEmpty(instruction))
    {
        return string.Empty;
    }

    Match match = instructionRegEx.Match(instruction);
    if (!match.Success)
    {
        return string.Empty;
    }

    string nameWithSwitches = match.Groups["name"].Value.Trim();
    int hashIndex = nameWithSwitches.IndexOf('#');
    if (hashIndex <= 0)
    {
        // No switch separator (or nothing before it): the whole capture is the name.
        return nameWithSwitches;
    }

    // Everything after the first '#' is a list of switches; the part before it is the name.
    string switchList = nameWithSwitches.Substring(hashIndex + 1).ToLower();
    switches = switchList.Split(new char[] { '#' }, StringSplitOptions.RemoveEmptyEntries);
    return nameWithSwitches.Substring(0, hashIndex);
}

/// <summary>
/// Returns the fieldname and switches from the given mergefield-instruction.
/// Note: the switches are always returned lowercase !
/// Note 2: options holds values for formatting and text to insert before and/or after the field value.
/// options[0] = Formatting (Upper, Lower, Caps a.k.a. title case, FirstCap)
/// options[1] = Text to insert before data
/// options[2] = Text to insert after data
/// </summary>
/// <param name="field">The field being examined.</param>
/// <param name="switches">Receives the switches to apply to the field; empty when there are none.</param>
/// <param name="options">Receives the formatting options to apply; all three entries are null when the instruction is not a MERGEFIELD.</param>
/// <returns>The name of the field, or an empty string when the instruction is not a MERGEFIELD.</returns>
internal static string GetFieldNameWithOptions(SimpleField field, out string[] switches, out string[] options)
{
    var a = field.GetAttribute("instr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
    switches = new string[0];
    options = new string[3];

    string instruction = a.Value;
    if (string.IsNullOrEmpty(instruction))
    {
        return string.Empty;
    }

    Match m = instructionRegEx.Match(instruction);
    if (!m.Success)
    {
        return string.Empty;
    }

    string fieldname = m.Groups["name"].Value.Trim();
    options[0] = m.Groups["Format"].Value.Trim();
    options[1] = CleanSwitchText(m.Groups["PreText"].Value);
    options[2] = CleanSwitchText(m.Groups["PostText"].Value);

    int pos = fieldname.IndexOf('#');
    if (pos > 0)
    {
        // Process the switches, correct the fieldname.
        switches = fieldname.Substring(pos + 1).ToLower().Split(new char[] { '#' }, StringSplitOptions.RemoveEmptyEntries);
        fieldname = fieldname.Substring(0, pos);
    }

    return fieldname;
}

/// <summary>
/// Normalizes the captured text of a \b or \f switch.
/// </summary>
/// <remarks>
/// The instruction regex consumes an optional opening quote but captures everything up to the
/// next backslash, so for a quoted text the closing quote ends up inside the capture. It is
/// removed here; whitespace inside the quotes is kept.
/// </remarks>
/// <param name="captured">The raw capture.</param>
/// <returns>The text without surrounding whitespace and without the closing quote.</returns>
private static string CleanSwitchText(string captured)
{
    string text = captured.Trim();
    if (text.EndsWith("\"", StringComparison.Ordinal))
    {
        text = text.Substring(0, text.Length - 1);
    }

    return text;
}

/// <summary>
/// Returns the first parent of a given <see cref="OpenXmlElement"/> that corresponds
/// to the given type.
/// This method walks up the parent chain and returns the closest ancestor whose runtime
/// type is exactly <typeparamref name="T"/> (derived types do not match).
/// </summary>
/// <typeparam name="T">The type of element being searched for.</typeparam>
/// <param name="element">The element being examined.</param>
/// <returns>The first parent of the element of the specified type, or null when there is none.</returns>
internal static T GetFirstParent<T>(OpenXmlElement element)
    where T : OpenXmlElement
{
    // Iterative walk: same result as the recursive formulation, without growing the stack.
    for (OpenXmlElement current = element.Parent; current != null; current = current.Parent)
    {
        if (current.GetType() == typeof(T))
        {
            return (T)current;
        }
    }

    return null;
}

/// <summary>
/// Wraps the given text in a new paragraph, for text that should appear before or after a MergeField.
/// </summary>
/// <param name="text">The text to display.</param>
/// <param name="fieldToMimic">The MergeField whose run formatting is copied onto the text.</param>
/// <returns>An OpenXml Paragraph ready to insert.</returns>
internal static Paragraph GetPreOrPostParagraphToInsert(string text, SimpleField fieldToMimic)
{
    // The paragraph holds a single run carrying the text.
    return new Paragraph(GetRunElementForText(text, fieldToMimic));
}

/// <summary>
/// Returns a <see cref="Run"/>-openxml element for the given text.
/// Specific about this run-element is that it can describe multiple-line and tabbed-text.
/// The placeholder can be provided too, to allow duplicating the formatting.
/// </summary>
/// <param name="text">The text to be inserted. Line feeds become breaks, tab characters become tabs.</param>
/// <param name="placeHolder">The placeholder where the text will be inserted; may be null.</param>
/// <returns>A new <see cref="Run"/>-openxml element containing the specified text.</returns>
internal static Run GetRunElementForText(string text, SimpleField placeHolder)
{
    // Look for run properties (an "rPr" child) on the placeholder's parent so the new run
    // can duplicate them. A detached placeholder has no parent and hence nothing to copy.
    string rpr = null;
    if (placeHolder != null && placeHolder.Parent != null)
    {
        var xdoc = XDocument.Parse(placeHolder.Parent.OuterXml.Replace(placeHolder.OuterXml, string.Empty));
        if (xdoc.Root != null)
        {
            var xrpr = xdoc.Root.Elements().FirstOrDefault(x => x.Name.LocalName == "rPr");
            if (xrpr != null)
            {
                rpr = xrpr.ToString();
            }
        }
    }

    var r = new Run();
    if (!string.IsNullOrEmpty(rpr))
    {
        r.AppendChild(new RunProperties(rpr));
    }

    if (string.IsNullOrEmpty(text))
    {
        return r;
    }

    // first process line breaks (Windows line endings are normalized so no '\r' leaks into the text)
    string[] lines = text.Replace("\r\n", "\n").Split('\n');
    for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
    {
        if (lineIndex > 0)
        {
            r.Append(new Break());
        }

        // then process tabs
        string[] segments = lines[lineIndex].Split('\t');
        for (int segmentIndex = 0; segmentIndex < segments.Length; segmentIndex++)
        {
            if (segmentIndex > 0)
            {
                r.Append(new TabChar());
            }

            // xml:space="preserve" keeps leading/trailing spaces of the segment significant.
            r.AppendChild(new Text(segments[segmentIndex]) { Space = SpaceProcessingModeValues.Preserve });
        }
    }

    return r;
}

/// <summary>
/// Extracts the table part of a table merge field name.
/// Table merge fields are named TBL_tablename_columnname; the table name is the text
/// between the first and the second underscore.
/// </summary>
/// <param name="fieldname">The field name.</param>
/// <returns>The table name.</returns>
/// <exception cref="ArgumentException">Thrown when fieldname is not formatted as TBL_tablename_columname.</exception>
internal static string GetTableNameFromFieldName(string fieldname)
{
    int firstUnderscore = fieldname.IndexOf('_');
    if (firstUnderscore < 1)
    {
        throw new ArgumentException("Error: table-MERGEFIELD should be formatted as follows: TBL_tablename_columnname.");
    }

    int tableStart = firstUnderscore + 1;
    int secondUnderscore = fieldname.IndexOf('_', tableStart);
    if (secondUnderscore < 1)
    {
        throw new ArgumentException("Error: table-MERGEFIELD should be formatted as follows: TBL_tablename_columnname.");
    }

    return fieldname.Substring(tableStart, secondUnderscore - tableStart);
}

/// <summary>
/// Title-cases a string: the first letter of every word is capitalized.
/// </summary>
/// <param name="toConvert">The string to convert.</param>
/// <returns>The string after title-casing.</returns>
internal static string ToTitleCase(string toConvert)
{
    // Delegate to the helper, seeding it with an empty "already converted" part.
    string nothingConvertedYet = string.Empty;
    return ToTitleCaseHelper(toConvert, nothingConvertedYet);
}

/// <summary>
/// Title-cases a string, capitalizing the first letter of every word and lower-casing the rest.
/// Words are separated by spaces; runs of spaces collapse into a single space.
/// </summary>
/// <param name="toConvert">The string to convert.</param>
/// <param name="alreadyConverted">The part of the string already converted. Seed with an empty string.</param>
/// <returns>
/// <paramref name="alreadyConverted"/> followed by the title-cased words, separated by single spaces.
/// When <paramref name="toConvert"/> is null, empty or only white space, <paramref name="alreadyConverted"/> is returned as is.
/// </returns>
internal static string ToTitleCaseHelper(string toConvert, string alreadyConverted)
{
    if (string.IsNullOrEmpty(toConvert))
    {
        return alreadyConverted;
    }

    // Trimming up front guarantees every extracted word has at least one character.
    string remaining = toConvert.Trim();
    if (remaining.Length == 0)
    {
        return alreadyConverted;
    }

    StringBuilder sb = new StringBuilder(alreadyConverted);
    while (remaining.Length > 0)
    {
        int indexOfFirstSpace = remaining.IndexOf(' ');
        string word;
        if (indexOfFirstSpace != -1)
        {
            word = remaining.Substring(0, indexOfFirstSpace);
            remaining = remaining.Substring(indexOfFirstSpace).Trim();
        }
        else
        {
            // Last word.
            word = remaining;
            remaining = string.Empty;
        }

        // Separate words with one space, but never start the result with one.
        if (sb.Length > 0)
        {
            sb.Append(' ');
        }

        sb.Append(word.Substring(0, 1).ToUpper(CultureInfo.CurrentCulture));
        if (word.Length > 1)
        {
            sb.Append(word.Substring(1).ToLower(CultureInfo.CurrentCulture));
        }
    }

    return sb.ToString();
}

/// <summary>
/// Since MS Word 2010 the SimpleField element is no longer used. It has been replaced by a combination of
/// Run elements and a FieldCode element. This method will convert the new format to the old SimpleField-compliant
/// format.
/// </summary>
/// <param name="mainElement">The element (document, header or footer) whose field codes are converted in place.</param>
/// <exception cref="Exception">Thrown when a Begin FieldChar has no matching End FieldChar.</exception>
internal static void ConvertFieldCodes(OpenXmlElement mainElement)
{
    // search for all the Run elements
    Run[] runs = mainElement.Descendants<Run>().ToArray();
    if (runs.Length == 0)
    {
        return;
    }

    // Maps each replacement run (holding a SimpleField) to the original runs it stands for.
    var newfields = new Dictionary<Run, Run[]>();

    int cursor = 0;
    while (cursor < runs.Length)
    {
        Run run = runs[cursor];

        // NOTE(review): if FieldCharValues.Begin has the numeric value 0 this mask test is true
        // for every FieldChar, not only Begin - confirm against the SDK enum definition.
        if (HasFieldCharOfType(run, FieldCharValues.Begin))
        {
            var innerRuns = new List<Run>();
            innerRuns.Add(run);

            // loop until we find the 'end' FieldChar
            bool found = false;
            string instruction = null;
            RunProperties runprop = null;

            // Bounds are checked BEFORE advancing, so an unterminated field reaches the
            // exception below instead of indexing past the end of the array.
            while (!found && cursor + 1 < runs.Length)
            {
                cursor++;
                run = runs[cursor];
                innerRuns.Add(run);

                FieldCode fieldCode = run.GetFirstChild<FieldCode>();
                if (fieldCode != null)
                {
                    // The instruction may be split over several runs; concatenate the pieces.
                    instruction += fieldCode.Text;
                }

                if (HasFieldCharOfType(run, FieldCharValues.End))
                {
                    found = true;
                }

                RunProperties properties = run.GetFirstChild<RunProperties>();
                if (properties != null)
                {
                    // The last run properties seen inside the field win.
                    runprop = properties;
                }
            }

            // something went wrong : found Begin but no End. Throw exception
            if (!found)
            {
                throw new Exception("Found a Begin FieldChar but no End !");
            }

            if (!string.IsNullOrEmpty(instruction))
            {
                // build new Run containing a SimpleField
                Run newrun = new Run();
                if (runprop != null)
                {
                    newrun.AppendChild(runprop.CloneNode(true));
                }

                SimpleField simplefield = new SimpleField();
                simplefield.Instruction = instruction;
                newrun.AppendChild(simplefield);

                newfields.Add(newrun, innerRuns.ToArray());
            }
        }

        cursor++;
    }

    // replace all FieldCodes by old-style SimpleFields
    foreach (KeyValuePair<Run, Run[]> kvp in newfields)
    {
        // The first original run is swapped for the new one; the others are dropped.
        kvp.Value[0].Parent.ReplaceChild(kvp.Key, kvp.Value[0]);
        for (int i = 1; i < kvp.Value.Length; i++)
        {
            kvp.Value[i].Remove();
        }
    }
}

/// <summary>
/// Tells whether the first FieldChar inside a run carries the given field character type.
/// </summary>
/// <param name="run">The run to inspect.</param>
/// <param name="type">The field character type to test for.</param>
/// <returns>True when the run contains a FieldChar whose type, masked with <paramref name="type"/>, equals it.</returns>
private static bool HasFieldCharOfType(Run run, FieldCharValues type)
{
    FieldChar fieldChar = run.Descendants<FieldChar>().FirstOrDefault();
    return fieldChar != null && (fieldChar.FieldCharType & type) == type;
}
}
}