Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Globalization;
- using System.IO;
- using System.Xml.Linq;
- using ICSharpCode.SharpZipLib.Zip;
- namespace Ramp.Data
- {
- /// <summary>
- /// Allows sheets to be read from Microsoft Excel files (.xlsx).
- /// </summary>
- public class XlsxReader : IDisposable
- {
- private readonly bool _keepOpen;
- private readonly ZipFile _file;
- private bool _haveGlobals;
- private Dictionary<string, string> _sheetIds;
- private Dictionary<string, string> _sharedStrings;
- public XlsxReader(string filename)
- : this(new ZipFile(filename))
- {
- }
- public XlsxReader(Stream stream, bool keepOpen = false)
- : this(new ZipFile(stream), keepOpen)
- {
- }
- public XlsxReader(ZipFile file, bool keepOpen = false)
- {
- Validate.ArgumentNotNull(file, "file");
- _file = file;
- _keepOpen = keepOpen;
- }
- public void Dispose()
- {
- if (!_keepOpen)
- _file.Close();
- }
- /// <summary>
- /// Gets the names of all sheets in the document.
- /// </summary>
- /// <returns> A collection containing the names of all sheets in the document. </returns>
- public ICollection<string> GetSheetNames()
- {
- EnsureGlobals();
- return _sheetIds.Keys;
- }
- /// <summary>
- /// Reads the sheet with the specified name.
- /// </summary>
- /// <param name="name"> A sheet name. </param>
- /// <returns> A two-dimensional array with the contents of the specified sheet. </returns>
- public string[,] ReadSheet(string name)
- {
- EnsureGlobals();
- string id;
- if (!_sheetIds.TryGetValue(name, out id))
- throw new ArgumentOutOfRangeException("name", "sheet not found: " + name);
- XDocument doc = LoadXmlEntry(String.Format("xl/worksheets/sheet{0}.xml", id));
- // ReSharper disable once PossibleNullReferenceException
- XNamespace ns = doc.Root.GetDefaultNamespace();
- XName rowElementName = ns + "row";
- XName columnElementName = ns + "c";
- XName valueElementName = ns + "v";
- XName crefAttributeName = "r";
- XName typeAttributeName = "t";
- int? columnCount = null;
- var currentRow = new List<string>();
- var rows = new List<string[]>();
- foreach (XElement row in doc.Descendants(rowElementName))
- {
- foreach (XElement column in row.Elements(columnElementName))
- {
- var cref = (string) column.Attribute(crefAttributeName);
- var type = (string) column.Attribute(typeAttributeName);
- var value = (string) column.Element(valueElementName);
- if (type == "s")
- value = _sharedStrings[value];
- int columnIndex, rowIndex;
- ParseCellReference(cref, out columnIndex, out rowIndex);
- if (currentRow.Count <= columnIndex)
- currentRow.Resize(columnIndex + 1);
- currentRow[columnIndex] = value;
- }
- if (!columnCount.HasValue)
- columnCount = currentRow.Count;
- currentRow.Resize(columnCount.Value);
- rows.Add(currentRow.ToArray());
- currentRow.Clear();
- }
- if (!columnCount.HasValue)
- return new string[0, 0];
- var result = new string[rows.Count, columnCount.Value];
- for (int r = 0; r < rows.Count; r++)
- for (int c = 0; c < columnCount; c++)
- result[r, c] = rows[r][c];
- return result;
- }
- private void EnsureGlobals()
- {
- if (_haveGlobals)
- return;
- // Load all sheet names and ids from the workbook file
- {
- XDocument doc = LoadXmlEntry("xl/workbook.xml");
- // ReSharper disable once PossibleNullReferenceException
- XNamespace ns = doc.Root.GetDefaultNamespace();
- _sheetIds = new Dictionary<string, string>();
- foreach (XElement sheet in doc.Descendants(ns + "sheet"))
- {
- var name = (string) sheet.Attribute("name");
- var sheetId = (string) sheet.Attribute("sheetId");
- _sheetIds.Add(name, sheetId);
- }
- }
- // Load all shared strings
- {
- XDocument doc = LoadXmlEntry("xl/sharedStrings.xml");
- // ReSharper disable once PossibleNullReferenceException
- XNamespace ns = doc.Root.GetDefaultNamespace();
- _sharedStrings = new Dictionary<string, string>();
- int index = 0;
- foreach (XElement text in doc.Descendants(ns + "t"))
- {
- _sharedStrings.Add(index.ToString(CultureInfo.InvariantCulture), text.Value);
- index++;
- }
- }
- _haveGlobals = true;
- }
- private XDocument LoadXmlEntry(string name)
- {
- ZipEntry wb = _file.GetEntry(name);
- if (wb == null)
- throw new InvalidDataException("entry not found: " + name);
- using (var stream = _file.GetInputStream(wb))
- return XDocument.Load(stream);
- }
- // Parse a cell reference in the format ABC123 to get a column and row index.
- private static void ParseCellReference(string cr, out int columnIndex, out int rowIndex)
- {
- int firstDigit = -1;
- for (int i = 0; i < cr.Length; i++)
- {
- if (Char.IsDigit(cr[i]))
- {
- firstDigit = i;
- break;
- }
- }
- if (firstDigit == -1)
- throw new FormatException("invalid cell reference: " + cr);
- string columnName = cr.Substring(0, firstDigit);
- int columnNumber = 0;
- int magnitude = columnName.Length - 1;
- foreach (char c in columnName)
- {
- int val = c - 'A' + 1;
- int mult = magnitude != 0 ? (int) Math.Pow(26, magnitude) : 1;
- columnNumber += val * mult;
- magnitude--;
- }
- columnIndex = columnNumber - 1;
- rowIndex = int.Parse(cr.Substring(firstDigit)) - 1;
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement