Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- namespace Tool_ExtractTSV
- {
/// <summary>
/// Reads a tab-separated values (TSV) file and exposes its first line (the header row)
/// and its remaining lines (the data rows) as arrays of fields.
/// </summary>
/// <remarks>
/// The whole file is read into memory by the constructor, decoded with
/// <see cref="Encoding.Default"/>. Fields are split on the tab character only, and a
/// field wrapped in a pair of double quotes has that pair removed.
/// </remarks>
public class TsvExtractor
{
    /// <summary>Single-character separator list handed to <c>string.Split</c>.</summary>
    private static readonly char[] FieldSeparator = new char[] { '\t' };

    /// <summary>Shared result for a row that does not exist (e.g. header of an empty file).</summary>
    private static readonly string[] NoFields = new string[0];

    /// <summary>Everything after the first line of the file; empty when there is nothing more.</summary>
    private readonly string fileContent;

    /// <summary>First line of the file, or <c>null</c> when the file is empty.</summary>
    private readonly string headerRow;

    /// <summary>
    /// Builds an extractor with no content. Not reachable from outside the class.
    /// </summary>
    private TsvExtractor()
    {
        this.headerRow = null;
        this.fileContent = string.Empty;
    }

    /// <summary>
    /// Build an extractor on a file designated by its path.
    /// </summary>
    /// <param name="filePath">Path of the file to extract data from.</param>
    /// <exception cref="ArgumentOutOfRangeException">The file does not exist.</exception>
    public TsvExtractor(string filePath)
        : this(new FileInfo(filePath))
    {
    }

    /// <summary>
    /// Build an extractor on a file. The file is read entirely and closed before the
    /// constructor returns.
    /// </summary>
    /// <param name="fileToExtract">File to extract data from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fileToExtract"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The file does not exist.</exception>
    public TsvExtractor(FileInfo fileToExtract)
    {
        if (fileToExtract == null)
        {
            throw new ArgumentNullException("fileToExtract");
        }

        if (!fileToExtract.Exists)
        {
            // Exception type kept as-is so existing callers that catch it keep working.
            throw new ArgumentOutOfRangeException("fileToExtract", string.Format("The specified file '{0}' doesn't exist.", fileToExtract.FullName));
        }

        using (StreamReader fileReader = new StreamReader(fileToExtract.OpenRead(), Encoding.Default))
        {
            // ReadLine returns null on an empty file; that case is handled in SplitAndClean.
            this.headerRow = fileReader.ReadLine();
            this.fileContent = fileReader.ReadToEnd();
        }
    }

    /// <summary>
    /// Extract the header row followed by all data rows in a single table. Useful if
    /// the file doesn't seem to have headers.
    /// </summary>
    /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
    /// <returns>A table of elements (jagged string array), first line of the file first.</returns>
    public string[][] GetAllContent(bool keepBlankValues)
    {
        List<string[]> allContent = new List<string[]>();
        allContent.Add(this.GetHeaders(keepBlankValues));
        allContent.AddRange(this.GetData(keepBlankValues));
        return allContent.ToArray();
    }

    /// <summary>
    /// Extract and split the top row of the file in an array.
    /// </summary>
    /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
    /// <returns>Strings of the top row; an empty array when the file has no line at all.</returns>
    public string[] GetHeaders(bool keepBlankValues)
    {
        return this.SplitAndClean(this.headerRow, keepBlankValues, true);
    }

    /// <summary>
    /// Extract every row after the top one. Useful if the file seems to have headers
    /// in its top row.
    /// </summary>
    /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
    /// <returns>A table of elements (jagged string array), one entry per line.</returns>
    public string[][] GetData(bool keepBlankValues)
    {
        List<string[]> dataContent = new List<string[]>();
        using (StringReader dataReader = new StringReader(this.fileContent))
        {
            string currentLine;
            while ((currentLine = dataReader.ReadLine()) != null)
            {
                dataContent.Add(this.SplitAndClean(currentLine, keepBlankValues, true));
            }
        }

        return dataContent.ToArray();
    }

    /// <summary>
    /// Split a line on tabs and optionally strip the enclosing double quotes of each field.
    /// </summary>
    /// <param name="lineToSplit">Line to split; <c>null</c> yields an empty array.</param>
    /// <param name="keepBlankValues"><c>true</c> to keep empty fields, <c>false</c> to drop them.</param>
    /// <param name="cleanDoubleQuotes"><c>true</c> to remove a pair of enclosing double quotes from each field.</param>
    /// <returns>The fields of the line, in order.</returns>
    private string[] SplitAndClean(string lineToSplit, bool keepBlankValues, bool cleanDoubleQuotes)
    {
        if (lineToSplit == null)
        {
            return NoFields;
        }

        // Keeping blanks means NOT removing empty entries (the flag used to be inverted).
        StringSplitOptions splitOptions = keepBlankValues
            ? StringSplitOptions.None
            : StringSplitOptions.RemoveEmptyEntries;

        string[] fields = lineToSplit.Split(FieldSeparator, splitOptions);
        if (!cleanDoubleQuotes)
        {
            return fields;
        }

        return fields.Select(v => this.CleanDoubleQuotes(v)).ToArray();
    }

    /// <summary>
    /// Remove double quotes if they appear at both the beginning and the end of the string.
    /// </summary>
    /// <param name="value">Field to clean.</param>
    /// <returns>
    /// The field without its enclosing pair of double quotes, or the field unchanged
    /// when it is not enclosed in such a pair.
    /// </returns>
    private string CleanDoubleQuotes(string value)
    {
        // At least two characters are required: a lone '"' is both the first and the
        // last character, and stripping it would ask Substring for a negative length.
        bool isQuoted = value.Length >= 2
            && value[0] == '"'
            && value[value.Length - 1] == '"';

        return isQuoted ? value.Substring(1, value.Length - 2) : value;
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement