Advertisement
PtiTom

TSV Extractor

Oct 19th, 2013
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 4.03 KB | None | 0 0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Text;
  6. using System.Threading.Tasks;
  7.  
  8. namespace Tool_ExtractTSV
  9. {
  10.     public class TsvExtractor
  11.     {
  12.         private string fileContent;
  13.         private string headerRow;
  14.  
  15.         private readonly char[] FieldSeparator = new char[] { '\t' };
  16.  
  17.         private TsvExtractor() { }
  18.  
  19.         /// <summary>
  20.         /// Default constructor. Build an extractor on a file.
  21.         /// </summary>
  22.         /// <param name="filePath">File to extract data from.</param>
  23.         public TsvExtractor(string filePath)
  24.             : this(new FileInfo(filePath))
  25.         {
  26.         }
  27.  
  28.         /// <summary>
  29.         /// Build an extractor on a file.
  30.         /// </summary>
  31.         /// <param name="fileToExtract">File to extract data from.</param>
  32.         public TsvExtractor(FileInfo fileToExtract)
  33.         {
  34.             if (fileToExtract.Exists)
  35.             {
  36.                 using (StreamReader fileReader = new StreamReader(fileToExtract.OpenRead(), Encoding.Default))
  37.                 {
  38.                     this.headerRow = fileReader.ReadLine();
  39.                     this.fileContent = fileReader.ReadToEnd();
  40.                 }
  41.             }
  42.             else
  43.             {
  44.                 throw new ArgumentOutOfRangeException("fileToExtract", string.Format("The specified file '{0}' dopesn't exist.", fileToExtract.FullName));
  45.             }
  46.         }
  47.  
  48.         /// <summary>
  49.         /// Extract and concatenate all content in a table. Useful if file doesn't seems to have headers.
  50.         /// </summary>
  51.         /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
  52.         /// <returns>A table of elements (2 dimensions string array).</returns>
  53.         public string[][] GetAllContent(bool keepBlankValues)
  54.         {
  55.             List<string[]> allContent = new List<string[]>();
  56.             allContent.Add(this.GetHeaders(keepBlankValues));
  57.             allContent.AddRange(this.GetData(keepBlankValues));
  58.             return allContent.ToArray();
  59.         }
  60.  
  61.         /// <summary>
  62.         /// Extract and split top row of file in an array.
  63.         /// </summary>
  64.         /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
  65.         /// <returns>Strings of the top row.</returns>
  66.         public string[] GetHeaders(bool keepBlankValues)
  67.         {
  68.             return this.SplitAndClean(this.headerRow, keepBlankValues, true);
  69.         }
  70.  
  71.         /// <summary>
  72.         /// Extract and concatenate all data in a table. Useful if file seems to have headers in top row.
  73.         /// </summary>
  74.         /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
  75.         /// <returns>A table of elements (2 dimensions string array).</returns>
  76.         public string[][] GetData(bool keepBlankValues)
  77.         {
  78.             List<string[]> dataContent = new List<string[]>();
  79.             using (StringReader dataReader = new StringReader(this.fileContent))
  80.             {
  81.                 string currentLine = dataReader.ReadLine();
  82.                 while (currentLine != null)
  83.                 {
  84.                     dataContent.Add(this.SplitAndClean(currentLine, keepBlankValues, true));
  85.                     currentLine = dataReader.ReadLine();
  86.                 }
  87.             }
  88.  
  89.             return dataContent.ToArray();
  90.         }
  91.  
  92.         /// <summary>
  93.         /// Split and clean the given string, following TSV rules.
  94.         /// </summary>
  95.         /// <param name="lineToSplit"></param>
  96.         /// <param name="keepBlankValues"></param>
  97.         /// <param name="cleanDoubleQuotes"></param>
  98.         /// <returns></returns>
  99.         private string[] SplitAndClean(string lineToSplit, bool keepBlankValues, bool cleanDoubleQuotes)
  100.         {
  101.             if (cleanDoubleQuotes)
  102.             {
  103.                 return lineToSplit.Split(FieldSeparator, keepBlankValues ? StringSplitOptions.RemoveEmptyEntries : StringSplitOptions.None).Select(v => this.CleanDoubleQuotes(v)).ToArray();
  104.             }
  105.             else
  106.             {
  107.                 return lineToSplit.Split(FieldSeparator, keepBlankValues ? StringSplitOptions.RemoveEmptyEntries : StringSplitOptions.None);
  108.             }
  109.         }
  110.  
  111.         /// <summary>
  112.         /// Remove double-quotes if they both appear at beggining and end of the string.
  113.         /// </summary>
  114.         /// <param name="value"></param>
  115.         /// <returns></returns>
  116.         private string CleanDoubleQuotes(string value)
  117.         {
  118.             return value.StartsWith("\"") && value.EndsWith("\"") ?
  119.                 value.Substring(1, value.Length - 2) :
  120.                 value;
  121.         }
  122.     }
  123. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement