PtiTom

TSV Extractor

Oct 19th, 2013
38
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Text;
  6. using System.Threading.Tasks;
  7.  
  8. namespace Tool_ExtractTSV
  9. {
  10.     public class TsvExtractor
  11.     {
  12.         private string fileContent;
  13.         private string headerRow;
  14.  
  15.         private readonly char[] FieldSeparator = new char[] { '\t' };
  16.  
  17.         private TsvExtractor() { }
  18.  
  19.         /// <summary>
  20.         /// Default constructor. Build an extractor on a file.
  21.         /// </summary>
  22.         /// <param name="filePath">File to extract data from.</param>
  23.         public TsvExtractor(string filePath)
  24.             : this(new FileInfo(filePath))
  25.         {
  26.         }
  27.  
  28.         /// <summary>
  29.         /// Build an extractor on a file.
  30.         /// </summary>
  31.         /// <param name="fileToExtract">File to extract data from.</param>
  32.         public TsvExtractor(FileInfo fileToExtract)
  33.         {
  34.             if (fileToExtract.Exists)
  35.             {
  36.                 using (StreamReader fileReader = new StreamReader(fileToExtract.OpenRead(), Encoding.Default))
  37.                 {
  38.                     this.headerRow = fileReader.ReadLine();
  39.                     this.fileContent = fileReader.ReadToEnd();
  40.                 }
  41.             }
  42.             else
  43.             {
  44.                 throw new ArgumentOutOfRangeException("fileToExtract", string.Format("The specified file '{0}' dopesn't exist.", fileToExtract.FullName));
  45.             }
  46.         }
  47.  
  48.         /// <summary>
  49.         /// Extract and concatenate all content in a table. Useful if file doesn't seems to have headers.
  50.         /// </summary>
  51.         /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
  52.         /// <returns>A table of elements (2 dimensions string array).</returns>
  53.         public string[][] GetAllContent(bool keepBlankValues)
  54.         {
  55.             List<string[]> allContent = new List<string[]>();
  56.             allContent.Add(this.GetHeaders(keepBlankValues));
  57.             allContent.AddRange(this.GetData(keepBlankValues));
  58.             return allContent.ToArray();
  59.         }
  60.  
  61.         /// <summary>
  62.         /// Extract and split top row of file in an array.
  63.         /// </summary>
  64.         /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
  65.         /// <returns>Strings of the top row.</returns>
  66.         public string[] GetHeaders(bool keepBlankValues)
  67.         {
  68.             return this.SplitAndClean(this.headerRow, keepBlankValues, true);
  69.         }
  70.  
  71.         /// <summary>
  72.         /// Extract and concatenate all data in a table. Useful if file seems to have headers in top row.
  73.         /// </summary>
  74.         /// <param name="keepBlankValues"><c>true</c> to keep empty values (contiguous tabs) in table. <c>false</c> else.</param>
  75.         /// <returns>A table of elements (2 dimensions string array).</returns>
  76.         public string[][] GetData(bool keepBlankValues)
  77.         {
  78.             List<string[]> dataContent = new List<string[]>();
  79.             using (StringReader dataReader = new StringReader(this.fileContent))
  80.             {
  81.                 string currentLine = dataReader.ReadLine();
  82.                 while (currentLine != null)
  83.                 {
  84.                     dataContent.Add(this.SplitAndClean(currentLine, keepBlankValues, true));
  85.                     currentLine = dataReader.ReadLine();
  86.                 }
  87.             }
  88.  
  89.             return dataContent.ToArray();
  90.         }
  91.  
  92.         /// <summary>
  93.         /// Split and clean the given string, following TSV rules.
  94.         /// </summary>
  95.         /// <param name="lineToSplit"></param>
  96.         /// <param name="keepBlankValues"></param>
  97.         /// <param name="cleanDoubleQuotes"></param>
  98.         /// <returns></returns>
  99.         private string[] SplitAndClean(string lineToSplit, bool keepBlankValues, bool cleanDoubleQuotes)
  100.         {
  101.             if (cleanDoubleQuotes)
  102.             {
  103.                 return lineToSplit.Split(FieldSeparator, keepBlankValues ? StringSplitOptions.RemoveEmptyEntries : StringSplitOptions.None).Select(v => this.CleanDoubleQuotes(v)).ToArray();
  104.             }
  105.             else
  106.             {
  107.                 return lineToSplit.Split(FieldSeparator, keepBlankValues ? StringSplitOptions.RemoveEmptyEntries : StringSplitOptions.None);
  108.             }
  109.         }
  110.  
  111.         /// <summary>
  112.         /// Remove double-quotes if they both appear at beggining and end of the string.
  113.         /// </summary>
  114.         /// <param name="value"></param>
  115.         /// <returns></returns>
  116.         private string CleanDoubleQuotes(string value)
  117.         {
  118.             return value.StartsWith("\"") && value.EndsWith("\"") ?
  119.                 value.Substring(1, value.Length - 2) :
  120.                 value;
  121.         }
  122.     }
  123. }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×