Advertisement
stkirov

25.ExtractTextFromHTML

Jan 18th, 2013
580
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 0.67 KB | None | 0 0
  1. using System;
  2. using System.IO;
  3. using System.Text.RegularExpressions;
  4. class ExtractText
  5. {
  6.     static void Main()
  7.     {
  8.         StreamReader reader = new StreamReader(@"..\..\..\text.html");
  9.         using (reader)
  10.         {
  11.             string line = string.Empty;
  12.             MatchCollection matchProtocolAndSiteName = Regex.Matches(line, @"(?<=^|>)[^><]+?(?=<|$)");
  13.             while ((line = reader.ReadLine()) != null)
  14.             {
  15.                 matchProtocolAndSiteName = Regex.Matches(line, @"(?<=^|>)[^><]+?(?=<|$)");
  16.  
  17.                 foreach (var word in matchProtocolAndSiteName)
  18.                     Console.WriteLine(word);
  19.             }
  20.         }
  21.     }
  22. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement