Advertisement
Filkolev

Extract Hyperlinks

May 27th, 2015
471
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 1.09 KB | None | 0 0
  1. using System;
  2. using System.Text;
  3. using System.Text.RegularExpressions;
  4.  
  5. public class ExtractHyperlinks
  6. {
  7.     public static void Main()
  8.     {
  9.         StringBuilder html = new StringBuilder();
  10.  
  11.         string input = Console.ReadLine();
  12.  
  13.         while (input != "END")
  14.         {
  15.             html.AppendLine(input);
  16.             input = Console.ReadLine();
  17.         }
  18.  
  19.         const string pattern = @"<\s*a\s+(?:[^<>]*\s+)?href\s*=\s*(?:(?:'([^'>]+)')|(?:""([^"">]+)"")|([^\s>]+))[^>]*>";
  20.  
  21.         var matches = Regex.Matches(html.ToString(), pattern);
  22.  
  23.         foreach (Match match in matches)
  24.         {
  25.             if (match.Groups[1].ToString() != string.Empty)
  26.             {
  27.                 Console.WriteLine(match.Groups[1].ToString());
  28.             }
  29.             else if (match.Groups[2].ToString() != string.Empty)
  30.             {
  31.                 Console.WriteLine(match.Groups[2].ToString());
  32.             }
  33.             else if (match.Groups[3].ToString() != string.Empty)
  34.             {
  35.                 Console.WriteLine(match.Groups[3].ToString());
  36.             }
  37.         }
  38.     }
  39. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement