Advertisement
dimipan80

Extract Hyperlinks

May 13th, 2015
263
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 1.92 KB | None | 0 0
  /*
   * Extract Hyperlinks
   *
   * Task (originally stated for a JavaScript function that receives the text
   * as an array of strings): extract all hyperlinks (<href=…>) from a given
   * text and print the href values at the console, each on a separate line,
   * in the order they come from the input.
   *
   * The input text is standard HTML code. It may hold many tags and can be
   * formatted in many different forms (with or without whitespace). The <a>
   * elements may have many attributes, not only href. Only the values of the
   * href attributes of <a> elements are extracted. The input is expected to be
   * a well-formed HTML fragment (all tags and attributes correctly closed).
   * Attribute values never hold tags and hyperlinks, e.g.
   * "<img alt='<a href="hello">' />" is invalid. Commented links are also
   * extracted.
   *
   * This C# version reads the text line by line from the console until a line
   * equal to "END" (or the end of the input) is reached.
   */

  namespace _06.Extract_Hyperlinks
  {
      using System;
      using System.Text;
      using System.Text.RegularExpressions;

      class ExtractHyperlinks
      {
          // Line that marks the end of the HTML text on the console.
          private const string EndMarker = "END";

          // Matches an opening <a ...> tag that carries an href attribute.
          // The value may be single-quoted, double-quoted or unquoted; all three
          // alternatives capture into the same named group "href", so the caller
          // does not need to work out which alternative matched.
          // IgnoreCase: HTML tag and attribute names are case-insensitive.
          private static readonly Regex AnchorHrefRegex = new Regex(
              @"<a\s+(?:[^>]+\s+)?href\s*=\s*(?:'(?<href>[^']*)'|""(?<href>[^""]*)""|(?<href>[^\s>]+))[^>]*>",
              RegexOptions.IgnoreCase);

          /// <summary>
          /// Reads HTML from the console and prints every href value found in
          /// an &lt;a&gt; tag, one per line, in order of appearance.
          /// </summary>
          /// <param name="args">Command-line arguments (not used).</param>
          static void Main(string[] args)
          {
              string html = ReadHtml();

              foreach (Match anchor in AnchorHrefRegex.Matches(html))
              {
                  Console.WriteLine(anchor.Groups["href"].Value);
              }
          }

          /// <summary>
          /// Collects console lines until the end marker or the end of the input.
          /// </summary>
          /// <returns>The lines read, each followed by a line break.</returns>
          private static string ReadHtml()
          {
              StringBuilder html = new StringBuilder();

              // Console.ReadLine() returns null once the input is exhausted;
              // stopping there avoids spinning forever when "END" never arrives.
              string line;
              while ((line = Console.ReadLine()) != null && line != EndMarker)
              {
                  // Keep the line break: a tag may be split across lines
                  // ("<a" on one line, "href=..." on the next), and gluing the
                  // lines together would fuse them into "<ahref=...".
                  html.AppendLine(line);
              }

              return html.ToString();
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement