Advertisement
Guest User

Untitled

a guest
Apr 28th, 2017
311
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 1.07 KB | None | 0 0
  // Micro-benchmark: extract every href value from one downloaded page, first with a
  // regular expression and then with HtmlAgilityPack, printing the elapsed time of
  // each approach in milliseconds.

  // Download the HTML once so both approaches parse the same input.
  // WebClient is disposable; the using statement releases it as soon as the download ends.
  string html;
  using (WebClient wb = new WebClient())
  {
      html = wb.DownloadString("http://www.stackoverflow.com");
  }

  // --- regex test ---
  Stopwatch regexStopwatch = Stopwatch.StartNew();

  List<string> ListOfURLs = new List<string>();

  // Group 1 captures the href value, either quoted (single or double quotes) or as an
  // unquoted run of non-whitespace characters.
  for (Match m = Regex.Match(html, "href\\s*=\\s*(?:[\"'](?<1>[^\"']*)[\"']|(?<1>\\S+))", RegexOptions.IgnoreCase);
       m.Success;
       m = m.NextMatch())
  {
      string urlChecked = m.Groups[1].Value.Trim();
      if (!string.IsNullOrEmpty(urlChecked))
      {
          ListOfURLs.Add(urlChecked);
      }
  }

  regexStopwatch.Stop();

  Console.WriteLine(regexStopwatch.ElapsedMilliseconds);

  // --- agility test ---
  Stopwatch agilityStopwatch = Stopwatch.StartNew();

  HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
  htmlDoc.OptionFixNestedTags = true;
  htmlDoc.LoadHtml(html);

  // SelectNodes returns null (not an empty collection) when the XPath matches nothing,
  // so guard before running the LINQ pipeline to avoid a NullReferenceException.
  var anchorNodes = htmlDoc.DocumentNode.SelectNodes("//a[@href]");

  List<string> linkList = anchorNodes == null
      ? new List<string>()
      : anchorNodes
          .Select(n => n.GetAttributeValue("href", string.Empty).Trim())
          .Where(link => !string.IsNullOrEmpty(link))
          .ToList();

  agilityStopwatch.Stop();

  Console.WriteLine(agilityStopwatch.ElapsedMilliseconds);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement