Advertisement
klippa

WebCrawl

Aug 17th, 2022
941
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 0.72 KB | None | 0 0
  1. using System.Threading.Tasks;
  2.  
  /// <summary>
  /// Breadth-first crawler that collects every URL reachable from a start URL
  /// while staying on the start URL's scheme and host name.
  /// </summary>
  class Solution {
      /// <summary>
      /// Crawls outward from <paramref name="startUrl"/> one wave at a time; the URLs of a
      /// wave are processed in parallel.
      /// </summary>
      /// <param name="startUrl">Absolute URL at which the crawl starts.</param>
      /// <param name="htmlParser">Parser whose <c>GetUrls</c> is called once per accepted URL to obtain its links.</param>
      /// <returns>Every accepted URL, each exactly once, in no particular order.</returns>
      public IList<string> Crawl(string startUrl, HtmlParser htmlParser) {
          var origin = new Uri(startUrl);
          var visited = new HashSet<string>();
          var queue = new List<string> { startUrl };

          // HashSet<T> and List<T> do not support concurrent writers, so every access made
          // from inside the parallel loop goes through this lock.
          var gate = new object();

          while (queue.Count > 0)
          {
              var nextWave = new List<string>();
              Parallel.ForEach(queue, next =>
              {
                  if (!IsSameSite(origin, next))
                  {
                      return;
                  }

                  // Add returns false when the URL is already present, which makes the
                  // "seen before?" test and the insertion a single atomic step under the lock.
                  lock (gate)
                  {
                      if (!visited.Add(next))
                      {
                          return;
                      }
                  }

                  // GetUrls is called outside the lock so parallel iterations are not serialized on it.
                  var links = htmlParser.GetUrls(next);

                  lock (gate)
                  {
                      nextWave.AddRange(links);
                  }
              });
              queue = nextWave;
          }

          return visited.ToArray();
      }

      /// <summary>
      /// Returns true when <paramref name="url"/> is an absolute URL with the same scheme and
      /// host name as <paramref name="origin"/>. Parsed components are compared instead of a
      /// string prefix so that "http://example.org.evil.com" is not mistaken for "http://example.org".
      /// </summary>
      private static bool IsSameSite(Uri origin, string url)
      {
          return Uri.TryCreate(url, UriKind.Absolute, out var candidate)
              && string.Equals(candidate.Scheme, origin.Scheme, StringComparison.OrdinalIgnoreCase)
              && string.Equals(candidate.Host, origin.Host, StringComparison.OrdinalIgnoreCase);
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement