Guest User

Untitled

a guest
Jun 20th, 2018
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.06 KB | None | 0 0
  /// <summary>
  /// Thread-pool work item that crawls a single page: fetches the text for the URI via
  /// <c>GetWebResponse</c>, resolves every <c>_regex</c> match against that URI, and queues
  /// each URI not already in <c>_uriCollection</c> as a new work item.
  /// </summary>
  /// <param name="o">The <see cref="Uri"/> to crawl, passed as the work item's state object.</param>
  /// <remarks>
  /// <c>_threadCount</c> is incremented only once a work item starts running, so an item
  /// that is queued but has not started yet is not counted. <c>_eventWaitHandle</c> can
  /// therefore still be signalled while queued work is pending; closing that window requires
  /// counting at queue time, which also involves the code that queues the first URI.
  /// </remarks>
  protected void CrawlUri(object o)
  {
      try
      {
          Interlocked.Increment(ref _threadCount);
          Uri uri = (Uri)o;

          foreach (Match match in _regex.Matches(GetWebResponse(uri)))
          {
              Uri newUri = new Uri(uri, match.Value);

              // Several workers run this method concurrently, so the check-then-add
              // must be atomic or the same URI could be added and queued twice.
              // NOTE(review): locks on the collection itself because no dedicated
              // sync object is visible here - any other code touching the collection
              // while a crawl is running must take the same lock.
              lock (_uriCollection)
              {
                  if (_uriCollection.Contains(newUri))
                  {
                      continue;
                  }

                  _uriCollection.Add(newUri);
              }

              ThreadPool.QueueUserWorkItem(_waitCallback, newUri);
          }
      }
      catch (Exception ex)
      {
          // Best effort: one page failing must not stop the crawl (or escape the
          // thread-pool thread), but the failure is reported instead of vanishing.
          System.Diagnostics.Trace.TraceWarning("CrawlUri failed for '{0}': {1}", o, ex);
      }
      finally
      {
          // Use the value returned by Decrement rather than re-reading the field:
          // a separate read can observe another worker's update, so the signal could
          // be missed or raised by several workers at once.
          if (Interlocked.Decrement(ref _threadCount) == 0)
          {
              _eventWaitHandle.Set();
          }
      }
  }
  33.  
  34. // Request first page (based on host)
  35. Uri root = new Uri(context.Request.Url.GetLeftPart(UriPartial.Authority));
  36.  
  37. // Begin threaded crawling of the Uri
  38. ThreadPool.QueueUserWorkItem(_waitCallback, root);
  39. Thread.Sleep(5000); // TEMP SOLUTION: Sleep for 5 seconds
  40. _eventWaitHandle.WaitOne();
  41.  
  42. // Server the Xml Sitemap
  43. context.Response.ContentType = "text/xml";
  44. context.Response.Write(GetXml().OuterXml);
  45.  
  /// <summary>
  /// Crawls pages with asynchronous HTTP requests, starting from one URI, and collects every
  /// distinct URI produced by matching <c>regex</c> against the downloaded text.
  /// </summary>
  /// <remarks>
  /// <see cref="CrawlUri"/> resets the instance state on each call, so a single instance
  /// must not run two crawls at the same time.
  /// </remarks>
  public class CrawlUriTool
  {
      // NOTE(review): never assigned in the code shown here - presumably initialised in
      // code omitted from this snippet; it must be set before CrawlUri is called.
      private Regex regex;

      // Number of requests started whose callback has not finished yet.
      private int pendingRequests;

      // Distinct URIs in discovery order; this is the list returned to the caller.
      private List<Uri> uriCollection;

      // Same URIs as uriCollection, kept for constant-time "already seen?" checks.
      private HashSet<Uri> seenUris;

      // Guards uriCollection and seenUris, which are updated from concurrent callbacks.
      private object uriCollectionSync = new object();

      // Signalled when pendingRequests drops back to zero.
      private ManualResetEvent crawlCompletedEvent;

      /// <summary>
      /// Crawls from <paramref name="uri"/> and blocks until every outstanding request
      /// has completed.
      /// </summary>
      /// <param name="uri">The URI of the first page to request.</param>
      /// <returns>The distinct URIs discovered during the crawl, in discovery order.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
      public List<Uri> CrawlUri(Uri uri)
      {
          if (uri == null)
          {
              throw new ArgumentNullException("uri");
          }

          this.pendingRequests = 0;
          this.uriCollection = new List<Uri>();
          this.seenUris = new HashSet<Uri>();
          this.crawlCompletedEvent = new ManualResetEvent(false);
          this.StartUriCrawl(uri);
          this.crawlCompletedEvent.WaitOne();

          return this.uriCollection;
      }

      /// <summary>
      /// Counts a new pending request and starts it asynchronously.
      /// </summary>
      /// <param name="uri">The URI to request.</param>
      private void StartUriCrawl(Uri uri)
      {
          Interlocked.Increment(ref this.pendingRequests);

          try
          {
              HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);

              request.BeginGetResponse(this.UriCrawlCallback, request);
          }
          catch
          {
              // The request never started, so its callback will never run and never
              // decrement the counter. Undo the increment here; otherwise the count
              // can never return to zero and CrawlUri would wait forever.
              Interlocked.Decrement(ref this.pendingRequests);
              throw;
          }
      }

      /// <summary>
      /// Completion callback for a request started by <see cref="StartUriCrawl"/>: reads the
      /// response text, records every new URI found in it and starts crawling the HTTP(S) ones.
      /// </summary>
      /// <param name="asyncResult">The async result whose state is the originating request.</param>
      private void UriCrawlCallback(IAsyncResult asyncResult)
      {
          try
          {
              HttpWebRequest request = (HttpWebRequest)asyncResult.AsyncState;

              // Dispose the response so its connection is released; leaked responses
              // hold on to connections for the lifetime of the crawl.
              using (HttpWebResponse response = (HttpWebResponse)request.EndGetResponse(asyncResult))
              {
                  Uri baseUri = response.ResponseUri;
                  string responseText = this.GetTextFromResponse(response); // not included

                  foreach (Match match in this.regex.Matches(responseText))
                  {
                      Uri newUri;

                      // Skip text that does not form a valid URI instead of throwing.
                      if (!Uri.TryCreate(baseUri, match.Value, out newUri))
                      {
                          continue;
                      }

                      bool isNew;
                      lock (this.uriCollectionSync)
                      {
                          isNew = this.seenUris.Add(newUri);
                          if (isNew)
                          {
                              this.uriCollection.Add(newUri);
                          }
                      }

                      // Only HTTP(S) URIs can be requested through HttpWebRequest; other
                      // schemes (mailto:, ftp:, ...) are recorded above but not crawled.
                      if (isNew && IsHttpScheme(newUri))
                      {
                          this.StartUriCrawl(newUri);
                      }
                  }
              }
          }
          catch (Exception ex)
          {
              // Nothing above this callback can handle the failure, and an exception
              // leaving it would be unhandled on a pool thread - report it and carry on.
              System.Diagnostics.Trace.TraceWarning("Crawl request failed: {0}", ex);
          }
          finally
          {
              // The callback that brings the count to zero is the last one running.
              if (Interlocked.Decrement(ref this.pendingRequests) == 0)
              {
                  this.crawlCompletedEvent.Set();
              }
          }
      }

      /// <summary>
      /// Tells whether <paramref name="uri"/> uses the http or https scheme.
      /// </summary>
      private static bool IsHttpScheme(Uri uri)
      {
          return uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps;
      }
  }
Add Comment
Please, Sign In to add comment