Advertisement
Guest User

Untitled

a guest
Apr 27th, 2017
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.59 KB | None | 0 0
  ############ User-configurable settings ############

  # Page at which the crawl starts - n.b. the / forward slash matters.
  $homepage = ''

  # Site root. Used to complete relative links and to keep the crawl
  # inside this domain.
  $rawdomain = ''

  # Crawl depth: the number of passes made over the URL hashtable
  # (new entries are added to it during each pass).
  $loopnumber = 10

  # File that receives the discovered URLs.
  $outputfile = 'c:\scripts\listofURLs.txt'

  ############ End of user-configurable settings ############

  # Hashtables (not arrays) that hold the crawl state; all three start empty.
  $UrlHash, $TempURLHash, $TempURLHash1 = @{}, @{}, @{}

  # Fetches one page and returns the href of every link found on it.
  #   $url - address of the page to fetch.
  # Returns an array of href strings (possibly empty). A page that cannot be
  # fetched is reported as a warning and yields an empty array, so a single
  # dead link does not raise an error in the middle of a crawl.
  function global:FindURL($url) {
      # The original had "write-host $url" after the return statement, where it
      # could never run. The trace is kept, but on the verbose stream so the
      # default screen output is unchanged.
      Write-Verbose "Fetching $url"

      try {
          # -UseBasicParsing avoids the dependency on the Internet Explorer
          # engine; the Links collection is still populated.
          $page = Invoke-WebRequest -Uri $url -UseBasicParsing -ErrorAction Stop
      }
      catch {
          Write-Warning "Could not fetch '$url': $($_.Exception.Message)"
          return @()
      }

      # Anchors without an href come back as $null; drop them so every result
      # can safely be used as a hashtable key by the caller.
      return @($page.Links.href | Where-Object { $_ })
  }

  # Crawls one page and records every link on it that is not yet known.
  #   $HashURL - full URL of the page to crawl.
  # Each new href returned by FindUrl is added to the $UrlHash hashtable of
  # the calling scope (key and value are both the href). The current date and
  # the size of $UrlHash are emitted as progress output, and the crawled URL
  # is written to the host.
  function CallFromTheHash ($HashURL) {
      # The original had no pipe between FindUrl and Where-Object, so
      # "Where-Object" and its script block were handed to FindUrl as stray
      # arguments and the "not already known" filter never ran.
      # The "$_ -and" guard skips null/empty hrefs, which cannot be used as
      # hashtable keys.
      FindUrl -url $HashURL |
          Where-Object { $_ -and -not $UrlHash.ContainsKey($_) } |
          ForEach-Object { $UrlHash[$_] = $_ }

      #Logging to screen
      Get-Date
      $UrlHash.Count
      Write-Host $HashURL
  }

  # ---- Crawl driver ---------------------------------------------------------
  # Seeds $UrlHash with the start page, then makes up to $loopnumber - 1
  # further passes; each pass fetches only the URLs discovered since the
  # previous one. Finally the collected URLs are written to $outputfile.

  # Nothing can be fetched without a start address, so fail with a clear
  # message instead of a stream of request errors.
  $startUrl = $rawdomain + $homepage
  if ([string]::IsNullOrWhiteSpace($startUrl)) {
      throw 'Set $rawdomain and $homepage at the top of the script before running the crawl.'
  }

  #Add homepage to hashtable
  $UrlHash[$homepage] = $homepage
  $UrlHash.Count

  #Call function with the start page and add results to hashtable.
  # The original passed "$rawdomain + $homepage Where-Object {...}" as loose
  # arguments: only $rawdomain reached -url and the filter never ran.
  FindUrl -url $startUrl |
      Where-Object { $_ -and -not $UrlHash.ContainsKey($_) } |
      ForEach-Object { $UrlHash[$_] = $_ }

  # $TempURLHash records the URLs that have already been fetched; the start
  # page has just been done, so the loop below must not fetch it again.
  $TempURLHash[$homepage] = $homepage

  #Loop through hashtable contents
  for ($i = 2; $i -le $loopnumber; $i++) {

      # Snapshot the not-yet-fetched keys into an array: CallFromTheHash adds
      # to $UrlHash, and a hashtable must not change while it is enumerated.
      $pending = @($UrlHash.Keys | Where-Object { -not $TempURLHash.ContainsKey($_) })
      if ($pending.Count -eq 0) {
          # No new links were found on the last pass; going deeper finds nothing.
          break
      }

      foreach ($halfurl in $pending) {
          $TempURLHash[$halfurl] = $halfurl

          if ($halfurl.Contains($rawdomain)) {
              # Already a full URL on this site. The original left $fullhashurl
              # untouched here and re-crawled whatever the previous iteration
              # had set it to.
              $fullhashurl = $halfurl
          }
          elseif ($halfurl -match '^([a-z][a-z0-9+.\-]*:|//|#)') {
              # Absolute link to another site, mailto:/javascript: link or a
              # page fragment: stay on $rawdomain and skip it rather than
              # building a broken URL and waiting for the request to fail.
              continue
          }
          else {
              #Add domain to URL - relative link on this site
              $fullhashurl = $rawdomain + $halfurl
          }

          CallFromTheHash $fullhashurl
      }

      #Hit the loop again - see $loopnumber for the loop count - this will dive another layer into the site, go as deep as you like.
      Write-Host "Looping AGAIIIIINNNN" -ForegroundColor red -backgroundcolor DarkYellow
  }

  #Copy the contents of $UrlHash into $TempURLHash1 ready for logging.
  # Index assignment updates the table in place; "+=" rebuilt it on every step.
  foreach ($urlfrag in $UrlHash.Keys) {
      $TempURLHash1[$urlfrag] = $urlfrag
  }
  $TempURLHash1.Count

  #Output of URL results: one URL per line, sorted. Piping the hashtable
  # itself wrote a Name/Value table that repeated every URL.
  $outputDir = Split-Path -Parent $outputfile
  if ($outputDir -and -not (Test-Path -LiteralPath $outputDir)) {
      # Out-File does not create missing folders.
      New-Item -ItemType Directory -Path $outputDir -Force | Out-Null
  }
  $TempURLHash1.Keys | Sort-Object | Out-File -Width 900 $outputfile
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement