amloessb

Scrape-Files.ps1

Jul 20th, 2013
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <#
  2. Sequential Web Scraper (for PowerShell) v1.0
  3. Written by Aaron Loessberg-Zahl
  4. Last modified 20 July 2013
  5.  
  6. Scrapes images/files/etc. that are named sequentially.  Will properly skip files
  7. that return an HTTP 404 error.
  8.  
  9. For comments/questions/bugs, please contact the author.
  10.  
  11. ----------------------------------------------------------------------------
  12. "THE BEER-WARE LICENSE" (Revision 2659):
  13. <[email protected]> wrote this file. As long as you retain this
  14. notice, you can do whatever you want with this stuff. If we meet some day,
  15. and you think this stuff is worth it, you can buy me a beer in return.
  16. ----------------------------------------------------------------------------
  17.  
  18. Changelog:
  19. v1.0    07-20-2013      amloessb        Created and debugged
  20. #>
  21.  
  # Script arguments.  None are mandatory: an omitted string is empty and an
  # omitted integer is 0.
  param(
      # URL of the first file to fetch.  Its last "/"-delimited segment is split
      # on the -Separators characters; the second-to-last piece is read as the
      # starting number.
      [string]
      $BaseURL,

      # Characters that delimit the number inside the file name.  The first
      # character is written before the number and the second one after it when
      # each URL is rebuilt.
      [string]
      $Separators,

      # Minimum digit count of the number; shorter values are zero-padded
      # (used as the precision of a "D" format specifier).
      [int]
      $Padding,

      # Last number to request (inclusive).
      [int]
      $EndNumber,

      # Folder that receives the downloaded files; a trailing backslash is
      # appended by the script when missing.
      [string]
      $SaveTo
  )
  29.  
  <#
  .SYNOPSIS
  Returns every element of an array except the last one.

  .DESCRIPTION
  When a separator is supplied, the leading elements are concatenated into a
  single string and EACH of them is followed by the separator (so the result
  ends with the separator).  Without a separator the leading elements are
  returned as-is.

  .PARAMETER arr
  The array whose last element is to be dropped.

  .PARAMETER sep
  Optional separator.  An empty or omitted value selects the array form.

  .OUTPUTS
  A string when a separator is given, otherwise the leading elements.
  For arrays with fewer than two elements the result is "" or nothing.
  #>
  Function butLast ([Array] $arr, [String] $sep) {
      # With fewer than two elements there is nothing in front of the last one.
      # The guard is required because 0..($arr.Length - 2) would become the
      # DESCENDING range 0..-1 (indices 0 and -1) and return the single element
      # twice instead of returning nothing.
      If ($arr.Length -lt 2) {
          If ($sep) {
              Return ""
          }
          Return @()
      }

      $leading = $arr[0..($arr.Length - 2)]

      If ($sep) {
          # -join places the separator between elements; one more is appended
          # so that every element, including the final one kept, is followed
          # by it.
          Return ($leading -join $sep) + $sep
      }
      Return $leading
  }
  42.  
  # Make every error terminating so the Try/Catch around each download can
  # intercept it.
  $ErrorActionPreference = "Stop"

  Add-Type -AssemblyName System.Web

  # The target path is built as $SaveTo + <file name>, so the folder must end
  # with a backslash.
  If (!($SaveTo.EndsWith("\"))) {
      $SaveTo += "\"
  }

  $splitURL = $BaseURL.split("/")
  $filename = $splitURL[-1]

  # Cast to [Char[]] so that each character of $Separators is an individual
  # delimiter on every runtime.  Passing the bare string lets overload
  # resolution pick String.Split(String, StringSplitOptions) where it exists,
  # which would treat the whole string as ONE multi-character delimiter.
  $arrURL = $filename.Split([Char[]] $Separators, [StringSplitOptions]'RemoveEmptyEntries')

  # The number is read from the second-to-last piece, so at least two pieces
  # are required; fail early instead of requesting meaningless URLs.
  If ($arrURL.Length -lt 2) {
      Throw "The file name '$filename' must split into at least two pieces (the number and a trailing part) on the -Separators characters."
  }
  $currentLink = [Int] $arrURL[-2]

  # Everything up to and including the last "/" never changes; compute it once.
  $urlPrefix = butLast $splitURL "/"

  $wc = New-Object system.Net.WebClient

  $badLinks = 0

  Try {
      While ($currentLink -le $EndNumber) {
          # Zero-padded decimal number, at least $Padding digits wide.
          $number = "{0:D$Padding}" -f $currentLink

          # More than two pieces: the first piece is a name prefix that goes
          # in front of the first separator.  Otherwise the file name starts
          # directly with the separator.
          If ($arrURL.Length -gt 2) {
              $url = $urlPrefix + $arrURL[0] + $Separators[0] + $number + $Separators[1] + $arrURL[-1]
          } Else {
              $url = $urlPrefix + $Separators[0] + $number + $Separators[1] + $arrURL[-1]
          }
          $nameOnDisk = $url.split("/")[-1]
          Write-Progress -Activity "Sequential Web Scraper" -Status "Scraping files..." -CurrentOperation $nameOnDisk
          Try {
              $wc.DownloadFile($url, $SaveTo + $nameOnDisk)
          } Catch {
              # Best effort: any failed download (HTTP 404 or otherwise) is
              # skipped and counted; details are available on the verbose stream.
              $badLinks ++
              Write-Verbose ("Skipped {0}: {1}" -f $url, $_.Exception.Message)
          }
          $currentLink ++
      }
  } Finally {
      # Release the WebClient even if the loop is interrupted.
      $wc.Dispose()
  }

  Write-Progress -Activity "Sequential Web Scraper" -Status "Completed" -Completed

  # Surface the skip count so that a run in which downloads failed is not
  # indistinguishable from a fully successful one.
  If ($badLinks -gt 0) {
      Write-Warning "$badLinks file(s) could not be downloaded and were skipped."
  }
Advertisement
Add Comment
Please, Sign In to add comment