<#
  Sequential Web Scraper (for PowerShell) v1.0
  Written by Aaron Loessberg-Zahl
  Last modified 20 July 2013

  Scrapes images/files/etc. that are named sequentially. Properly skips files
  that return an HTTP 404 error.

  For comments/questions/bugs, please contact
  <[email protected]> or <[email protected]>.

  ----------------------------------------------------------------------------
  "THE BEER-WARE LICENSE" (Revision 2659):
  <[email protected]> wrote this file. As long as you retain this
  notice, you can do whatever you want with this stuff. If we meet some day,
  and you think this stuff is worth it, you can buy me a beer in return.
  ----------------------------------------------------------------------------

  Changelog:
  v1.0  07-20-2013  amloessb  Created and debugged
#>
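
# Example invocation (script name, URL, and paths are hypothetical; substitute
# your own values):
#   .\SequentialScraper.ps1 -BaseURL "http://example.com/pics/img_001.jpg" `
#       -Separators "_." -Padding 3 -EndNumber 50 -SaveTo "C:\Scraped"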
Param (
    [String] $BaseURL,     # Full URL of the first file in the sequence
    [String] $Separators,  # The two characters surrounding the number in the filename
    [Int] $Padding,        # Width the number is zero-padded to (e.g. 3 for "007")
    [Int] $EndNumber,      # Last number in the sequence to try
    [String] $SaveTo       # Local directory to save the downloaded files into
)
# Returns every element of $arr except the last. If $sep is supplied, the kept
# elements are joined into a single string, each followed by $sep; otherwise
# they are returned as an array.
Function butLast ([Array] $arr, [String] $sep) {
    If ($sep) {
        $return = ""
        $num = 0
        While ($num -le ($arr.Length - 2)) {
            $return += $arr[$num] + $sep
            $num++
        }
        Return $return
    }
    Return $arr[0..($arr.Length - 2)]
}
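
# For example (illustrative values), butLast @("a", "b", "c") "-" returns the
# string "a-b-", while butLast @("a", "b", "c") returns the array "a","b".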
# Abort on any error that is not explicitly handled
$ErrorActionPreference = "Stop"
Add-Type -AssemblyName System.Web

# Ensure the destination path ends with a backslash
If (!($SaveTo.EndsWith("\"))) {
    $SaveTo += "\"
}
# Split the URL into path segments, then split the filename on the separator
# characters to isolate the sequence number
$splitURL = $BaseURL.split("/")
$filename = $splitURL[-1]
$arrURL = $filename.split($Separators, [StringSplitOptions]'RemoveEmptyEntries')
$currentLink = [Int] $arrURL[-2]
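
# For example, with the hypothetical -BaseURL "http://example.com/pics/img_007.jpg"
# and -Separators "_.", $arrURL is "img","007","jpg" and $currentLink starts at 7.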

$wc = New-Object System.Net.WebClient
$badLinks = 0
While ($currentLink -le $EndNumber) {
    # Rebuild the URL, re-inserting the zero-padded current number between
    # the separators
    If ($arrURL.Length -gt 2) {
        $url = (butLast $splitURL "/") + $arrURL[0] + $Separators[0] + ("{0:D$Padding}" -f $currentLink) + $Separators[1] + $arrURL[-1]
    } Else {
        $url = (butLast $splitURL "/") + $Separators[0] + ("{0:D$Padding}" -f $currentLink) + $Separators[1] + $arrURL[-1]
    }
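    # The "{0:D$Padding}" format above zero-pads the sequence number: for
    # example, "{0:D3}" -f 7 evaluates to "007"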
    $nameOnDisk = $url.split("/")[-1]
    Write-Progress -Activity "Sequential Web Scraper" -Status "Scraping files..." -CurrentOperation $nameOnDisk
    Try {
        $wc.DownloadFile($url, $SaveTo + $nameOnDisk)
    } Catch {
        # A failed download (e.g. an HTTP 404) is counted and skipped
        $badLinks++
    }
    $currentLink++
}
Write-Progress -Activity "Sequential Web Scraper" -Status "Completed" -Completed
Write-Host "Finished. $badLinks link(s) could not be downloaded and were skipped."
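
# Note: System.Net.WebClient is a legacy .NET API. On PowerShell 3.0 or later,
# the download step above could instead use the built-in Invoke-WebRequest
# cmdlet, sketched here with the same $url and $SaveTo as above:
#   Invoke-WebRequest -Uri $url -OutFile ($SaveTo + $nameOnDisk)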