Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Grab the articles linked from a VICE contributor's index pages and record
# the traffic as WARC files. wget is run once per index page, and each run
# writes its own WARC (and CDX index) named "$WARC_NAME.<page number>".
#
# Before running, edit AUTHOR_SLUG and LAST_PAGE below.
#
# `set -e` is deliberately NOT enabled: wget exits non-zero when any single
# request in a recursive run fails, and one bad page must not stop the
# remaining pages from being fetched.

USER_AGENT="Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27"
SAVE_HOST="www.vice.com"
WARC_NAME="${SAVE_HOST}-panicgrab-20220222"
AUTHOR_SLUG="author-name" # <<< CHANGE THIS
LAST_PAGE=42              # <<< CHANGE THIS to however many pages the author has

# wget options used for every page:
#   -e robots=off             ignore robots.txt
#   -r -l 1                   recurse one level deep from the index page
#   --page-requisites         also fetch images/CSS/JS needed to render pages
#   --accept-regex=article    only follow links whose URL matches "article"
#   -D ... -H                 allow spanning hosts, limited to the listed domains
#   --waitretry/--timeout/--tries/--wait
#                             retry and pacing settings
#   -k                        convert links in saved pages for local viewing
#   --warc-header/--warc-cdx/--warc-file
#                             WARC metadata, CDX index, and output file prefix
#   -U                        User-Agent header to send
for ((X = 1; X <= LAST_PAGE; X++)); do
  # The URL is quoted: it contains "?", which the shell would otherwise treat
  # as a glob character.
  wget \
    -e robots=off -r -l 1 --page-requisites --accept-regex=article \
    -D "${SAVE_HOST},video-images.vice.com" -H \
    --waitretry 5 --timeout 60 --tries 5 --wait 1 -k \
    --warc-header "operator: Archive Team" --warc-cdx \
    --warc-file="${WARC_NAME}.${X}" \
    -U "$USER_AGENT" \
    "https://${SAVE_HOST}/en/contributor/${AUTHOR_SLUG}?page=${X}" ||
    printf 'warning: wget exited with status %d on page %d\n' "$?" "$X" >&2
done
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement