Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# get all images from a chan board or any basic html page
#
# tools required : zenity, lynx, aria2c, exiftool
#   (Debian/Ubuntu: sudo apt install -y zenity lynx aria2 libimage-exiftool-perl)
# usage : get_media.sh
# media will be downloaded in the current directory you're running this command in,
# one sub-directory per file extension, then sorted into <year>/<year-month-day>
# folders based on each file's modification date.

set -euo pipefail

# media to download; each extension gets its own output directory
readonly -a EXTS=(webm mp4 jpeg txt c cpp m py h ppt pptx docx zip gz gif png jpg pdf mp3 sh webp avi mkv)

# Scratch directory for intermediate link lists; set by main, removed on exit.
WORK_DIR=''

#######################################
# Removes the scratch directory, if one was created.
# Globals: WORK_DIR (read)
#######################################
cleanup() {
  [[ -z "$WORK_DIR" ]] || rm -rf -- "$WORK_DIR"
}

#######################################
# Prints the number of parallel connections to hand to aria2c:
# half of the cores, clamped to 1..16.
# Arguments: $1 - core count (optional; defaults to the output of nproc)
# Outputs:   the job count on stdout
#######################################
job_count() {
  local cores=${1:-}
  if [[ -z "$cores" ]]; then
    cores=$(nproc 2>/dev/null) || cores=2
  fi
  local jobs=$(( cores / 2 ))
  # A single core would give 0, and the aria2c manual lists 1-16 as the valid
  # range for --max-connection-per-server, so keep the value inside it.
  (( jobs >= 1 )) || jobs=1
  (( jobs <= 16 )) || jobs=16
  printf '%s\n' "$jobs"
}

#######################################
# Extracts the unique http/https URLs from free-form text.
# Inputs:  text on stdin
# Outputs: one URL per line on stdout, sorted byte-wise, no duplicates
#######################################
extract_urls() {
  # grep exits 1 when nothing matches; an empty result is not an error here.
  { grep -Eo '(http|https)://[a-zA-Z0-9./?=_%:-]*' || true; } | LC_ALL=C sort -u
}

#######################################
# Prints every link that lynx finds on each page listed in a file.
# Arguments: $1 - file with one page URL per line
# Outputs:   links on stdout (may contain duplicates across pages);
#            a warning on stderr for each page lynx could not dump
#######################################
collect_links() {
  local pages_file=$1
  local url
  while IFS= read -r url; do
    [[ -n "$url" ]] || continue
    # stdin is redirected so lynx cannot consume the URL list the loop reads.
    lynx -dump -listonly -nonumbers "$url" </dev/null \
      || printf 'warning: could not list links on %s\n' "$url" >&2
  done < "$pages_file"
}

#######################################
# Downloads, per extension in EXTS, every link ending in ".<ext>" into ./<ext>.
# Globals:   EXTS (read)
# Arguments: $1 - file with one candidate link per line
#            $2 - value for aria2c's split / connection / concurrency options
#            $3 - scratch directory for the per-extension link lists
#######################################
download_by_extension() {
  local links_file=$1 jobs=$2 scratch=$3
  local ext list
  for ext in "${EXTS[@]}"; do
    list="${scratch}/${ext}_links.txt"
    # grep exits non-zero when no link has this extension; skip aria2c then.
    grep -- "\.${ext}\$" "$links_file" > "$list" || continue
    aria2c --split="$jobs" --max-connection-per-server="$jobs" \
      --max-concurrent-downloads="$jobs" -c -i "$list" -d "$ext" \
      || printf 'warning: some .%s downloads failed\n' "$ext" >&2
  done
}

#######################################
# Moves the media into folders (tree structure) based on the year, month,
# and day of each file's modification date. Only the per-extension download
# directories are touched, not other sub-directories of the current directory.
# Globals: EXTS (read)
#######################################
organise_by_date() {
  local ext
  for ext in "${EXTS[@]}"; do
    [[ -d "./$ext" ]] || continue
    # The ./ prefix keeps cd from resolving the name through CDPATH.
    ( cd -- "./$ext" && exiftool '-Directory<FileModifyDate' -d '%Y/%Y-%m-%d' . ) \
      || printf 'warning: exiftool reported problems in %s\n' "$ext" >&2
  done
}

#######################################
# Prompts for text containing links, then downloads and organises the media.
# Globals: WORK_DIR (written)
# Returns: 0 on success or when the prompt is cancelled, 1 on setup failure
#######################################
main() {
  local user_input
  # Any non-zero status from the prompt is treated as a cancel.
  if ! user_input=$(zenity --entry); then
    echo "User has pressed cancel"
    return 0
  fi
  echo "User has entered links + content"

  # The download tools are only needed once there is input to process.
  local tool
  for tool in lynx aria2c exiftool; do
    if ! command -v "$tool" >/dev/null; then
      printf 'error: required tool not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  # Private scratch directory: nothing is written to a predictable /tmp path
  # and no file in the current directory is created or deleted for bookkeeping.
  if ! WORK_DIR=$(mktemp -d); then
    printf 'error: could not create a temporary directory\n' >&2
    return 1
  fi
  trap cleanup EXIT

  local pages="${WORK_DIR}/pages.txt"
  local links="${WORK_DIR}/links.txt"

  # extract only URLs from the supplied text+content
  printf '%s\n' "$user_input" | extract_urls > "$pages"
  if [[ ! -s "$pages" ]]; then
    printf 'No URLs found in the input\n' >&2
    return 0
  fi

  # extract media links from the URLs; de-duplicated across all pages so the
  # same file is not queued twice
  collect_links "$pages" | LC_ALL=C sort -u > "$links"

  # download the extracted media, then sort it into dated folders
  download_by_extension "$links" "$(job_count)" "$WORK_DIR"
  organise_by_date
}

main "$@"
Advertisement
Add Comment
Please, Sign In to add comment