Guest User

get_media.sh

a guest
Oct 10th, 2021
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.43 KB | None | 0 0
  1. #!/bin/bash
  2.  
  # Download every media file linked from a chan board or any basic HTML page.
  #
  # Requires: zenity, lynx, aria2c and exiftool on PATH, e.g.
  #   sudo apt install -y zenity lynx aria2 libimage-exiftool-perl
  # Usage:    get_media.sh
  # Output:   one directory per file extension, created in the directory the
  #           script is run from; inside each, files are filed under
  #           <year>/<year-month-day> according to their modification date.
  set -u

  # Print a message on stderr and abort.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }

  # Fail early with a clear message instead of half-way through the run.
  for tool in zenity lynx aria2c exiftool; do
    command -v "$tool" >/dev/null || die "missing required tool: $tool"
  done

  # media to download
  exts=(webm mp4 jpeg txt c cpp m py h ppt pptx docx zip gz gif png jpg pdf mp3 sh webp avi mkv)

  # parallel jobs you want to use (default : half of your cores, never below 1
  # because aria2c rejects 0 for its split/connection/concurrency options)
  cores=$(nproc 2>/dev/null) || cores=2
  cores_to_use=$(( cores / 2 ))
  (( cores_to_use >= 1 )) || cores_to_use=1
  # aria2c only accepts 1-16 for --max-connection-per-server
  max_conn=$(( cores_to_use > 16 ? 16 : cores_to_use ))

  # Ask for the links; any surrounding text is tolerated and filtered out below.
  if ! user_input=$(zenity --entry --title="get_media" \
      --text="Paste the page links (surrounding text is fine):"); then
    echo "User has pressed cancel"
    exit 0
  fi
  echo "User has entered links + content"

  # All scratch files live in a private directory that is removed on exit, so
  # nothing that already exists in the current directory is touched or deleted.
  workdir=$(mktemp -d) || die "mktemp failed"
  trap 'rm -rf -- "$workdir"' EXIT

  # extract only URLs from the supplied text+content
  # ('&', '~', '+' and '#' are accepted so query strings are not cut short)
  grep -Eo 'https?://[a-zA-Z0-9./?=_%:&~+#-]+' <<<"$user_input" \
    | sort -u > "$workdir/links_clean.txt"
  if [[ ! -s "$workdir/links_clean.txt" ]]; then
    echo "No URLs found in the input" >&2
    exit 0
  fi

  # extract media links from the URLs; lynx gets /dev/null as stdin so it can
  # never consume the list the loop is reading from
  while IFS= read -r url; do
    lynx -dump -listonly -nonumbers "$url" </dev/null
  done < "$workdir/links_clean.txt" | sort -u > "$workdir/urls.txt"

  # download the extracted media, one target directory per extension
  for ext in "${exts[@]}"; do
    link_list="$workdir/${ext}_links.txt"
    # grep exits non-zero when nothing matches: skip aria2c for that extension
    grep -- "\.${ext}\$" "$workdir/urls.txt" > "$link_list" || continue
    aria2c --split="$cores_to_use" \
      --max-connection-per-server="$max_conn" \
      --max-concurrent-downloads="$cores_to_use" \
      -c -i "$link_list" -d "$ext" \
      || printf 'warning: some .%s downloads failed\n' "$ext" >&2
  done

  # moves the media into folders (tree structure) based on the year, month, and
  # day; only the per-extension download directories are processed, so other
  # directories that happen to sit next to them are left alone
  for ext in "${exts[@]}"; do
    [[ -d "$ext" ]] || continue
    (cd -- "$ext" && exiftool '-Directory<FileModifyDate' -d '%Y/%Y-%m-%d' .)
  done
Advertisement
Add Comment
Please, Sign In to add comment