Advertisement
coffeeAnon

4chan Thread Archiver

Jan 3rd, 2020
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 5.29 KB | None | 0 0
  1. #!/bin/bash
  2. # 4chan Thread Archiver
  3. # Download a fully functional offline copy of a 4chan thread
  4. # Usage: ./archiver.sh [URL] [OutputDir] [Interval in s] [thumbnails/full 0/1] [only download once = s]
  5. # Depends: bash, wget
  6. # License: shove it up your arse for all I care
  7.  
  8. # happy colours
  9. GR='\033[38;5;82m' ; PN='\033[38;5;171m' ; BL='\033[38;5;27m' ; RD='\033[0;31m' ; PR='\033[0;35m' ; CY='\033[0;36m' ; NC='\033[0m'
  10. # ${}
  11.  
  12. # help screen
  13. if [[ ! $1 || "$1" == "-h" || "$1" == "--help" ]]; then
  14.     echo -e "\n${GR}4chan Thread Archiver${NC}\nDownload a fully functional offline copy of a 4chan thread\n\nUsage:    ./${GR}Archiver.sh${NC} [URL] ${BL}[OutputDir]${NC} ${PR}[update interval[s]]${NC} ${CY}[thumbnails/full 0/1]${NC} ${PN}[s for snapshot]${NC}\nDefaults: ./${GR}Archiver.sh${NC} [none] ${BL}[name of thread]${NC} ${PR}[120]${NC} ${CY}[1]${NC}\ninput nothing or d to set a variable to default\nentering s as the ${PN}5th argument${NC} will download the thread once\n\nExample:  ./${GR}Archiver.sh${NC} https://boards.4channel.org/a/thread/197436238 ${BL}d${NC} ${PR}60${NC} ${CY}0${NC}\n${GR}saves thread${NC} 197436238 to the ${BL}default directory${NC} with an ${PR}update interval of 60 seconds${NC} and it will ${CY}only save thumbnails${NC}.\n"
  15.         exit
  16. fi
  17.  
  18. # Check for wget
  19. if [[ ! $(wget -h) ]]; then
  20.     echo "This script requires wget. $sudo apt-get install wget or whatever equivalent of that on your outlandish distro."
  21.     exit
  22. fi
  23.  
  24. # enter a custom directory from where the commands will be executed
  25. Path="/mnt/j/User/bilder/aImportant/Screencaps/a/Archived_threads/"
  26.  
  27. URL="$1"
  28. OutputDir="$2"
  29. Interval=$3
  30. Images="$4"
  31. Snapshot="$5"
  32.  
  33. # check if thread is available
  34. if [[ ! $(wget -q -O - $URL) ]]; then
  35.     echo -e "\n${RD}Thread not found${NC}\n"
  36.     exit
  37. else
  38.     echo -e "\nThread found\n"
  39. fi
  40.  
  41. # replace empty arguments with default values
  42. if [[ ! $2 || "$2" == "d" ]]; then
  43.     OutputDir=$(wget -q -nv -O - "$URL" | grep -oE "<title>.*</title>" | sed 's/<title>//' | sed 's/<\/title>//' | sed 's/[[:punct:]]//g' | sed $'s/[^[:print:]\t]//g' | sed -e 's/ /_/g' | sed -e 's/__4chan//')
  44. fi
  45. if [[ ! $3 || "$3" == "d" ]]; then
  46.     Interval="120"
  47. fi
  48. if [[ ! $4 || $4 -ge "1" || "$4" == "d" ]]; then
  49.     Hosts="i.4cdn.org,is2.4chan.org,s.4cdn.org"
  50. else
  51.     Hosts="i.4cdn.org --accept-regex s.jpg$"
  52. fi
  53.  
  54. # confirm user input
  55. # unnecessarily convoluted string edit commands
  56. echo -e "current working directory: \n$Path"
  57. echo -e "Thread ${GR}\"$(wget -q -nv -O - "$URL" | grep -oE "<title>.*</title>" | sed 's/<title>//' | sed 's/<\/title>//' | grep -oE "\- .* \-" | sed 's/\- //' | sed 's/\0x20 \-//')\"${NC} will be downloaded to ${BL}$OutputDir${NC}\n"
  58. confirm=3
  59. while [ $confirm -ge 0 ]; do
  60.     echo -ne "press ctrl+C to cancel $confirm\033[0K\r"
  61.     sleep 1
  62.     : $((confirm--))
  63. done
  64.  
  65. echo -e "\n\npreparing download"
  66.  
  67. # if custom save directory was specified, execute wget from there
  68. if [[ $Path ]]; then
  69. cd $Path
  70. fi
  71.  
  72. # create folder
  73. if [[ ! -d "$OutputDir" ]]; then
  74.         mkdir "$OutputDir"
  75.         echo "created folder $OutputDir"
  76. fi
  77.  
  78. # attempt to dowload stylesheets and js
  79. while [[ ! -f ${OutputDir}"/yotsubanew.692.css" ]]; do
  80.     wget -P $OutputDir -nd -r -l 1 -H -D i.4cdn.org,is2.4chan.org,s.4cdn.org -R gif,webm,png,jpg -p -k -N -c --adjust-extension -q $URL
  81. done
  82. echo -e "downloaded stylesheets\n\ncommencing download of thread\n"
  83.  
  84. # define download function
  85. DownloadThread () {
  86.     # the wget command: recursive on one level (-r -l 1) across specified hosts (-H -D),
  87.     # get all required files (-p) replace all links to files with local file path (-k),
  88.     # only download new files (-N), continue interrupted downloads (-c).
  89.     # -nd for no subdirectories, -nv for less output
  90.     wget -P $OutputDir -nd -nv -r -l 1 -H -D $Hosts --reject-regex robots.txt -p -k -N -c --adjust-extension $URL
  91.     # fix mismatched extensions in the html
  92.     cd "$OutputDir"
  93.     find -type f -name \*.html | sed 's/jpg.html/jpg/g' | sed 's/png.html/png/g' | sed 's/webm.html/webm/g' | sed 's/gif.html/gif/g' | sed 's/txt.html/txt/g' | sed 's/txt.tmp.html/txt/g'
  94.     cd ..
  95. }
  96.  
  97. # if userinput $5 was "s", divert path to single download
  98. if [[ $5 == "s" ]]; then
  99.     echo "saving snapshot"
  100.     DownloadThread
  101.     echo -e "\n${GR}Download completed.${NC}\n"
  102.     exit
  103. fi
  104.  
  105. # download thread at an interval until the cycle after archived.gif was downloaded (the thread was archived)
  106. while [[ ! -f ${OutputDir}"/archived.gif" ]]; do
  107.     # check if thread still exists
  108.     if [[ ! $(wget -q -O - $URL) ]]; then
  109.         echo -e "\n${RD}Thread was 404'd. Probably mods being faggots. go throw them a complaint over at irc.rizon.net/4chan\nLiveArchiver stopped.${NC}\nUnfortunately, this might have broken some of the image links and I am unable to get sed to cooperate, so you'll have to open the html with a text editor and search and replace all instances and variations of https://i.4cdn.org/[BOARD]/ for the following domains:\n  i.4cdn.org\n  is2.4chan.org\n  s.4cdn.org\nwith blank for the archived html to properly display images.\nI apologise for the inconvinience."
  110.         exit
  111.     fi
  112.     DownloadThread
  113.     echo ""
  114.     cntdwn=$(($Interval))
  115.     while [ $cntdwn -ge 0 ]; do
  116.         echo -ne "Cycle completed, sleeping for $cntdwn seconds\033[0K\r"
  117.         sleep 1
  118.         : $((cntdwn--))
  119.     done
  120.     echo ""
  121. done
  122.  
  123. echo -e "\n${GR}Thread was Archived.\nDownload has been completed. LiveArchiver stopped.${NC}\n"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement