Advertisement
someone_

threaddl

Jan 21st, 2015
305
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2.  
  # Script name as invoked, shown in the help text. "$0" is quoted (and guarded
  # with --) so a path containing spaces, glob characters or a leading dash
  # reaches basename as a single operand.
  name=$(basename -- "$0")
  4.  
  #######################################
  # Print the help text and terminate the script.
  # Globals:
  #   name (read) - script name shown in the help text
  # Arguments:
  #   $1 - optional exit status (default 0). Anything that is not a plain
  #        number is treated as 0, so existing argument-less calls behave
  #        exactly as before.
  # Outputs:
  #   The help text on stdout when the status is 0, on stderr otherwise.
  # Returns:
  #   Never returns; exits the shell with the requested status.
  #######################################
  usage () {
      local status=${1:-0}
      local -a help_text

      [[ $status =~ ^[0-9]+$ ]] || status=0

      help_text=(
          "$name - 4chan thread downloader"
          ""
          "Usage: $name [4chan thread URL] <time value and/or return target>"
          ""
          "  <time value>"
          "     1: run script once"
          "     10 - 999: Set the initial"
          "      waiting time between runs"
          "     Default: 10 seconds"
          ""
          "  <return target>"
          "     Set the target path of the 'Return'"
          "      link"
          "     Default: ./"
      )

      if (( status == 0 )); then
          printf '%s\n' "${help_text[@]}"
      else
          # Called to report a problem: keep stdout clean for pipelines.
          printf '%s\n' "${help_text[@]}" >&2
      fi

      exit "$status"
  }
  22.  
  #######################################
  # Sort the command-line words into the three settings the script knows.
  # Each word is classified on its own, so the order on the command line does
  # not matter and a later word of the same kind overrides an earlier one.
  # Globals:
  #   URL (written) - thread address, normalised to
  #                   http://boards.4chan.org/<board>/thread/<number>; anything
  #                   after the thread number (slug, #hash, ?query) is dropped
  #   SLP (written) - a word made of one to three digits (the time value)
  #   RET (written) - any other word (the 'Return' link target)
  # Arguments:
  #   The script's positional parameters.
  #######################################
  parse_args () {
      local arg
      # Kept in variables so the regex metacharacters need no shell quoting.
      local url_re='^https?://boards\.4chan\.org/([a-z0-9]+)/thread/([0-9]+)'
      local time_re='^[0-9]{1,3}$'

      # "$@" keeps every argument intact; the unquoted form split return
      # targets containing spaces and expanded glob characters such as '*'.
      for arg in "$@"; do
          if [[ $arg =~ $url_re ]]; then
              URL="http://boards.4chan.org/${BASH_REMATCH[1]}/thread/${BASH_REMATCH[2]}"
          elif [[ $arg =~ $time_re ]]; then
              SLP=$arg
          else
              RET=$arg
          fi
      done
  }

  [ $# -eq 0 ] && usage

  # Start from a clean slate so same-named environment variables cannot leak in.
  URL="" SLP="" RET=""
  parse_args "$@"

  # No usable thread URL was given: show the help text. The status argument
  # asks usage for a failure exit code.
  [ -n "$URL" ] || usage 2
  38.  
  echo "4chan downloader"

  # Split the thread URL into board (BO) and thread number (NO); LOC, the
  # "<board>_<number>" pair, names both the saved page and the download
  # directory. A URL that does not match leaves all three empty, which is what
  # the check below relies on (the former sed-based extraction passed
  # non-matching input through unchanged, so that check could never trigger).
  thread_re='/([^/]+)/thread/([0-9]+)'
  BO="" NO="" LOC=""
  if [[ $URL =~ $thread_re ]]; then
      BO=${BASH_REMATCH[1]}
      NO=${BASH_REMATCH[2]}
      LOC="${BO}_${NO}"
  fi

  if [[ -z $LOC ]]; then
      echo "Can't determine the thread's number"
      echo "Use valid URL without hash or search tags"
      echo ""
      usage 2
  fi

  ST="s.4cdn.org"       # host that serves the site's static assets
  SLP=${SLP:-10}        # configured waiting time between runs
  RET=${RET:-./}        # target of the 'Return' link
  SLAP=$SLP             # waiting time actually used for the next sleep
  LM=""                 # Last-Modified header seen on the previous poll

  # Send the board page as referer on every wget call. This has to be a
  # function: bash does not expand aliases in non-interactive shells, so the
  # former 'alias wget=...' never took effect inside the script.
  wget () {
      command wget --referer="http://boards.4chan.org/$BO" "$@"
  }
  58.  
  #######################################
  # Turn the freshly downloaded "$LOC.html" into an offline copy and fetch the
  # files it refers to: full-size files into "$LOC/", thumbnails, stylesheets,
  # logo and icons into "$LOC/misc/". Also maintains "$LOC/misc/images_list"
  # and creates "$LOC/misc/gallery.html", which renders that list.
  # Must be called from the directory that contains "$LOC.html".
  #
  # Fixes over the previous revision:
  #   * images_list is rebuilt through a temporary file; redirecting the
  #     pipeline straight back into images_list truncated it while it was
  #     still being read.
  #   * CSSt now stores the stylesheet name (it used to store the literal
  #     string "CSS"), so stylesheets are only re-fetched when they change.
  #   * RET is escaped before it is spliced into the sed program; a return
  #     target containing ':', '&' or '\' (any URL) used to abort the rewrite.
  #   * The 'Return' link is rewritten for boards of any name length, matching
  #     the navLinksBot rule; the old '/./' pattern only fit one-letter boards.
  #   * The embedded pages are single here-doc lines again: only lines starting
  #     with '<!' survive the filter step further down.
  #
  # Globals:
  #   LOC  (read)         - "<board>_<thread number>"
  #   ST   (read)         - static-asset host whose links become local paths
  #   NO   (read)         - thread number, used to mark links to the OP
  #   BO   (read)         - board name, used for cross-thread link targets
  #   RET  (read)         - replacement target of the 'Return' links
  #   CSS  (written)      - file name of the page's first stylesheet
  #   CSSt (read/written) - stylesheet name handled by the previous call
  # Returns:
  #   1 if a working directory cannot be created or entered, otherwise the
  #   status of the final 'cd'.
  #######################################
  thejob () {
      local css_url logo_url spoiler ret_esc

      mkdir -p -- "$LOC/misc" || return 1
      cd -- "$LOC/misc" || return 1

      touch images_list

      # One "stored file|size and dimensions|original name" record per
      # attachment; the two expressions cover links with and without a title
      # attribute.
      grep -E -o "File: <a[^>]*>[^<]*</a>[^<]*" "../../$LOC.html" \
          | sed -e 's_^File: <a title="\([^"]*\)" href="[^"]*/\([0-9]\+\....\)"[^/]*/a> (\([^)]\+\))$_\2|\3|\1_g' \
                -e 's_^File: <a href="[^"]*/\([0-9]\+\....\)[^>]*>\([^<]*\)</a> (\([^)]\+\))$_\1|\3|\2_g' \
                -e '/^$/d' > images_list.found

      # Merge old and new records plus the names of any spoiler images already
      # present in this directory.
      {
          cat images_list images_list.found
          for spoiler in *spoiler*; do
              if [[ -e $spoiler ]]; then
                  printf '%s\n' "$spoiler"
              fi
          done
      } | sort | uniq | sed -e '/^\s*$/d' > images_list.new
      mv -- images_list.new images_list
      rm -- images_list.found

      if [ ! -s gallery.html ]; then
          cat <<EOF > gallery.html
<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="viewport" content="width=device-width, user-scalable=yes, initial-scale=1.0" /><title>Image gallery</title><style type="text/css">/*<![CDATA[*/.loading{display:inline-block;width:0;height:0;border-right:20px solid #39f;border-top:20px solid red;border-left:20px solid yellow;border-bottom:20px solid green;border-radius:20px;-moz-border-radius:20px;-webkit-border-radius:20px;animation:bganim 1.5s ease 0s infinite;-moz-animation:bganim 1.5s ease 0s infinite;-webkit-animation:bganim 1.5s ease 0s infinite}@keyframes bganim{from{transform:rotate(0deg)}to{transform:rotate(360deg)}}@-moz-keyframes bganim{from{-moz-transform:rotate(0deg)}to{-moz-transform:rotate(360deg)}}@-webkit-keyframes bganim{from{-webkit-transform:rotate(0deg)}to{-webkit-transform:rotate(360deg)}}html,body{width:100%;height:100%;margin:0;font-family:Helvetica,Arial,Verdana,sans-serif;text-align:center}div:not(.loading){text-align:center;max-width:250px;padding:5px;border:1px solid black;clear:both;display:inline-block;margin-top:5px}img{max-width:250px;max-height:250px}s{margin-top:3px;display:block;word-wrap:break-word;text-decoration:none}table{border:0;width:100%;height:100%}td{width:50%;text-align:left}td:first-of-type{text-align:right}.middle{vertical-align:middle}/*]]>*/</style></head><body><table><tr><td><div class="loading"></div></td><td class="middle">Loading...</td></tr></table><script type="text/javascript">/*<![CDATA[*/var d=document,\$=function(a){return d.querySelector(a)},html="<h1>Image gallery</h1>",x=new XMLHttpRequest(),td=\$(".middle"),r;x.onreadystatechange=function(){if(x.readyState==4){if(x.status==200){td.innerHTML="Parsing gallery...";var b = x.responseText.split("\n");b.some(function(a){if(b.lastIndexOf(a)==0&&a.indexOf('|')==-1)return;a=a.split("|");if(a.length>1){html+='<div><a href="../'+a[0]+'"><img src="'+a[1].indexOf('Spoiler')==0?b[b.length-1]:(a[0].split(".")[0]+"s.jpg")+'" alt="Image" /></a><s>'+a[1]+'</s><s>'+a[2]+'</s></div>'}return});\$("body").innerHTML=html}else{\$("td").innerHTML="Er<br>Are you trying to";td.innerHTML="ror<br>open this offline?"}}};x.open("GET","images_list",true);x.setRequestHeader("Cache-Control","no-cache");td.innerHTML="Fetching images list...";x.send(null)//]]></script></body></html>
EOF
      fi

      cd ../.. || return 1

      # Collect the download lists before the page is rewritten.
      # $LOC/misc/misc: thumbnails, static images and country flags.
      grep -E -o "//.\.t\.4cdn\.org/[^.]+\.jpg" "$LOC.html" \
          | sed 's_^//_http:&_g' > "$LOC/misc/misc"
      grep -E -o "//${ST}/image/[^.]+\...." "$LOC.html" \
          | sed 's_^//_http:&_g' | uniq >> "$LOC/misc/misc"
      grep -E -o "//${ST}/image/country/[^.]+\...." "$LOC.html" \
          | sed 's_^//_http:&_g' >> "$LOC/misc/misc"

      # $LOC/misc/css: the first stylesheet plus the tomorrow/prettify ones.
      grep -E -o "//${ST}/css/[a-z]+\.[0-9]+\.css" "$LOC.html" \
          | sed -e 's_^//_http:&_' | head -n1 > "$LOC/misc/css"
      grep -E -o "//${ST}/css/[a-z]+\.[0-9]+\.css" "$LOC.html" \
          | sed -e 's_^//_http:&_' | grep -E 'tomorrow|prettify' >> "$LOC/misc/css"

      # $LOC/misc/logo: the banner image; $LOC/images: the full-size files.
      grep -E -o 'data-src="[^.]+\.[^"]+' "$LOC.html" \
          | sed 's_^data.src."_http://'"$ST"'/image/title/_' | head -n1 > "$LOC/misc/logo"
      grep -E -o "//i\.4cdn\.org/[^.]+\.(jpg|png|gif|webm)" "$LOC.html" \
          | sed 's_^//_http:&_g' > "$LOC/images"

      # Break the page in front of </head> and put the replacement script and
      # style block, as one line of its own, between the two parts.
      sed -i -e 's@\(</head>\)@\n\1@' "$LOC.html"
      mv -- "$LOC.html" "$LOC.html.orig"
      head -n1 "$LOC.html.orig" > "$LOC.html"

      cat << EOF >> "$LOC.html"
<!<script>/*<![CDATA[*/var d=document,dE=d.documentElement,lS=localStorage,z,i,u="threaddl_arch_theme",e;Element.prototype.gA=function(a){return this.getAttribute(a);};Element.prototype.pN=function(){return this.parentNode;};String.prototype.iO=function(a){return this.indexOf(a);};function s(a,b,c){a.setAttribute(b,c);}function sT(b){lS.setItem(u,b);for(i=0;(z=d.getElementsByTagName("link")[i]);i++){if(z.gA("rel").iO("style")!=1&&z.gA("title")){z.disabled=true;if(z.gA("title")==(b?"switch":"Tomorrow"))z.disabled=false;}}}sT(parseInt(lS.getItem(u))||0);(function(){var a,b,c,e,f,g,h,i,j,k,l=parseInt(lS.qp_opt),m,n=parseInt(lS.img_hover),o;String.prototype.reverse=function(){return this.split("").reverse().join("")};k={hover:function(a){var b,c,d,e,f,g,h;d=a.clientX,e=a.clientY;g=k.el.style;b=dE.clientHeight,c=dE.clientWidth;f=k.el.offsetHeight;h=e-120;g.top=b<=f||h<=0?"0px":h+f>=b?b-f+"px":h+"px";if(d<=c-400){g.left=d+45+"px";return g.right=null}else{g.left=null;return g.right=c-d+45+"px"}},hoverend:function(){a.rm(k.el);return delete k.el}};a=function(a,b){if(b==null){b=d.body}return b.querySelector(a)};a.extend=function(a,b){var c;for(c in b){a[c]=b[c]}};a.extend(a,{id:function(a){return d.getElementById(a)},addStyle:function(b){var c;c=a.el("style",{textContent:b});a.add(d.head,c);return c},x:function(a,b){b===null&&(b=d.body);return d.evaluate(a,b,null,8,null).singleNodeValue},addClass:function(a,b){return a.classList.add(b)},rmClass:function(a,b){return a.classList.remove(b)},rm:function(a){return a.pN().removeChild(a)},tn:function(a){return d.createTextNode(a)},nodes:function(a){var b,c,e=0;if(!(a instanceof Array)){return a}b=d.createDocumentFragment();for(;e<a.length;e++){c=a[e];b.appendChild(c)}return b},add:function(b,c){return b.appendChild(a.nodes(c))},after:function(b,c){return b.pN().insertBefore(a.nodes(c),b.nextSibling)},el:function(b,c){var e=d.createElement(b);if(c){a.extend(e,c)}return e},on:function(a,b,c){var d,e=0,f=b.split(" ");for(;e<f.length;e++){d=f[e];a.addEventListener(d,c,false)}},off:function(a,b,c){var d,e=0,f=b.split(" ");for(;e<f.length;e++){d=f[e];a.removeEventListener(d,c,false)}},visible:function(a){var b = a.getBoundingClientRect();return (b.top+b.height>=0&&(dE.clientHeight-b.bottom)+b.height>=0)}});b=function(a,b){if(b==null){b=d.body}return [].slice.call(b.querySelectorAll(a))};c={post:function(b,d){var e=a.id("pc"+b);e&&a.add(d,c.cleanPost(e.cloneNode(true)))},cleanPost:function(c){var d,e,f,g,h,i=Date.now(),j=a(".post",c),k=_j=_k=_l=0,l=[].slice.call(c.childNodes),m=b(".inline",j),n=b(".inlined",j);for(;k<l.length;k++){if((d=l[k])!==j){a.rm(d)}}for(;_j<m.length;_j++){a.rm(m[_j])}for(;_k<n.length;_k++){a.rmClass(n[_k],"inlined")}(f=b("[id]",c)).push(c);for(;_l<f.length;_l++){(e=f[_l]).id=""+i+"_"+e.id}a.rmClass(c,"forwarded");a.rmClass(c,"qphl");a.rmClass(j,"highlight");a.rmClass(j,"qphl");return c}};j={init:function(){return e.callbacks.push(this.node)},node:function(b){var c=a(".postInfo > .dateTime",b.el);if(b.isInlined){return}j.date=new Date(c.dataset.utc*1e3);c.title="4chan time: "+c.textContent;return c.textContent=j.zeroPad(j.date.getMonth()+1)+"/"+j.zeroPad(j.date.getDate())+"/"+(j.date.getFullYear()-2e3)+"("+j.day[j.date.getDay()]+")"+j.zeroPad(j.date.getHours())+":"+j.zeroPad(j.date.getMinutes())},day:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],zeroPad:function(a){if(10>a){return"0"+a}else{return a}}};g={init:function(){return e.callbacks.push(this.node)},node:function(b){var c,d,e,f,g,j,k={},n=0,m=b.quotes;if(b.isInlined){return}for(;m.length>n;n++){j=m[n];if(g=j.hash.slice(2)){k[g]=true}}c=a.el("a",{href:"#p"+b.ID,className:"backlink",textContent:">>"+b.ID});for(g in k){if(!(e=a.id("pi"+g))||/\bop\b/.test(e.pN().className)){continue}f=c.cloneNode(true);l&&a.on(f,"mouseover",i.mouseover);a.on(f,"click",h.toggle);if(!(d=a.id("blc"+g))){d=a.el("span",{className:"container",id:"blc"+g});a.add(e,d)}a.add(d,[a.tn(" "),f])}}};h={init:function(){return e.callbacks.push(this.node)},node:function(b){var c,d=_j=0,e=b.quotes,f=b.backlinks;for(;e.length>d;d++){c=e[d];if(!c.hash){continue}c.removeAttribute("onclick");a.on(c,"click",h.toggle)}for(;f.length>_j;_j++){a.on(f[_j],"click",h.toggle)}},toggle:function(b){if(b.shiftKey||b.altKey||b.ctrlKey||b.metaKey||b.button!==0){return}b.preventDefault();var c=this.dataset.id||this.hash.slice(2);if(/\binlined\b/.test(this.className)){h.rm(this,c)}else{if(a.x("ancestor::div[contains(@id,'p"+c+"')]",this)){return}h.add(this,c)}return this.classList.toggle("inlined")},add:function(b,d){var e=d,f=a.id("p"+e),g=a.el("div",{id:"i"+e,className:"inline"}),h=/\bbacklink\b/.test(b.className),i=h?b.pN():a.x("ancestor-or-self::*[parent::blockquote][1]",b);a.after(i,g);c.post(e,g);if(!f){return}if(h){a.addClass(f.pN(),"forwarded");++f.dataset.forwarded||(f.dataset.forwarded=1)}},rm:function(c,d){var e,f,g,h,i;e=a.x("following::div[@id='i"+d+"']",c);a.rm(e);i=b(".backlink.inlined",e);for(g=0,h=i.length;h>g;g++){f=i[g];e=a.id(f.hash.slice(1));if(!--e.dataset.forwarded){a.rmClass(e.pN(),"forwarded")}}if(/\bbacklink\b/.test(c.className)){e=a.id("p"+d);if(!--e.dataset.forwarded){return a.rmClass(e.pN(),"forwarded")}}}};i={init:function(){return e.callbacks.push(this.node)},node:function(b){var c,d=_j=0,e=b.quotes,f=b.backlinks;for(;e.length>d;d++){c=e[d];if(c.hash||/\bdeadlink\b/.test(c.className)){a.on(c,"mouseover",i.mouseover)}}for(;f.length>_j;_j++){a.on(f[_j],"mouseover",i.mouseover)}},mouseover:function(e){var f,g=this.hash.slice(2),h,j,l,m=0,n;if(/\binlined\b/.test(this.className)){return}if(h=a.id("qp")){if(h===k.el){delete k.el}a.rm(h)}if(k.el){return}h=k.el=a.el("div",{id:"qp",className:"reply dialog"});k.hover(e);a.add(d.body,h);f=a.id("p"+g);c.post(g,h);a.on(this,"mousemove",k.hover);a.on(this,"mouseout click",i.mouseout);if(!f){return}if(/\bop\b/.test(f.className)){a.addClass(f.pN(),"qphl")}else{a.addClass(f,"qphl")}l=a.x("ancestor::*[@id][1]",this).id.match(/\d+$/)[0];n=b(".quotelink, .backlink",h);for(;n.length>m;m++){j=n[m];if(j.hash.slice(2)===l){a.addClass(j,"forwardlink")}}},mouseout:function(b){var c;k.hoverend();if(c=a.id(this.hash.slice(1))){a.rmClass(c,"qphl");a.rmClass(c.pN(),"qphl")}a.off(this,"mousemove",k.hover);return a.off(this,"mouseout click",i.mouseout)}};m={init:function(){return e.callbacks.push(this.node)},node:function(b){if(!b.img){return}return a.on(b.img,"mouseover",m.mouseover)},mouseover:function(){var b,c;if(b=a.id("ihover")){if(b===k.el){delete k.el}a.rm(b)}if(k.el){return}c={id:'ihover',src:this.pN().href};c.src.reverse().iO('m')==0&&a.extend(c,{autoplay:true,loop:true});b=k.el=a.el(c.loop?'video':'img',c);a.add(d.body,b);a.on(b,"load",m.load);a.on(this,"mousemove",k.hover);return a.on(this,"mouseout",m.mouseout)},load:function(){var a;if(!this.pN()){return}a=this.style;return k.hover({clientX:-45+parseInt(a.left),clientY:120+parseInt(a.top)})},mouseout:function(){k.hoverend();a.off(this,"mousemove",k.hover);return a.off(this,"mouseout",m.mouseout)}};o={videos:[],init:function(){return e.callbacks.push(this.node);a.on(window,'resize scroll visibilitychange',o.videoHandler)},node:function(b){if(!b.img){return}return a.on(b.img.pN(),'click',o.cb.toggle)},cb:{toggle:function(e){if(e.shiftKey||e.altKey||e.ctrlKey||e.metaKey||e.button!==0){return}e.preventDefault();return o.toggle(this)}},toggle:function(b){var c,e=b.firstChild;if(e.hidden){c=b.getBoundingClientRect();if(d.body.gA("class")!=='i'){if(c.top<0){d.body.scrollTop+=c.top-42}if(c.left<0){d.body.scrollLeft+=c.left}}else{if(c.top<0){dE.scrollTop+=c.top-42}if(c.left<0){dE.scrollLeft+=c.left}}return o.contract(e)}else{return o.expand(e)}},contract:function(b){var c=b.nextSibling,d;b.hidden=false;c.hidden=true;if(c.loop){d=o.videos;d=d.splice(d.indexOf(a.rm(c)),1)}},expand:function(b){var c,d,e,f;if(a.x('ancestor-or-self::*[@hidden]',b)){return}b.hidden=true;if((d=b.nextSibling)&&(d.nodeName=='IMG'||(e=d.nodeName =='VIDEO'))){d.hidden=false;e&&d.play();return}c=b.pN();f=c.href;d=f.reverse().iO('m')==0?a.el('video',{src:f,autoplay:!0,loop:!0}):a.el('img',{src:f,style: 'width:100px;height:100px;'});if(d.loop){o.videos.push(d);a.on(d,'canplay',o.videoHandler)}else{!d.naturalWidth?a.on(d,'load',function(){d.removeAttribute("style")}):d.removeAttribute("style")}return a.add(c,d)},videoHandler:function(){var b,c=o.videos,v;for(b=0;c.length>b;b++){v=c[b];a.visible(v)&&!d.hidden?v.play():v.pause()}}};e={init:function(){(function(){var c=b('a.fileThumb'),i=0;/WebKit/.test(navigator.userAgent)||s(d.body,'class','i');for(;c.length>i;i++){var n=a.nodes([a.tn(' '),a.el('a',{textContent:'google',href:'http://www.google.com/searchbyimage?image_url='+c[i].href,target:'_blank'})]);-1!==location.protocol.iO('http')&&c[i].previousElementSibling.appendChild(n)}for(c=b('span.abbr a'),i=0;c.length>i;i++){s(c[i],'onclick','javascript:e=this.pN().pN().lastChild.style;e.display=e.display=="block"?"none":"block"')}}());a.add(a("div.navLinks"),[a.tn(" ["),a.el("a",{href:"javascript:;",textContent:"QuotePreview is "+(l?"ON":"OFF"),onclick:function(){lS.qp_opt=l?0:1;window.location.reload()}}),a.tn("] ["),a.el("a",{href:"javascript:;",textContent:"ImageHover is "+(n?"ON":"OFF"),onclick:function(){lS.img_hover=n?0:1;window.location.reload()}}),a.tn("] ["),a.el("a",{href:"./"+window.location.href.split("/").pop().split(".")[0]+"/misc/gallery.html",textContent:"Image only view"}),a.tn("]")]);j.init();h.init();l&&i.init();n&&m.init();o.init();g.init();e.ready()},ready:function(){var c=[],d=0,f=b(".postContainer",a.id("delform"));for(;f.length>d;d++){c.push(e.preParse(f[d]))}e.node(c);if(MutationObserver=window.MutationObserver||window.WebKitMutationObserver||window.OMutationObserver){observer=new MutationObserver(e.observer);observer.observe(a(".board"),{childList:true,subtree:true})}else{a.on(a(".board"),"DOMNodeInserted",e.listener)}},preParse:function(c){var d=a(".post",c),e=c.pN().className,f={root:c,el:d,"class":d.className,ID:d.id.match(/\d+$/)[0],threadID:a.x("ancestor::div[parent::div[@class='board']]",c).id.match(/\d+$/)[0],isInlined:/\binline\b/.test(e),blockquote:d.lastElementChild,quotes:b("a.quotelink[href^='#p']",d),backlinks:d.getElementsByClassName("backlink"),img:false},g;if(g=a('img[data-md5]',d)){f.img=g}return f},node:function(a){for(var b=0,c=e.callbacks;c.length>b;b++){var d=c[b];try{for(var f=0;a.length>f;f++){d(a[f])}}catch(g){alert("Error: "+g.message+"\nReport the bug to HandyAnon@Steam\n\nURL: "+window.location+"\n"+g.stack)}}},observer:function(a){var b,c,d,f,g,h,i,j;d=[];for(f=0,h=a.length;h>f;f++){c=a[f];j=c.addedNodes;for(g=0,i=j.length;i>g;g++){b=j[g];if(/\bpostContainer\b/.test(b.className)){d.push(e.preParse(b))}}}if(d.length){return e.node(d)}},listener:function(a){var b;b=a.target;if(/\bpostContainer\b/.test(b.className)){return e.node([e.preParse(b)])}},callbacks:[]};a.on(d,"DOMContentLoaded",e.init)}).call(this)//]]></script><style>/*<![CDATA[*/img[data-md5]+*{max-width:100%!important;}.i img[data-md5]+*{width:100%!important;}.op:after{clear:both;content:'';display:block;}#qp{padding:2px 2px 5px;position:fixed;border:1px solid rgba(128,128,128,0.5);}#qp .post{border:none;margin:0;padding:0;}#qp img,#qp video{max-height:300px;max-width:500px;}.qphl{outline:2px solid rgba(216,94,49,.7);}.inlined{opacity:.5;}.inline{border:1px solid rgba(128,128,128,0.5);display:table;margin:2px;padding:2px;}.inline .post{background:none;border:none;margin:0;padding:0;}.forwarded{display:none;}.quotelink.forwardlink,.backlink.forwardlink{text-decoration:none;border-bottom:1px dashed;}#ihover{max-height:97%;max-width:75%;padding-bottom:18px;position:fixed;} blockquote{word-wrap:break-word;min-width:120px;}/*]]>*/</style>
EOF

      tail -n1 "$LOC.html.orig" >> "$LOC.html"
      rm -- "$LOC.html.orig"

      # RET ends up in the replacement part of a ':'-delimited s command, so
      # backslash, '&' and ':' have to be escaped to be taken literally.
      ret_esc=$(printf '%s' "$RET" | sed -e 's/[\\&:]/\\&/g')

      # Rewrite the page. The rules mark every fragment that is to be kept by
      # starting a new line with '<!' and cut unwanted parts off onto lines
      # without that marker; remote links are pointed at the local copies.
      # First group: the head part (line 1). Second group: the body (last
      # line). Third group: every line.
      # NOTE(review): the "(Cross-thread)" rule discards its fourth capture
      # group, i.e. the rest of the opening tag and the link text; kept as it
      # was, please confirm that this is intended.
      sed -i \
          -e '1 {
              s_<script_!>\n&_
              s_<link [^>]*RSS feed[^>]*>__
              s@//'"$ST"'/image/\(favicon[^.]*\.ico\)@'"$LOC"'/misc/\1@
              s_<link rel="alternate style[^-]*\(<link[^>]*tomorrow\.[^>]*>\)<link[^>]*>_\1_
              s@//'"$ST"'/css/\([^.]\+\.[^.]\+\.css\)@'"$LOC"'/misc/\1@g
          }' \
          -e '$ {
              s_</head>_<!&_
              s_<div id="boardNavDesktop" class="desktop">_\n_
              s_<div class="boardBanner"_\n<!&_
              s@ data-src="[^.]\+\.\([^"]\+\)">@><img alt="4chan" src="'"$LOC"'/misc/logo.\1" />@
              s_<hr class="abovePost_\n_
              s_ .<a[^>]*>Catalog</a>.__g
              s_\(<div class="navLinks desktop">.<a href="/[^/]\+/[^#]*\)#bottom\(">Bottom</a>.\)_\n<!\1javascript:dE.scrollIntoView(false)\2</div><hr>\n_
              s_\(<form name="delform" id="delform"\)[^>]*_\n<!\1_
              s@//.\.t\.4cdn\.org/[^/]*/\([0-9]*s\.jpg\)@'"$LOC"'/misc/\1@g
              s@//i\.4cdn\.org/[^/]*/\([0-9]*\....\)@'"$LOC"'/\1@g
              s@//'"$ST"'/image/title/[a-z0-9-]*\.\(...\)@'"$LOC"'/misc/logo.\1@g
              s@//'"$ST"'/image/\(spoiler[^.]*\....\)@'"$LOC"'/misc/\1@g
              s@//'"$ST"'/image/\(filedeleted-res\.gif\)@'"$LOC"'/misc/\1@g
              s@//'"$ST"'/image/country/\([^.]*\....\)@'"$LOC"'/misc/\1@g
              s@//'"$ST"'/image/\([a-z]*icon.gif\)@'"$LOC"'/misc/\1@g
              s@//'"$ST"'/image/\(archived.gif\)@'"$LOC"'/misc/\1@g
              s_<div data.tip[^>]*>[^<]*<div>__g
              s_\(<a href="\)'"$NO"'#p_\1#p_g
              s_<a href="#p'"$NO"'"[^>]*>&gt;&gt;'"$NO"'_& (OP)_g
              s:\(<a href="\)\([0-9]\+\)\(#p[0-9]*\)\([^<]*\):\1'"$BO"'_\2.html" target="_blank" \3 (Cross-thread):g
              s_\(<div class="navLinks navLinksBot desktop">.<a href="/[^/]\+/"[^>]*>Return</a>. .<a href="\)#top\(">Top</a>.\)_\n<!<span style="float:right;">Style: [ <a href="javascript:sT(1);dE.scrollIntoView(false)">Default</a> | <a href="javascript:sT(0);dE.scrollIntoView(false)">Tomorrow</a> ]</span>\1javascript:dE.scrollIntoView()\2\n_
              s@</body>@\n<!</div></div></form>&@
          }' \
          -e '
              s:\(<a href="\)/[^/"]\+/\([^>]*>Return</a>\):\1'"$ret_esc"'\2:g
              s_\(<a[^>]*href="\)//_\1http://_g
              s_\(<a[^>]*\) onclick="replyhl[^"]*"_\1_g
              s_\(<a href="javascript:\)quote[^>]*_\1;"_g
              s_<div class="postInfoM_\n_g
              s_<div class="file"_\n<!&_g
              s_<div class="postInfo _\n<!&_g
              s_<a href="/ic\?/anim\.php?file=[0-9]*" target="\_blank">_<a title="View is supported only on 4chan">_g
              s_\(<input type="checkbox"\)[^>]*_\1_g
              s_<a onclick="toggle..exif[^"]*"_<a_g
              s_<div class="mFileInfo mobile">[^<]*</div>__g
              s_<wbr>__g
          ' \
          -- "$LOC.html"

      # Keep only the marked fragments, drop the marker (the first fragment
      # keeps its leading '<!'), glue everything into one line and turn the
      # '!>' marks into line breaks.
      grep '^<!' "$LOC.html" > "$LOC.html.keep"
      sed -i -e '2,$ s_^<!__' "$LOC.html.keep"
      tr -d '\n' < "$LOC.html.keep" > "$LOC.html"
      sed -i -e 's_!>_\n_g' "$LOC.html"
      rm -- "$LOC.html.keep"

      cd -- "$LOC/misc" || return 1

      # Stylesheets: fetched only when the first stylesheet's name differs
      # from the one handled by the previous call.
      css_url=$(head -n1 css)
      CSS=${css_url##*/}
      if [[ -n $CSS && $CSSt != "$CSS" ]]; then
          CSSt=$CSS
          rm -f -- *.css
          wget -q -nc -i css
          if [[ -f $CSS ]]; then
              # Fetch the fade images the stylesheet refers to and make the
              # stylesheet use the local copies.
              grep -o 'fade[^.]*\.png' "$CSS" | sort | uniq \
                  | sed -e 's_.*_http://'"$ST"'/image/&_' \
                  | wget -q -nc -i -
              sed -i -e 's_/image/\(fade[^.]*\.png\)_\1_g' "$CSS"
          fi
      fi

      # Logo: replace the stored one, keeping the new file's extension.
      logo_url=$(head -n1 logo)
      if [[ -n $logo_url ]]; then
          rm -f -- logo.*
          wget -q -O "logo.${logo_url##*.}" "$logo_url"
      fi

      rm -- logo css

      touch .nomedia

      cd .. || return 1

      # Full-size files; -nc skips everything that is already on disk.
      wget -q -nc -i images
      rm -- images

      cd misc || return 1

      wget -q -nc -i misc
      rm -- misc

      cd ../..
  }
  204.  
  # Say goodbye and leave with a success status. Installed as the handler for
  # signals 1, 2, 3 and 15 while a polling round is running, and called directly
  # when the script is set to run only once.
  exito() {
      printf '%s\n' 'Session completed. Exiting'
      exit 0
  }
  209.  
  echo ""
  echo "Downloading to $LOC"
  echo "------------------------------"

  # Normalise the waiting time once, before polling starts: read it as a
  # decimal number (so "08" is not taken for octal and "01" means "run once"),
  # then force everything except the run-once value 1 into the 10-999 range.
  # The old in-loop clamp let 0 through, which meant polling without a pause.
  [[ $SLP =~ ^[0-9]+$ ]] || SLP=10
  SLP=$((10#$SLP))
  if (( SLP != 1 && SLP < 10 )); then
      SLP=10
  elif (( SLP > 999 )); then
      SLP=999
  fi
  SLAP=$SLP

  while true; do
      # While a round is in progress, HUP/INT/QUIT/TERM end the session
      # through exito.
      trap exito 1 2 3 15

      # Header-only request: enough to see a 404 and the Last-Modified stamp.
      stat=$(wget -S --spider "$URL" 2>&1)

      if grep -q '404 Not Found' <<<"$stat"; then
          if [[ -s "$LOC.html" ]]; then
              echo "Thread has 404'd or 4chan is down. Stopping script"
          else
              echo "Thread does not exist. Stopping script"
          fi
          exit 0
      fi

      modified=$(grep Last-Modified <<<"$stat")

      if [[ $LM != "$modified" ]]; then
          # Download next to the saved page and swap it in only on success, so
          # a failed transfer can no longer replace a good archive with an
          # empty file. LM is updated only then, so the next round retries.
          if wget -np -nd -nH -q -erobots=off "$URL" -O "$LOC.html.part"; then
              mv -- "$LOC.html.part" "$LOC.html"
              LM=$modified
              SLAP=$SLP
              if grep -q -F '<div class="closed">' "$LOC.html"; then
                  echo "Thread is archived on 4chan. Downloading once"
                  SLP=1
              fi
              thejob
          else
              rm -f -- "$LOC.html.part"
              echo "Download of $URL failed" >&2
              SLAP=$((SLAP + 5))
          fi
      else
          # Nothing new: wait five seconds longer before the next look.
          SLAP=$((SLAP + 5))
      fi

      trap - 1 2 3 15

      if (( SLP == 1 )); then
          exito
      fi

      # "OK" is shown while sleeping and wiped again afterwards.
      printf 'OK'

      sleep "$SLAP"

      printf '\b\b  \b\b'
  done
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement