Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- dependencies:
- conemu (or linux)
- wget
- fnr (Find and Replace; run below as "find and replace.exe")
- aria2c
- grep
- sed
- # spider asiachan listing pages 1-30 (one wget log per page, saved as URLS<page>.csv)
- FOR /l %i in (1,1,30) DO wget -v -o "C:\Users\HTPC\Desktop\TESTFOLDER\log.txt" -e robots=off -r --spider --level=1 "http://kpop.asiachan.com/IU?d=2&p=%i" --output-file="C:\Users\HTPC\Desktop\TESTFOLDER\URLS%i.csv"
- # concatenate the per-page .csv logs into a single txt file
- copy "C:\Users\HTPC\Desktop\TESTFOLDER\*.csv" "C:\Users\HTPC\Desktop\TESTFOLDER\URLS1.txt"
- # (the .csv files are no longer needed after this step: "C:\Users\HTPC\Desktop\TESTFOLDER\*.csv")
- # delete .csv leftovers
- del "C:\Users\HTPC\Desktop\TESTFOLDER\*.csv"
- # keep only the log lines that contain a numbered page url
- grep "C:\Users\HTPC\Desktop\TESTFOLDER\URLS1.txt" -e http://kpop.asiachan.com/[0-9] > "C:\Users\HTPC\Desktop\TESTFOLDER\URLS2.txt"
- # remove the wget date stamp that precedes each url
- sed -e 's/--.......................//g' "C:\Users\HTPC\Desktop\TESTFOLDER\URLS2.txt" > "C:\Users\HTPC\Desktop\TESTFOLDER\URLS3.txt"
- # remove duplicates (keeps only every second line, so each url must appear exactly twice in a row)
- sed -n 'g;n;p' "C:\Users\HTPC\Desktop\TESTFOLDER\URLS3.txt" > "C:\Users\HTPC\Desktop\TESTFOLDER\URLS4.txt"
- # replace page url with image url (note: this runs on "testdir", while the url lists above are in "TESTFOLDER")
- "C:\Users\HTPC\Desktop\Utilities\find and replace.exe" --cl --dir "C:\Users\HTPC\Desktop\testdir" --fileMask "*.txt" --excludeFileMask "*.dll, *.exe" --includeSubDirectories --find "http://kpop.asiachan.com/" --replace "http://static.asiachan.com/IU.full."
- # add .jpg suffix (note: URLS5.txt is not created by any step above; the last file written is URLS4.txt)
- sed -i 's/$/.jpg/' "C:\Users\HTPC\Desktop\TESTFOLDER\URLS5.txt"
- # download every image url listed in URLS5.txt
- aria2c --file-allocation=none -c -x 10 -s 10 --input-file="C:\Users\HTPC\Desktop\TESTFOLDER\URLS5.txt" --dir="C:\Users\HTPC\Desktop\iu3"
Advertisement
Add Comment
Please, Sign In to add comment