ripping pics from asiachan

dependencies:

   conemu (or linux)
   wget
   grep
   sed
   fnr (presumably the "find and replace.exe" called below)
   aria2c

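# crawl gallery pages 1-30 with wget in spider mode and write one log of visited URLs per page (URLS<page>.csv)
# note: -o and --output-file are the same wget option given twice; the next step relies on the URLS<page>.csv logs
# note: %i works when typed at the cmd prompt; inside a .bat file the loop variable must be written %%i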

FOR /l %i in (1,1,30) DO wget -v -o "C:\Users\HTPC\Desktop\TESTFOLDER\log.txt" -e robots=off -r --spider --level=1 "http://kpop.asiachan.com/IU?d=2&p=%i" --output-file="C:\Users\HTPC\Desktop\TESTFOLDER\URLS%i.csv"


# join logs into one txt file

copy "C:\Users\HTPC\Desktop\TESTFOLDER\*.csv" "C:\Users\HTPC\Desktop\TESTFOLDER\URLS1.txt"

	* once merged, the per-page logs are no longer needed: delete "C:\Users\HTPC\Desktop\TESTFOLDER\*.csv"

# delete .csv leftovers

del "C:\Users\HTPC\Desktop\TESTFOLDER\*.csv"


# keep only the log lines that contain an image page url (http://kpop.asiachan.com/ followed by a digit)

grep "C:\Users\HTPC\Desktop\TESTFOLDER\URLS1.txt" -e http://kpop.asiachan.com/[0-9] > "C:\Users\HTPC\Desktop\TESTFOLDER\URLS2.txt"


# remove the date stamp wget puts in front of each url ("--YYYY-MM-DD HH:MM:SS--" plus the spaces after it)

sed -e 's/--.......................//g' "C:\Users\HTPC\Desktop\TESTFOLDER\URLS2.txt" > "C:\Users\HTPC\Desktop\TESTFOLDER\URLS3.txt"


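# remove duplicates by keeping only every second line
# (this assumes every url appears exactly twice in a row in the log; it is not a general de-duplication)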

sed -n 'g;n;p' "C:\Users\HTPC\Desktop\TESTFOLDER\URLS3.txt" > "C:\Users\HTPC\Desktop\TESTFOLDER\URLS4.txt"


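# replace page url with image url (the tool rewrites the matching .txt files in place)
# NOTE: the command below targets Desktop\testdir, not TESTFOLDER, and the later steps read URLS5.txt,
# which no earlier step creates - presumably URLS4.txt has to be copied to URLS5.txt in the processed folder first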


"C:\Users\HTPC\Desktop\Utilities\find and replace.exe" --cl --dir "C:\Users\HTPC\Desktop\testdir" --fileMask "*.txt" --excludeFileMask "*.dll, *.exe" --includeSubDirectories --find "http://kpop.asiachan.com/" --replace "http://static.asiachan.com/IU.full."


# add .jpg suffix to the end of every line (edits the file in place)

sed -i 's/$/.jpg/' "C:\Users\HTPC\Desktop\TESTFOLDER\URLS5.txt"


# download every url in the list with aria2c (resumes partial files, up to 10 connections per file)

aria2c --file-allocation=none -c -x 10 -s 10 --input-file="C:\Users\HTPC\Desktop\TESTFOLDER\URLS5.txt" --dir="C:\Users\HTPC\Desktop\iu3"