## Author : Omar (KacangItem)
## URL : https://github.com/kacangitem/komikid-manga-Downloader
#komikkid-downloader.py
#!/usr/bin/env python
import re, sys,glob, shutil,os
from urllib import FancyURLopener
from BeautifulSoup import BeautifulSoup
from zipfile import ZipFile
#from optparse import OptionParser
from urlparse import urljoin
# Site and scraping constants.
URL_BASE = 'http://komikid.com'
# Extracts the value="..." attribute from the chapter/page <select> options.
PARS_VALUE = re.compile(r'value="(\w+)"')
# Spoofed browser User-Agent string so the site serves normal pages.
UA = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.10 Chromium/14.0.835.202 Chrome/14.0.835.202 Safari/535.1'
class DonDown(FancyURLopener):
    """URL opener that identifies itself with a real browser User-Agent."""
    version = UA
# Shared opener instance used for every page fetch and image download.
akamaru = DonDown()
def get_page_soup(url):
    """Fetch *url* and return its HTML parsed as a BeautifulSoup tree.

    The page is downloaded to a temporary file ("page.html" in the
    current directory) via the User-Agent-spoofing opener; the file is
    always removed afterwards, even if reading it fails.
    """
    akamaru.retrieve(url, "page.html")
    try:
        with open("page.html") as html_file:
            # Read the whole document at once instead of concatenating
            # line by line (the old loop was quadratic).
            html = html_file.read()
    finally:
        # Do not leave the temp file behind on error.
        os.remove('page.html')
    return BeautifulSoup(html)
def get_all_chapter(soup):
    """Return the de-duplicated list of chapter identifiers advertised
    by the page's <select name="chapter"> element."""
    chapter_selects = soup.findAll('select', {"name": "chapter"})
    found = PARS_VALUE.findall(str(chapter_selects))
    unique = set(found)
    return list(unique)
def getMaxPage(soup):
    """Return the highest page number listed in the chapter's
    <select name="page"> element (as a string)."""
    page_select = soup.findAll('select', {"name": "page"})[0]
    values = PARS_VALUE.findall(str(page_select))
    return values[-1]
def getPicUrl(soup):
    """Return the absolute URL of the manga page image on this page."""
    picture_img = soup.findAll('img', {'class': 'picture'})[0]
    src = picture_img['src'].encode()
    return URL_BASE + '/' + src
def nextPage(soup):
    """Return the absolute URL of the next page.

    The target is the fifth <a> tag on the page — a position that is
    assumed stable in komikid's page layout.
    """
    anchors = soup.findAll('a')
    href = anchors[4]['href'].encode()
    return URL_BASE + '/' + href
def makecbz(manga, chapter, tampung):
    """Pack the downloaded image files into a .cbz (zip) archive.

    manga   -- manga name, used in the archive file name
    chapter -- chapter identifier, used in the archive file name
    tampung -- list of image file names to pack; each file is deleted
               right after it has been added to the archive

    The archive is written as "<manga>_<chapter>.cbz" in the current
    directory.
    """
    zipname = manga + '_' + chapter + '.cbz'
    # The with-statement guarantees the archive is closed (central
    # directory flushed) even if adding a member fails.
    with ZipFile(zipname, 'w') as myzip:
        for filename in tampung:
            myzip.write(filename)
            # Free disk space as we go: the image now lives in the archive.
            os.remove(filename)
def DownPic(list_url,manga,chapter):
"""Download gambar dari hasil parsing html"""
#format = list_url[0].split(\'/\')[-1].split(\'.\')[-1]
tampung = []
now = 1
tot = len(list_url)
print \'Mengunduh {0} chapter {1} ...\'.format(manga,chapter)
#print list_url
for url in list_url:
#print "Sekarang ke {0} dari {1}".format(str(now),str(tot))
progBar(now,tot)
file = url.split(\'/\')[-1]
akamaru.retrieve(url,file)
tampung.append(file)
#os.system(\'wget -c %s\' % url)
now += 1
makecbz(manga,chapter,tampung)
def progBar(now, total):
    """Render a one-line text progress bar on stdout.

    now   -- 1-based index of the item being processed
    total -- total number of items (also the bar's width in characters)

    NOTE(review): the bar is `total` characters wide, so it only renders
    nicely for reasonably small totals.
    """
    percent = '%d%%' % int(round(float((now * 100.0) / total)))
    prog = '[' + '=' * now + '>' + ' ' * (total - now) + ']' + percent
    if sys.platform.lower().startswith('win'):
        # Windows consoles: rewrite the same line via carriage return.
        sys.stdout.write(prog + ' \r')
    else:
        # ANSI terminals: emit the bar, then move the cursor up one line
        # (ESC [A) so the next call overwrites it.
        sys.stdout.write(prog + ' ' + chr(27) + '[A\n')
    # The old print-statement version never flushed, so the bar lagged
    # behind the downloads; flush explicitly.
    sys.stdout.flush()
def list_dwn_pic(first_url, manga, chapter):
    """Collect every page-image URL of one chapter, then download them.

    Walks the chapter page by page: scrape the current image URL, follow
    the "next page" link, and repeat for as many pages as the page
    selector advertises. Finally hands the URL list to DownPic.
    """
    soup = get_page_soup(first_url + '/' + chapter)
    page_count = int(getMaxPage(soup))
    picture_urls = []
    for _ in xrange(page_count):
        picture_urls.append(getPicUrl(soup))
        next_url = nextPage(soup)
        soup = get_page_soup(next_url)
    DownPic(picture_urls, manga, chapter)
def DownMe(manga,chapterMin=None,chapterMax=None):
"""Main function"""
manga_url = urljoin(URL_BASE,manga)
print manga_url
bubur = get_page_soup(manga_url)
bab = sorted( get_all_chapter(bubur) )
# cek apakah chapter yang dimaksud ada ?
if chapterMin:
if chapterMin not in bab:
print \'manga {0} chapter {1} tidak tersedia !!!\'.format(manga,chapterMin)
sys.exit(1)
if chapterMax:
if chapterMax not in bab:
print \'manga {0} chapter {1} tidak tersedia !!!\'.format(manga,chapterMax)
sys.exit(1)
if chapterMin and chapterMax:
for chapter in xrange(int(chapterMin),int(chapterMax)+1):
list_dwn_pic(manga_url,manga,str(chapter))
elif chapterMin:
list_dwn_pic(manga_url,manga,str(chapterMin))
else:
#download semua yang ada di bab
for bba in bab:
list_dwn_pic(manga_url,manga,str(bba))
if __name__ == '__main__':
    # Simple positional CLI:
    #   1 arg  -> download every chapter of the manga
    #   2 args -> download a single chapter
    #   3 args -> download an inclusive chapter range
    if len(sys.argv) == 4:
        DownMe(sys.argv[1], sys.argv[2], sys.argv[3])
    elif len(sys.argv) == 3:
        DownMe(sys.argv[1], sys.argv[2])
    elif len(sys.argv) == 2:
        DownMe(sys.argv[1])
    else:
        # For testing only; will be replaced with an option parser later.
        print("USAGE: mfdl.py [MANGA_NAME]")
        print("       mfdl.py [MANGA_NAME] [CHAPTER_NUMBER]")
        print("       mfdl.py [MANGA_NAME] [RANGE_START] [RANGE_END]")