#!/usr/bin/env python
## Author : Omar (KacangItem)
## URL    : https://github.com/kacangitem/komikid-manga-Downloader
# komikid-downloader.py
import re, sys, glob, shutil, os
from urllib import FancyURLopener
from BeautifulSoup import BeautifulSoup
from zipfile import ZipFile
#from optparse import OptionParser
from urlparse import urljoin

URL_BASE = 'http://komikid.com'
PARS_VALUE = re.compile(r'value="(\w+)"')
UA = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.10 Chromium/14.0.835.202 Chrome/14.0.835.202 Safari/535.1'


class DonDown(FancyURLopener):
    """URL opener that reports a desktop browser user agent."""
    version = UA


akamaru = DonDown()

def get_page_soup(url):
    """Fetch a URL and parse the HTML with BeautifulSoup."""
    akamaru.retrieve(url, "page.html")
    html = ""
    with open("page.html") as html_file:
        for line in html_file:
            html += line
    os.remove('page.html')
    return BeautifulSoup(html)

def get_all_chapter(soup):
    """Collect the list of chapters available for the manga."""
    links = soup.findAll('select', {"name": "chapter"})
    chapters = PARS_VALUE.findall(str(links))
    return list(set(chapters))

def getMaxPage(soup):
    """Parse the HTML to get the highest page number in a chapter."""
    pages = soup.findAll('select', {"name": "page"})[0]
    balikin = PARS_VALUE.findall(str(pages))[-1]
    return balikin

def getPicUrl(soup):
    """Find the image URL on the current page."""
    gambar = URL_BASE + '/' + soup.findAll('img', {'class': 'picture'})[0]['src'].encode()
    return gambar

def nextPage(soup):
    """Find the URL of the next page."""
    lanjut = URL_BASE + '/' + soup.findAll('a')[4]['href'].encode()
    return lanjut

def makecbz(manga, chapter, tampung):
    """Pack the downloaded images into a cbz (zip) archive."""
    zipname = manga + '_' + chapter + '.cbz'
    #list the jpg files in the given directory
    #images = glob.glob(os.getcwd() + "/*." + format)
    myzip = ZipFile(zipname, 'w')
    #for filename in images:
    for filename in tampung:
        #print("writing {0} to {1}".format(filename, zipname))
        #print "creating cbz file {0}".format(zipname)
        myzip.write(filename)
        os.remove(filename)
    myzip.close()

def DownPic(list_url, manga, chapter):
    """Download the images found while parsing the HTML."""
    #format = list_url[0].split('/')[-1].split('.')[-1]
    tampung = []
    now = 1
    tot = len(list_url)
    print 'Downloading {0} chapter {1} ...'.format(manga, chapter)
    #print list_url
    for url in list_url:
        #print "Now at {0} of {1}".format(str(now), str(tot))
        progBar(now, tot)
        file = url.split('/')[-1]
        akamaru.retrieve(url, file)
        tampung.append(file)
        #os.system('wget -c %s' % url)
        now += 1
    makecbz(manga, chapter, tampung)

def progBar(now, total):
    """Render a simple text progress bar (still rough)."""
    bar = total
    percent = '%d%%' % int(round(float((now * 100.0) / total)))
    prog = '[' + '=' * now + '>' + ' ' * (bar - now) + ']' + percent
    if sys.platform.lower().startswith('win'):
        print prog, '\r',
    else:
        print prog, chr(27) + '[A'

def list_dwn_pic(first_url, manga, chapter):
    """Build the list of image URLs for a chapter, then download them."""
    soup = get_page_soup(first_url + '/' + chapter)
    max = getMaxPage(soup)
    picurl = []
    for seq in xrange(int(max)):
        picture = getPicUrl(soup)
        #print "From", seq
        #print "Image found", picture
        picurl.append(picture)
        next = nextPage(soup)
        #print "Next target ", next
        soup = get_page_soup(next)
    DownPic(picurl, manga, chapter)

def DownMe(manga, chapterMin=None, chapterMax=None):
    """Main function"""
    manga_url = urljoin(URL_BASE, manga)
    print manga_url
    bubur = get_page_soup(manga_url)
    bab = sorted(get_all_chapter(bubur))
    # check whether the requested chapters actually exist
    if chapterMin:
        if chapterMin not in bab:
            print 'manga {0} chapter {1} is not available !!!'.format(manga, chapterMin)
            sys.exit(1)
    if chapterMax:
        if chapterMax not in bab:
            print 'manga {0} chapter {1} is not available !!!'.format(manga, chapterMax)
            sys.exit(1)
    if chapterMin and chapterMax:
        for chapter in xrange(int(chapterMin), int(chapterMax) + 1):
            list_dwn_pic(manga_url, manga, str(chapter))
    elif chapterMin:
        list_dwn_pic(manga_url, manga, str(chapterMin))
    else:
        #download every chapter listed in bab
        for bba in bab:
            list_dwn_pic(manga_url, manga, str(bba))

if __name__ == '__main__':
    #print len(sys.argv)
    #print sys.argv[2]
    if len(sys.argv) == 4:
        DownMe(sys.argv[1], sys.argv[2], sys.argv[3])
    elif len(sys.argv) == 3:
        DownMe(sys.argv[1], sys.argv[2])
    elif len(sys.argv) == 2:
        DownMe(sys.argv[1])
    else:
        # just for testing; this will later be replaced with OptionParser
        print("USAGE: mfdl.py [MANGA_NAME]")
        print("       mfdl.py [MANGA_NAME] [CHAPTER_NUMBER]")
        print("       mfdl.py [MANGA_NAME] [RANGE_START] [RANGE_END]")