Advertisement
pebriana

Download Komikid

Dec 6th, 2011
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.21 KB | None | 0 0
  1. ## Author : Omar (KacangItem)
  2. ## URL    : https://github.com/kacangitem/komikid-manga-Downloader
  3.  
  4.  
  5. #komikkid-downloader.py
  6.  
  7.  
  8. #!/usr/bin/env python
  9. import re, sys,glob, shutil,os
  10. from urllib import FancyURLopener
  11. from BeautifulSoup import BeautifulSoup
  12. from zipfile import ZipFile
  13. #from optparse import OptionParser
  14. from urlparse import urljoin
  15.  
URL_BASE = 'http://komikid.com'  # site root; relative page/image paths are joined onto this
PARS_VALUE = re.compile(r'value="(\w+)"')  # pulls value="..." attributes out of serialized <select> HTML
UA = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.10 Chromium/14.0.835.202 Chrome/14.0.835.202 Safari/535.1'  # spoofed desktop-browser User-Agent
  19.  
class DonDown(FancyURLopener):
    """URL opener that masquerades as a desktop browser.

    FancyURLopener sends its `version` attribute as the User-Agent
    header, so overriding it here makes every request look like Chrome.
    """
    version = UA

# Single shared opener instance used for every page and image download.
akamaru = DonDown()
  24.  
  25. def get_page_soup(url):
  26.     """Parsing HTML yang didapat menggunakan BeautifulSoup"""
  27.     akamaru.retrieve(url, "page.html")
  28.     html = ""
  29.     with open("page.html") as html_file:
  30.         for line in html_file:
  31.             html += line
  32.     os.remove('page.html')
  33.     return BeautifulSoup(html)
  34.  
  35. def get_all_chapter(soup):
  36.     """Cari list chapter dari manga yang didapatkan"""
  37.     links = soup.findAll('select', {"name": "chapter"})
  38.     chapters = PARS_VALUE.findall(str(links))
  39.     return list(set(chapters))
  40.  
  41. def getMaxPage(soup):
  42.     """Parsing html untuk mendapatkan max halaman pada suatu chapter"""
  43.     pages = soup.findAll('select', {"name": "page"})[0]
  44.     balikin = PARS_VALUE.findall(str(pages))[-1]
  45.     return balikin
  46.    
  47. def getPicUrl(soup):
  48.     """Cari gambar"""
  49.     gambar = URL_BASE+'/'+soup.findAll('img',{'class':'picture'})[0]['src'].encode()
  50.     return gambar
  51.  
  52. def nextPage(soup):
  53.     """Cari Halaman Selanjutnya"""
  54.     lanjut = URL_BASE+'/'+soup.findAll('a')[4]['href'].encode()
  55.     return lanjut
  56.  
  57. def makecbz(manga,chapter,tampung):
  58.     """Convert gambar-gambar yang sudah diunduh menjadi cbz(zip)"""
  59.     zipname = manga + '_' + chapter+ '.cbz'
  60.     #list file yang berformat jpg pada direktori yang diberikan
  61.     #images = glob.glob( os.getcwd() + "/*."+format)
  62.     myzip = ZipFile(zipname, 'w')
  63.     #for filename in images:
  64.     for filename in tampung:
  65.         #print("writing {0} to {1}".format(filename, zipname))
  66.         #print "bikin cbz file {0} ".format(zipname)
  67.         myzip.write(filename)
  68.         os.remove(filename)
  69.     myzip.close()
  70.  
  71.  
  72. def DownPic(list_url,manga,chapter):
  73.     """Download gambar dari hasil parsing html"""
  74.     #format = list_url[0].split('/')[-1].split('.')[-1]
  75.     tampung = []
  76.     now = 1
  77.     tot = len(list_url)
  78.     print 'Mengunduh {0} chapter {1} ...'.format(manga,chapter)
  79.     #print list_url
  80.     for url in list_url:
  81.         #print "Sekarang ke {0} dari {1}".format(str(now),str(tot))
  82.         progBar(now,tot)
  83.         file = url.split('/')[-1]
  84.         akamaru.retrieve(url,file)
  85.         tampung.append(file)
  86.         #os.system('wget -c %s' % url)
  87.         now += 1
  88.     makecbz(manga,chapter,tampung)
  89.    
  90.  
def progBar(now,total):
    """Draw a one-line text progress bar (author note: still rough).

    The bar is *total* characters wide — one '=' per completed item —
    so it overflows the terminal for large totals.
    """
    bar = total
    percent = '%d%%' % int(round(float((now * 100.0) / total)))
    prog = '['+'=' * now +'>'+ ' ' * (bar - now)  +']'+ percent
    if sys.platform.lower().startswith('win'):
        # Trailing comma suppresses the newline; '\r' returns the cursor
        # to column 0 so the next call overwrites this line (Python 2
        # print-statement semantics).
        print prog, '\r',
    else:
        # ESC[A (cursor up) moves back onto the bar's line on ANSI
        # terminals, giving the same overwrite effect as '\r' on Windows.
        print prog,chr(27) + '[A'
  100.  
  101.  
  102. def list_dwn_pic(first_url,manga,chapter):
  103.     """Listing gambar yang akan diunduh dari hasil parsing HTML"""
  104.     soup = get_page_soup(first_url+'/'+chapter)
  105.     max = getMaxPage(soup)
  106.     picurl = []
  107.     for seq in xrange(int(max)):
  108.         picture = getPicUrl(soup)
  109.     #print "Dari",seq
  110.     #print "Gambar yang didapatkan",picture
  111.         picurl.append(picture)
  112.         next = nextPage(soup)
  113.     #print "Target Selanjutnya ",next
  114.         soup = get_page_soup(next)
  115.        
  116.     DownPic(picurl,manga,chapter)
  117.    
  118.    
  119. def DownMe(manga,chapterMin=None,chapterMax=None):
  120.     """Main function"""
  121.     manga_url = urljoin(URL_BASE,manga)
  122.     print manga_url
  123.     bubur = get_page_soup(manga_url)
  124.     bab = sorted( get_all_chapter(bubur) )
  125.  
  126.     # cek apakah chapter yang dimaksud ada ?
  127.  
  128.     if chapterMin:
  129.        
  130.         if chapterMin not in bab:
  131.             print 'manga {0} chapter {1} tidak tersedia !!!'.format(manga,chapterMin)
  132.             sys.exit(1)
  133.  
  134.     if chapterMax:
  135.         if chapterMax not in bab:
  136.             print 'manga {0} chapter {1} tidak tersedia !!!'.format(manga,chapterMax)
  137.             sys.exit(1)
  138.            
  139.     if chapterMin and chapterMax:
  140.         for chapter in xrange(int(chapterMin),int(chapterMax)+1):
  141.             list_dwn_pic(manga_url,manga,str(chapter))
  142.            
  143.     elif chapterMin:
  144.          list_dwn_pic(manga_url,manga,str(chapterMin))
  145.        
  146.     else:
  147.         #download semua yang ada di bab
  148.         for bba in bab:
  149.             list_dwn_pic(manga_url,manga,str(bba))
  150.    
  151.    
  152.    
  153. if __name__ == '__main__':
  154.     #print len(sys.argv)
  155.     #print sys.argv[2]
  156.     if len(sys.argv) == 4:
  157.         DownMe(sys.argv[1], sys.argv[2], sys.argv[3])
  158.     elif len(sys.argv) == 3:
  159.         DownMe(sys.argv[1], sys.argv[2])
  160.        
  161.     elif len(sys.argv) == 2:
  162.         DownMe(sys.argv[1])
  163.     else:
  164.         # buat testing aja entar bakal diganti pake OPTparser
  165.         print("USAGE: mfdl.py [MANGA_NAME]")
  166.         print("       mfdl.py [MANGA_NAME] [CHAPTER_NUMBER]")
  167.         print("       mfdl.py [MANGA_NAME] [RANGE_START] [RANGE_END]")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement