Advertisement
rnqqpp

漫画村

Jan 16th, 2018
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.22 KB | None | 0 0
  1. try:
  2.     from bs4 import BeautifulSoup
  3. except Exception as e:
  4.     print "pip install beautifulsoup4"
  5.     exit()
  6. try:
  7.     from selenium import webdriver
  8.     from selenium.webdriver.firefox.options import Options
  9. except Exception as e:
  10.     print "pip install selenium"
  11.     exit()
  12. import time
  13. import random
  14. import re
  15. import os
  16.  
  17. def sleep(interval=None):
  18.     if interval == None:
  19.         interval = random.uniform(0.5, 1.5)
  20.     time.sleep(interval)
  21.  
  22. def run(target_url, is_show_gui=False):
  23.  
  24.     def _get_page_num(html):
  25.         soup = BeautifulSoup(html, "html.parser")
  26.         page_num = soup.find_all("a", href=re.compile("old_viewer"))[-1].text.strip()
  27.         if page_num.isdigit():
  28.             return int(page_num)
  29.         else:
  30.             print "err...could not get page number"
  31.             exit()
  32.  
  33.     def _get_img_url(html):
  34.         dl_list = []
  35.         soup = BeautifulSoup(html, "html.parser")
  36.         for img_url in soup.find_all("img", class_="miss", style="width:100%;"):
  37.             if "imgon.spimg.ch" == img_url.get("src").split("/")[2]:
  38.                 dl_list.append(img_url.get("src"))
  39.         return dl_list
  40.  
  41.     print "[+] Starting..."
  42.     user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
  43.     profile = webdriver.FirefoxProfile()
  44.     profile.set_preference("general.useragent.override", user_agent)
  45.     options = Options()
  46.     if not is_show_gui:
  47.         options.add_argument("--headless")
  48.     driver = webdriver.Firefox(executable_path=r"I:/work/python/geckodriver/geckodriver.exe", firefox_profile=profile, firefox_options=options)
  49.  
  50.     driver.get(target_url)
  51.     print "[+] Access -->", target_url
  52.     sleep()
  53.  
  54.     viewer_url = target_url.split("?p=")[0] + "old_viewer?p=" + target_url.split("?p=")[1]
  55.     driver.get(viewer_url)
  56.     print "[+] Access -->", viewer_url
  57.     sleep()
  58.  
  59.     save_path = target_url.split("?p=")[1]
  60.     if not os.path.isdir(save_path):
  61.         os.mkdir(save_path)
  62.     print "[+] Create download folder. Save path -->", save_path
  63.  
  64.     page_num = _get_page_num(driver.page_source.encode("utf-8"))
  65.     print "[+] Maximum number of pages -->", page_num
  66.     sleep()
  67.  
  68.     for p in range(1, page_num+1):
  69.         tmp_viewer_url = viewer_url + "&paged=%s" % p
  70.         driver.get(tmp_viewer_url)
  71.         print "[+] Access -->", tmp_viewer_url
  72.         sleep()
  73.  
  74.         dl_list = _get_img_url(driver.page_source.encode("utf-8"))
  75.         print "[+] Get images url. Number of images -->", len(dl_list)
  76.         sleep()
  77.  
  78.         for img_url in dl_list:
  79.             driver.get(img_url)
  80.             save_file = save_path + "\\" + img_url.split("?")[0].split("/")[-1]
  81.             with open(save_file, "wb") as file:
  82.                 file.write(driver.find_elements_by_tag_name("img")[0].screenshot_as_png)
  83.             print "[-] Save img -->", img_url.split("?h")[0]
  84.             sleep(interval=random.uniform(0.5, 1.0))
  85.  
  86.     print "[+] exit...bye"
  87.     driver.quit()
  88.  
  89. def main():
  90.     url = raw_input(prompt="Top page url of the book you want to download --> ")
  91.     try:
  92.         run(url)
  93.     except Exception as e:
  94.         print "err... -->", e
  95.         exit()
  96.  
  97. if __name__ == '__main__':
  98.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement