Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Third-party dependencies are imported defensively: when one is missing the
# script prints the pip command that installs it and stops.
# Only ImportError is handled, so an unrelated failure raised while importing
# a package is not misreported as "not installed".
try:
    from bs4 import BeautifulSoup
except ImportError:
    print("pip install beautifulsoup4")
    exit()
try:
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options
except ImportError:
    print("pip install selenium")
    exit()
import time
import random
import re
import os
def sleep(interval=None):
    """Pause execution for ``interval`` seconds.

    Args:
        interval: Number of seconds to wait. When omitted (``None``), a
            random duration between 0.5 and 1.5 seconds is used.
    """
    # Identity test: only the real ``None`` selects the random default, never
    # an object that merely compares equal to None.
    if interval is None:
        interval = random.uniform(0.5, 1.5)
    time.sleep(interval)
def run(target_url, is_show_gui=False,
        geckodriver_path=r"I:/work/python/geckodriver/geckodriver.exe"):
    """Download the page images of a book by driving Firefox through its viewer.

    The "old_viewer" URL is derived from ``target_url``. Each viewer page is
    opened in turn, and every matching image is loaded in the browser and
    saved as a PNG screenshot. Files go into a folder named after the text
    that follows "?p=" in ``target_url``.

    Args:
        target_url: Top page URL of the book; it must contain "?p=".
        is_show_gui: When false (the default) Firefox is started with the
            "--headless" argument.
        geckodriver_path: Path of the geckodriver executable. The default is
            the location this script was originally hard-coded to use.

    Raises:
        IndexError: If ``target_url`` does not contain "?p=".

    The process exits via ``exit()`` when the number of pages cannot be read
    from the viewer page.
    """
    def _get_page_num(html):
        """Return the page count read from the last "old_viewer" link."""
        soup = BeautifulSoup(html, "html.parser")
        viewer_links = soup.find_all("a", href=re.compile("old_viewer"))
        # With no matching link there is nothing to index; fall through to
        # the same error path as a non-numeric label.
        label = viewer_links[-1].text.strip() if viewer_links else ""
        if label.isdigit():
            return int(label)
        print("err...could not get page number")
        exit()

    def _get_img_url(html):
        """Return the ``src`` of each page image hosted on imgon.spimg.ch."""
        soup = BeautifulSoup(html, "html.parser")
        dl_list = []
        for img_tag in soup.find_all("img", class_="miss", style="width:100%;"):
            src = img_tag.get("src")
            # Skip tags without a src, or with one too short to have a host
            # component, instead of crashing on them.
            pieces = src.split("/") if src else []
            if len(pieces) > 2 and pieces[2] == "imgon.spimg.ch":
                dl_list.append(src)
        return dl_list

    # print(...) with one pre-formatted string emits the same text on
    # Python 2 and Python 3.
    print("[+] Starting...")

    # Validate the URL before launching the browser so a bad URL fails fast.
    url_parts = target_url.split("?p=")
    if len(url_parts) < 2:
        raise IndexError("target_url must contain '?p=': %r" % (target_url,))
    viewer_url = url_parts[0] + "old_viewer?p=" + url_parts[1]
    save_path = url_parts[1]

    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    options = Options()
    if not is_show_gui:
        options.add_argument("--headless")
    driver = webdriver.Firefox(executable_path=geckodriver_path, firefox_profile=profile, firefox_options=options)
    try:
        driver.get(target_url)
        print("[+] Access --> %s" % (target_url,))
        sleep()

        driver.get(viewer_url)
        print("[+] Access --> %s" % (viewer_url,))
        sleep()

        if not os.path.isdir(save_path):
            os.mkdir(save_path)
            # NOTE(review): the original indentation was lost; this message is
            # assumed to belong to the folder-creation branch -- confirm.
            print("[+] Create download folder. Save path --> %s" % (save_path,))

        page_num = _get_page_num(driver.page_source.encode("utf-8"))
        print("[+] Maximum number of pages --> %s" % (page_num,))
        sleep()

        for p in range(1, page_num + 1):
            tmp_viewer_url = viewer_url + "&paged=%s" % p
            driver.get(tmp_viewer_url)
            print("[+] Access --> %s" % (tmp_viewer_url,))
            sleep()

            dl_list = _get_img_url(driver.page_source.encode("utf-8"))
            print("[+] Get images url. Number of images --> %s" % (len(dl_list),))
            sleep()

            for img_url in dl_list:
                driver.get(img_url)
                # os.path.join uses the platform's separator instead of a
                # hard-coded backslash.
                file_name = img_url.split("?")[0].split("/")[-1]
                save_file = os.path.join(save_path, file_name)
                with open(save_file, "wb") as out:
                    out.write(driver.find_elements_by_tag_name("img")[0].screenshot_as_png)
                print("[-] Save img --> %s" % (img_url.split("?h")[0],))
                sleep(interval=random.uniform(0.5, 1.0))

        print("[+] exit...bye")
    finally:
        # Always shut the browser down, including on errors and on exit().
        driver.quit()
def main():
    """Ask for a book's top page URL on stdin and download it with ``run``.

    Any ``Exception`` raised by ``run`` is reported on stdout, after which
    the process exits.
    """
    # raw_input only exists on Python 2; Python 3 calls the same thing input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    # The prompt is passed positionally: the built-in does not accept keyword
    # arguments, so raw_input(prompt=...) raises TypeError.
    url = read_line("Top page url of the book you want to download --> ")
    try:
        run(url)
    except Exception as e:
        print("err... --> %s" % (e,))
        exit()
# Start the interactive downloader only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement