Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import urllib

try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

import requests
from bs4 import BeautifulSoup
def manga_crawl(from_manga, to_manga):
    """Download every chapter numbered from_manga through to_manga, inclusive.

    For each chapter this fetches the chapter's landing page, creates the
    chapter folder, reads the page count from the landing page and then
    downloads every page image into that folder.

    Args:
        from_manga: Number of the first chapter to download.
        to_manga: Number of the last chapter to download (inclusive).
            Nothing is downloaded when it is smaller than from_manga.

    Raises:
        requests.HTTPError: If a chapter landing page answers with an HTTP
            error status.
    """
    base_url = 'https://www.mangareader.net/one-piece/'
    for manga in range(from_manga, to_manga + 1):
        # Keep the trailing slash: download_jpg appends the page number
        # directly to this URL.
        url = base_url + str(manga) + '/'
        source_code = requests.get(url)
        # Fail before a folder is created for a chapter that cannot be read.
        source_code.raise_for_status()
        soup = BeautifulSoup(source_code.text, "html.parser")
        path_name = create_folder(manga)
        pages = find_manga_pages(soup)
        download_jpg(pages, url, path_name)
def create_folder(manga):
    """Make sure the folder for a chapter exists and return its path.

    The folder is named 'one-piece-<manga>' and is created relative to the
    current working directory.

    Args:
        manga: Chapter number (anything str() can render).

    Returns:
        The relative path of the chapter folder.

    Raises:
        OSError: If the folder cannot be created and does not already exist
            as a directory.
    """
    pathname = 'one-piece-' + str(manga)
    try:
        os.makedirs(pathname)
    except OSError:
        # makedirs also raises when the path already exists; a re-run of the
        # script should reuse the folder rather than crash. Anything else
        # (permissions, a plain file in the way) is still an error.
        if not os.path.isdir(pathname):
            raise
    return pathname
def find_manga_pages(soup):
    """Return the text of the last <option> element of a parsed page.

    The caller uses this value as the chapter's page count. Presumably the
    options belong to the reader's page-selection drop-down, whose last
    entry is the highest page number -- verify against the site markup.

    Args:
        soup: Parsed chapter page; must provide find_all('option').

    Returns:
        The text of the last <option> element, unconverted.

    Raises:
        ValueError: If the page contains no <option> elements.
    """
    options = soup.find_all('option')
    if not options:
        # Previously this case surfaced as an UnboundLocalError on return.
        raise ValueError(
            'no <option> elements found; cannot determine the page count')
    return options[-1].text
def download_jpg(pages, url, path_name):
    """Download pages 1 through `pages` of one chapter as '<page>.jpg' files.

    Each reader page is fetched, the element <img id="img"> is located in
    it, and the file its 'src' attribute points to is saved in path_name.
    The image URL is printed before each download.

    Args:
        pages: Number of pages in the chapter; anything int() accepts.
        url: Chapter URL. The page number is appended to it directly, so it
            should end with '/'.
        path_name: Folder that receives the downloaded files.

    Raises:
        requests.HTTPError: If a reader page answers with an HTTP error
            status.
        ValueError: If a reader page has no <img id="img"> element.
    """
    for page in range(1, int(pages) + 1):
        thisurl = url + str(page)
        source_code = requests.get(thisurl)
        source_code.raise_for_status()
        soup = BeautifulSoup(source_code.text, "html.parser")
        urlsoup = soup.find('img', {'id': 'img'})
        if urlsoup is None:
            # Without this check the lookup below fails with an opaque
            # "'NoneType' object is not subscriptable".
            raise ValueError('no <img id="img"> element found at ' + thisurl)
        iconurl = str(urlsoup['src'])
        this_path_name = os.path.join(path_name, str(page) + '.jpg')
        # Single-argument call form prints the same on Python 2 and 3.
        print("ICON URL->" + iconurl)
        urlretrieve(iconurl, this_path_name)
def main():
    """Read the first and last chapter numbers from stdin and crawl them.

    Two lines are read without a prompt: the first chapter number, then the
    last one. Both are converted with int() and passed to manga_crawl.

    Raises:
        ValueError: If either line is not a valid integer.
    """
    # raw_input only exists on Python 2; on Python 3 input() is the
    # equivalent. Python 2's input() would eval the text, so prefer raw_input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    x = read_line()
    y = read_line()
    manga_crawl(int(x), int(y))
# Run the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment