Guest User

Untitled

a guest
Jun 24th, 2018
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.59 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import os
  4. import urllib
  5.  
  6.  
  7. def manga_crawl(from_manga, to_manga):
  8. manga = from_manga
  9. url = 'https://www.mangareader.net/one-piece/'
  10. while manga <= to_manga:
  11. url = url + str(manga) + '/'
  12. source_code = requests.get(url)
  13. plain_text = source_code.text
  14. soup = BeautifulSoup(plain_text, "html.parser")
  15.  
  16. #print "URL-> " + url
  17.  
  18. path_name = create_folder(manga)
  19.  
  20. #print "FOLDER-> " + path_name
  21.  
  22. pages = find_manga_pages(soup)
  23.  
  24. #print "PAGES-> " + pages
  25.  
  26. download_jpg(pages, url, path_name)
  27.  
  28. manga = manga + 1
  29. url = 'https://www.mangareader.net/one-piece/'
  30.  
  31.  
  32. def create_folder(manga):
  33. pathname = 'one-piece-' + str(manga)
  34. os.makedirs(pathname)
  35. return pathname
  36.  
  37.  
  38. def find_manga_pages(soup):
  39. for opt in soup.find_all('option'):
  40. counter = opt.text
  41.  
  42. return counter
  43.  
  44.  
  45. def download_jpg(pages, url, path_name):
  46. page = 1
  47. while page <= int(pages):
  48. thisurl = url + str(page)
  49. #print "THIS URL->" + str(thisurl)
  50. source_code = requests.get(thisurl)
  51. plain_text = source_code.text
  52. soup = BeautifulSoup(plain_text, "html.parser")
  53. urlsoup = soup.find('img', {'id': 'img'})
  54. iconurl = str(urlsoup['src'])
  55. this_path_name = path_name + '/' + str(page) + '.jpg'
  56.  
  57. print "ICON URL->" + iconurl
  58.  
  59. urllib.urlretrieve(iconurl, this_path_name)
  60.  
  61. page = page + 1
  62.  
  63.  
  64. def main():
  65. x = raw_input()
  66. y = raw_input()
  67. manga_crawl(int(x), int(y))
  68.  
  69.  
  70. if __name__ == "__main__":
  71. main()
Add Comment
Please, Sign In to add comment