Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import re
- import urllib.parse
- from urllib.request import Request, urlopen
- from bs4 import BeautifulSoup
- from requests.packages.urllib3.exceptions import InsecureRequestWarning
- requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
class API:
    """Minimal link scraper for a series page under ``url_base``.

    Typical use: call :meth:`episode_url` to select a series, then
    :meth:`find_link` to print every absolute ``https://`` link found on
    that series' page.
    """

    # Only anchors whose href starts with "https://" are reported.
    _HTTPS_HREF = re.compile(r"^https://")

    def __init__(self):
        """Set the site root; no series is selected yet."""
        self.url_base = 'https://horriblesubs.info/'
        # Filled in by episode_url(); None means "not chosen yet".
        self.series_url = None

    def episode_url(self, anime_series='Boku no Hero Academia'):
        """Store the URL path segment for *anime_series* in ``series_url``.

        The title is split on whitespace, re-joined with dashes,
        lower-cased and given a trailing slash, e.g.
        ``'Boku no Hero Academia'`` -> ``'boku-no-hero-academia/'``.

        Args:
            anime_series: Human-readable series title. The default keeps
                the value that used to be hard-coded here; the original
                comments say it is meant to come from JSON, CSV or a
                similar source eventually.
        """
        slug = '-'.join(anime_series.split())
        self.series_url = slug.lower() + '/'

    # NOTE: a requests.Session-based fetch (``single_anime_data``) used to
    # live here as commented-out code; it fetched
    # ``url_base + series_url`` with ``verify=False`` and printed the body.

    def find_link(self):
        """Fetch the selected series page and print its ``https://`` links.

        Prints the requested URL first, then the ``href`` of every ``<a>``
        element whose ``href`` starts with ``https://``. If no series has
        been selected yet, the default one from :meth:`episode_url` is
        used instead of failing on a missing attribute.

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` when the
                page cannot be retrieved.
        """
        if self.series_url is None:
            self.episode_url()

        # The '#78' fragment is kept from the original; its meaning is not
        # documented in this file.
        html_url = self.url_base + self.series_url + '#78'
        print(html_url)

        # A browser-like User-Agent is sent explicitly -- presumably the
        # site rejects urllib's default one (TODO confirm).
        html_request = Request(html_url, headers={'User-Agent': 'Mozilla/5.0'})
        # Context manager guarantees the HTTP response is closed.
        with urlopen(html_request) as response:
            html_page = response.read()

        soup = BeautifulSoup(html_page, features='lxml')
        for link in soup.find_all('a', attrs={'href': self._HTTPS_HREF}):
            print(link.get('href'))
# Script driver: build the scraper, derive the series path segment, then
# fetch the series page and print the links found on it.
main = API()
main.episode_url()  # populates main.series_url
# main.single_anime_data()  # disabled: the method is commented out in API
main.find_link()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement