Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import datetime
import os

import requests
from requests_html import HTML
# Capture the current date once at import time; `year` is used by
# url_to_text to name the saved HTML file.
now = datetime.datetime.now()
year = now.year

# Box Office Mojo page to download (the /year/world/ listing).
url = 'https://www.boxofficemojo.com/year/world/'
- """
- url, filename and save are the parameters of url_to_text.
- filename defaults to "box.html"
- and save defaults to False.
- These defaults are used when you don't pass other values.
- """
def url_to_text(url, filename="box.html", save=False):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address to fetch with an HTTP GET request.
        filename: Base name for the saved copy. The module-level ``year``
            is inserted before the extension, so the default
            ``"box.html"`` is written as ``box-<year>.html``.
        save: When true, also write the downloaded text to disk as UTF-8.

    Returns:
        The response text when the server answers with status 200,
        otherwise an empty string.
    """
    r = requests.get(url)
    if r.status_code != 200:
        # Any non-200 answer yields an empty string and nothing is saved.
        return ""

    html_text = r.text
    if save:
        # Honor the caller's filename instead of a hard-coded "box" prefix,
        # while keeping the year tag the script has always added.
        stem, ext = os.path.splitext(filename)
        with open(f"{stem}-{year}{ext}", "w", encoding="utf-8") as f:
            f.write(html_text)
    # The text is returned whether or not it was saved.
    return html_text
# Download the page and keep a copy on disk (save=True).
html_text = url_to_text(url=url, save=True)
# Debugging aid: show the type of the value url_to_text returned.
print(type(html_text))

# Parse the downloaded markup and print every <a> element found in it.
r_html = HTML(html=html_text)
print(r_html.find('a'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement