Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- I'm Da Bolloxs v0.10 (IMDB)
- This script is designed for my personal use to gather info on a movie
- in the format that I require.
- It takes an IMDB.com movie URL from the current clipboard
- and outputs the poster image and a text file of the info I need.
- By Steve Shambles Oct 2018. Updated Nov 2019.
- https://stevepython.wordpress.com/
- pip3 install beautifulsoup4
- pip3 install lxml
- pip3 install requests
- pip3 install pyperclip
- --------------------------
- v0.10 - Solid URL check: an error is shown if the clipboard is empty
- or does not contain an IMDB address.
- Made multiplatform by soft-coding file locations to the current working
- directory and using webbrowser to open the associated apps for text and
- image display.
- '''
- import os
- import re
- import sys
- from tkinter import messagebox, Tk
- from urllib.request import urlopen
- import webbrowser
- from bs4 import BeautifulSoup
- import pyperclip
- import requests
def url_error():
    """Report that the clipboard holds no IMDB URL, then terminate.

    Shows a modal tkinter error box and raises SystemExit. The exit status
    is non-zero so that shells and calling scripts can tell the run failed.
    """
    messagebox.showerror('Error', 'No IMDB URL found on clipboard.')
    # A bare sys.exit() reports status 0 (success); this is an error path.
    sys.exit(1)
# Stop ugly default tk window appearing if error message box is used.
root = Tk()
root.withdraw()

# Get URL of film from clipboard. Guard against an empty clipboard and trim
# the stray whitespace/newlines that often get copied along with a URL.
imdb_url = (pyperclip.paste() or '').strip()

# Show error and quit if not imdb address.
if not imdb_url.startswith('https://www.imdb.com/title/'):
    url_error()

# Read the html source code from the URL.
# The response is closed as soon as the body has been read, and a timeout
# stops the script hanging forever on a dead connection.
try:
    with urlopen(imdb_url, timeout=30) as imdb_html:
        page_source = imdb_html.read()
except OSError as err:
    # urllib's URLError/HTTPError and socket timeouts all derive from OSError.
    messagebox.showerror(
        'Error', 'Could not download the IMDB page:\n{}'.format(err))
    sys.exit(1)

btfl_soup = BeautifulSoup(page_source, 'lxml')
print('Scraping, URL')

# find() returns None when nothing matches rather than raising, so no
# try/except is needed here; each result is checked before it is used.

# Get the star actors in the film.
film_actors = btfl_soup.find('table', {'class': 'cast_list'})

# Get the summary text of the film.
film_summary = btfl_soup.find('div', {'class': 'summary_text'})

# Get the films imdb rating.
film_rating = btfl_soup.find('div', {'class': 'ratingValue'})

# Find the title, genre, run time, release date, & age rating of the film.
# All that info is contained in the class title_wrapper
film_info = btfl_soup.find('div', {'class': 'title_wrapper'})

# Save all info to a text file in current directory.
# Note: At present this will overwrite a previously saved imdb.txt file.
with open('imdb.txt', 'w', encoding='utf-8') as file:
    # Only write the sections that were actually found on the page;
    # calling .text on None would crash.
    for section in (film_info, film_summary, film_actors, film_rating):
        if section is not None:
            file.write(section.text)

# Open the text file for viewing in associated program.
webbrowser.open('imdb.txt')

# Find the poster image.
# Both lookups may come back empty, so every step is guarded and
# cover_img_link is always bound (to None when there is no poster).
imdb_soup = btfl_soup.find('div', {'class': 'poster'})
cover_img = None
if imdb_soup is not None:
    # Escape the dot so only a literal ".jpg" matches.
    cover_img = imdb_soup.find('img', {'src': re.compile(r'\.jpg')})

# Grab just the URL from the surrounding tags.
cover_img_link = cover_img['src'] if cover_img is not None else None

# Save the jpg image from the resulting URL.
# Note: At present this will overwrite the previous scrape's cover image.
if cover_img_link:
    try:
        # Make the request before touching cover.jpg so a failed download
        # does not leave behind an empty or truncated file.
        img_response = requests.get(cover_img_link, stream=True, timeout=30)
        img_response.raise_for_status()
    except requests.exceptions.RequestException as err:
        messagebox.showerror(
            'Error', 'Could not download the poster image:\n{}'.format(err))
    else:
        with open('cover.jpg', 'wb') as handle:
            for block in img_response.iter_content(1024):
                if not block:
                    break
                handle.write(block)

        # Open image with systems viewer
        webbrowser.open('cover.jpg')

print('Done scraping.')

# To do, text output needs cleaning up
# save imdb.txt and cover.jpg in films name so not overwrite previous scrape.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement