# I'm Da Bolloxs v0.10 (IMDB)

'''
I'm Da Bolloxs v0.10 (IMDB)

This script is designed for my personal use to gather info on a movie
in the format that I require.

It takes an IMDB.com movie URL from the current clipboard
and outputs the poster image and a text file of the info I need.

By Steve Shambles Oct 2018. Updated Nov 2019.
https://stevepython.wordpress.com/

pip3 install beautifulsoup4
pip3 install lxml
pip3 install requests
pip3 install pyperclip
--------------------------
v0.10-solid url check, nothing on clipboard or non imdb address found.
      made multiplatform by soft coding file locations to cwd
      and using webbrowser to open associated apps for text and image display.
'''
import os
import re
import sys
from tkinter import messagebox, Tk
from urllib.request import urlopen
import webbrowser

from bs4 import BeautifulSoup
import pyperclip
import requests


def url_error():
    '''
    Tell the user that no IMDB URL was found on the clipboard, then quit.

    Displays a tkinter error message box and, once it is dismissed,
    ends the program through sys.exit() (no exit code is passed).
    '''
    messagebox.showerror(title='Error',
                         message='No IMDB URL found on clipboard.')
    sys.exit()

# Stop ugly default tk window appearing if error message box is used.
root = Tk()
root.withdraw()

# Get URL of film from clipboard.
# Surrounding whitespace (for example a trailing newline picked up while
# copying) is stripped so that it can neither fail the prefix check below
# nor end up inside the URL that is requested later. "or ''" keeps the
# check working should the clipboard call ever hand back nothing.
imdb_url = (pyperclip.paste() or '').strip()

# Show error and quit if not imdb address.
# An empty clipboard gives an empty string, which fails this check too.
if not imdb_url.startswith('https://www.imdb.com/title/'):
    url_error()

# Read the html source code from the URL.
# A failed download (no connection, HTTP error status, timeout) is reported
# in a message box instead of ending the script with a traceback.
# urllib's URLError and HTTPError, and socket timeouts, are all OSError
# subclasses, so one handler covers them.
try:
    # The with-block closes the connection as soon as the page is read;
    # the timeout stops the script from hanging on a dead connection.
    with urlopen(imdb_url, timeout=30) as imdb_html:
        page_source = imdb_html.read()
except OSError as err:
    messagebox.showerror('Error',
                         'Could not read the IMDB page:\n' + str(err))
    sys.exit()

# Parse the downloaded page with the lxml parser.
btfl_soup = BeautifulSoup(page_source, 'lxml')
print('Scraping, URL')

# Look up each section of the page that is wanted.
# find() returns None when nothing matches instead of raising, so no
# try/except is needed here. Assigning directly also guarantees that every
# name below is bound (to a tag or to None) before it is used later on;
# a swallowed exception would have left the name undefined.

# Get the star actors in the film.
film_actors = btfl_soup.find('table', {'class': 'cast_list'})

# Get the summary text of the film.
film_summary = btfl_soup.find('div', {'class': 'summary_text'})

# Get the films imdb rating.
film_rating = btfl_soup.find('div', {'class': 'ratingValue'})

# Find the title, genre, run time, release date, & age rating of the film.
# All that info is contained in the class title_wrapper
film_info = btfl_soup.find('div', {'class': 'title_wrapper'})

# Write every section that was found to imdb.txt in the current directory.
# Note: the file is opened in 'w' mode, so an imdb.txt left over from an
# earlier scrape is overwritten.
with open('imdb.txt', 'w', encoding='utf-8') as out_file:
    # Sections are written in this fixed order; one that was not found is
    # None and is skipped, since reading .text from None would crash.
    for section in (film_info, film_summary, film_actors, film_rating):
        if section:
            out_file.write(section.text)

# Hand the text file to webbrowser so it opens in the associated program.
webbrowser.open('imdb.txt')

# Find the poster image.
# The poster <div> may be absent; only search inside it when it was found,
# because calling .find() on None raises AttributeError.
imdb_soup = btfl_soup.find('div', {'class': 'poster'})
cover_img = None
if imdb_soup:
    # The dot is escaped so that only a literal '.jpg' in src matches.
    cover_img = imdb_soup.find('img', {'src': re.compile(r'\.jpg')})

# Grab just the URL from the surrounding tags.
# The name is always bound (None when no image was found) so that the
# check below cannot raise NameError.
cover_img_link = cover_img['src'] if cover_img else None

# Save the jpg image from the resulting URL, then open it.
# Note: At present this will overwrite the previous scrape's cover image.
if cover_img_link:
    try:
        img_response = requests.get(cover_img_link, stream=True, timeout=30)
        # The with-block releases the streamed connection when done.
        with img_response:
            # Check the HTTP status before cover.jpg is opened for writing,
            # so a failed request neither creates an empty file nor wipes
            # the image saved by an earlier run.
            img_response.raise_for_status()
            with open('cover.jpg', 'wb') as handle:
                for block in img_response.iter_content(1024):
                    # Skip empty chunks rather than stopping the download.
                    if block:
                        handle.write(block)
    except requests.RequestException as err:
        # The text info has already been saved; a missing poster is not
        # fatal, so report it and carry on.
        print('Could not download poster image:', err)
    else:
        # Open image with systems viewer, only when one was actually saved.
        webbrowser.open('cover.jpg')
else:
    print('No poster image found.')

print('Done scraping.')

# To do:
# - Clean up the text output.
# - Replace the bare except clauses with the specific exception names.
# - Save imdb.txt and cover.jpg under the film's name so that a new scrape
#   does not overwrite the previous one.