Advertisement
steve-shambles-2109

I'm Da Bolloxs v0.10 (IMDB)

Nov 26th, 2019
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.50 KB | None | 0 0
  1. '''
  2. I'm Da Bolloxs v0.10 (IMDB)
  3.  
  4. This script is designed for my personal use to gather info on a movie
  5. in the format that I require.
  6.  
  7. It takes an IMDB.com movie URL from the current clipboard
  8. and outputs the poster image and a text file of the info I need.
  9.  
  10. By Steve Shambles Oct 2018. Updated Nov 2019.
  11. https://stevepython.wordpress.com/
  12.  
  13. pip3 install beautifulsoup4
  14. pip3 install requests
  15. pip3 install pyperclip
  16. --------------------------
  17. v0.10-solid url check, nothing on clipboard or non imdb address found.
  18.      made multiplatform by soft coding file locations to cwd
  19.      and using webbrowser to open associated apps for text and image display.
  20. '''
  21. import os
  22. import re
  23. import sys
  24. from tkinter import messagebox, Tk
  25. from urllib.request import urlopen
  26. import webbrowser
  27.  
  28. from bs4 import BeautifulSoup
  29. import pyperclip
  30. import requests
  31.  
  32.  
  33. def url_error():
  34.     messagebox.showerror('Error', 'No IMDB URL found on clipboard.')
  35.     sys.exit()
  36.  
  37. # Stop ugly default tk window appearing if error message box is used.
  38. root = Tk()
  39. root.withdraw()
  40.  
  41. # Get URL of film from clipboard.
  42. imdb_url = pyperclip.paste()
  43.  
  44. # Show error and quit if not imdb address.
  45. if not imdb_url.startswith('https://www.imdb.com/title/'):
  46.     url_error()
  47.  
  48. # Read the html source code from the URL.
  49. imdb_html = urlopen(imdb_url)
  50. btfl_soup = BeautifulSoup(imdb_html.read(), 'lxml')
  51. print('Scraping, URL')
  52.  
  53. # Get the star actors in the film.
  54. try:
  55.     film_actors = btfl_soup.find('table', {'class':'cast_list'})
  56. except:
  57.     pass
  58.  
  59. # Get the summary text of the film.
  60. try:
  61.     film_summary = btfl_soup.find('div', {'class':'summary_text'})
  62. except:
  63.     pass
  64.  
  65. # Get the films imdb rating.
  66. try:
  67.     film_rating = btfl_soup.find('div', {'class':'ratingValue'})
  68. except:
  69.     pass
  70.  
  71. # Find the title, genre, run time, release date, & age rating of the film.
  72. # All that info is contained in the class title_wrapper
  73. try:
  74.     film_info = btfl_soup.find('div', {'class':'title_wrapper'})
  75. except:
  76.     pass
  77.  
  78. # Save all info to a text file in current directory.
  79. # Note: At present this will overwtite previously saved imbd.txt file.
  80. with open('imdb.txt', 'w', encoding='utf-8') as file:
  81. # Check exists before writing or causes crash if None.
  82.     if film_info:
  83.         file.write(film_info.text)
  84.     if film_summary:
  85.         file.write(film_summary.text)
  86.     if film_actors:
  87.         file.write(film_actors.text)
  88.     if film_rating:
  89.         file.write(film_rating.text)
  90.  
  91. # Open the text file for viewing in associated program.
  92. webbrowser.open('imdb.txt')
  93.  
  94. #find the poster image
  95. imdb_soup = btfl_soup.find('div', {'class':'poster'})
  96. cover_img = imdb_soup.find('img', {'src':re.compile('.jpg')})
  97.  
  98. # Grab just the URL from the surrounding tags
  99. # Check to make sure have found something
  100. # before getting link and causing crash if none found
  101. if cover_img:
  102.     cover_img_link = (cover_img['src'])
  103.  
  104. # Save the jpg image from the resulting URL.
  105. # Note: At present this will overwtite previous scrape cover image.
  106. if cover_img_link:
  107.     with open('cover.jpg', 'wb') as handle:
  108.         img_response = requests.get(cover_img_link, stream=True)
  109.         for block in img_response.iter_content(1024):
  110.             if not block:
  111.                 break
  112.             handle.write(block)
  113.  
  114. # Open image with systems viewer
  115. webbrowser.open('cover.jpg')
  116.  
  117. print('Done scraping.')
  118.  
  119. # To do, text output needs cleaning up
  120. # Try to find out the except error names
  121. # save imdb.txt and cover.jpg in films name so not overwrite previous scrape.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement