Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- from bs4 import BeautifulSoup as bs
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.common.exceptions import TimeoutException, NoSuchElementException, UnexpectedAlertPresentException, WebDriverException
- from selenium.webdriver.chrome.options import Options
- import time
# Root of the site; episode hrefs found on the listing page are appended to it.
BASE_URL = 'https://www.springfieldspringfield.co.uk/'
# Listing page that links to every episode script of the show.
LISTING_URL = BASE_URL + 'episode_scripts.php?tv-show=the-secret-life-of-the-american-teenager'
CHROMEDRIVER_PATH = '/Users/MyName/bin/chromedriver'
# Word whose occurrences are tallied per episode and overall.
TARGET_WORD = 'sex'
# Common word used as the denominator of the final ratio.
BASELINE_WORD = 'the'
# NOTE(review): the script prints REFERENCE_TOTAL / number_of_episodes before
# scraping; the meaning of 1198 is not visible here (presumably a total from
# an earlier run) -- confirm or remove.
REFERENCE_TOTAL = 1198


def count_word(text, word):
    """Return how many whitespace-separated tokens of *text* equal *word*.

    Matching is exact: case differences and attached punctuation ("Sex",
    "sex.") do not count. Splitting on any whitespace (rather than only on a
    single space) keeps tokens that end a line from being missed.
    """
    return text.split().count(word)


def top_episodes(per_episode):
    """Return every ``(count, index)`` entry whose count equals the maximum.

    *per_episode* is a list of ``(count, index)`` tuples; ties are all
    returned, in their original order. An empty input yields an empty list.
    """
    if not per_episode:
        return []
    best = max(entry[0] for entry in per_episode)  # computed once, not per entry
    return [entry for entry in per_episode if entry[0] == best]


def script_text(page_source):
    """Return the text of the episode-script container, or None if absent."""
    soup = bs(page_source, 'html.parser')
    container = soup.find('div', class_='scrolling-script-container')
    return None if container is None else container.text


def main():
    """Scrape every episode script and print word-count statistics.

    Prints, in order: REFERENCE_TOTAL divided by the number of episodes, the
    running totals of TARGET_WORD and BASELINE_WORD after each episode, the
    overall TARGET_WORD / BASELINE_WORD ratio, and the ``(count, index)``
    tuple of each episode with the highest TARGET_WORD count.
    """
    import sys  # local import: only needed for the warnings below

    opts = Options()
    # NOTE(review): "my-user-agent" looks like a placeholder; Chrome normally
    # expects an argument of the form "user-agent=<string>" -- confirm.
    opts.add_argument("my-user-agent")
    # NOTE(review): newer Selenium releases take `options=` (and a Service
    # object for the driver path) instead of these arguments -- confirm
    # against the installed version.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, chrome_options=opts)

    target_total = 0
    baseline_total = 0
    per_ep = []
    try:
        driver.get(LISTING_URL)
        listing = bs(driver.page_source, 'html.parser')
        episodes = listing.find_all('a', class_='season-episode-title')
        if not episodes:
            sys.stderr.write('no episode links found on listing page\n')
            return
        print(REFERENCE_TOTAL / len(episodes))

        for index, link in enumerate(episodes):
            driver.get(BASE_URL + link['href'])
            text = script_text(driver.page_source)
            if text is None:
                # Page layout differed or the load failed; skip rather than crash.
                sys.stderr.write('no script container for episode %d\n' % index)
                continue
            count = count_word(text, TARGET_WORD)
            target_total += count
            # Each occurrence counts once (the original added 2 per hit).
            baseline_total += count_word(text, BASELINE_WORD)
            per_ep.append((count, index))
            print(target_total)
            print(baseline_total)
    finally:
        # Always shut the browser down, even if scraping raised.
        driver.quit()

    if baseline_total:
        print(target_total / baseline_total)
    else:
        sys.stderr.write('baseline word never seen; ratio undefined\n')

    for entry in top_episodes(per_ep):
        print(entry)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement