Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
# Module-level accumulators. titleArray and movielinkArray are appended to by
# start(); movielink2Array is appended to by firstlevel().
titleArray, movielinkArray, movielink2Array = [], [], []
# These lists are created here but not touched by the functions visible in
# this file.
combineArray, reviewsArray, idArray = [], [], []
# Empty-string placeholders; the visible functions never read these globals.
# NOTE: the module-level name `id` shadows the builtin id() in this module.
id = link = ''
def start(pages=11):
    """Crawl the Yahoo Taiwan "in theaters" listing pages and follow each movie.

    For every listing page from ``p=1`` to ``p=pages`` this fetches the page,
    prints a page banner, and for each ``div.row-container`` entry appends the
    ``<h4>`` text to ``titleArray`` and the ``<h4><a href>`` target to
    ``movielinkArray``. Each collected link is then passed to ``firstlevel``.

    Args:
        pages: Number of listing pages to request. Defaults to 11, the count
            that was previously hard-coded.
    """
    for page in range(1, pages + 1):
        url = 'https://tw.movies.yahoo.com/movie_intheaters.html?p=' + str(page)
        # A timeout keeps one stalled connection from hanging the whole crawl.
        res = requests.get(url, timeout=10)
        soup = BeautifulSoup(res.text, 'html.parser')
        print("第" + str(page) + "頁========================================")
        for row in soup.find_all('div', class_="row-container"):
            heading = row.find('h4')
            anchor = heading.find('a') if heading is not None else None
            href = anchor.get('href') if anchor is not None else None
            if not href:
                # find() returns None when the tag is missing; skip such rows
                # so titleArray and movielinkArray stay index-aligned.
                continue
            titleArray.append(heading.text)
            movielinkArray.append(href)
            # Pass one link at a time: handing over the whole accumulated
            # list would make firstlevel() glue every URL into one string.
            firstlevel([href])
def firstlevel(movielinkArray):
    """Fetch each movie page and collect the links in its ``li.last`` items.

    For every URL given, the page is downloaded and parsed. For each
    ``<li class="last">`` element, the ``href`` of the ``<a>`` inside its
    ``<span>`` is prefixed with ``https://tw.movies.yahoo.com``, printed, and
    appended to the module-level ``movielink2Array``.

    Args:
        movielinkArray: An iterable of page URLs, or a single URL string.
            An empty iterable is a no-op.
    """
    # A bare string used to work by accident (''.join(s) == s); keep accepting it.
    if isinstance(movielinkArray, str):
        movielinkArray = [movielinkArray]
    # Request each URL separately. Joining the list into one string only
    # produced a valid URL when the list held exactly one element.
    for url in movielinkArray:
        if not url:
            continue
        # A timeout keeps one stalled connection from hanging the whole crawl.
        res = requests.get(url, timeout=10)
        soup = BeautifulSoup(res.text, 'html.parser')
        for item in soup.find_all('li', class_="last"):
            span = item.find('span')
            anchor = span.find('a') if span is not None else None
            href = anchor.get('href') if anchor is not None else None
            if not href:
                # find() returns None for a missing tag; skip instead of crashing.
                continue
            target = "https://tw.movies.yahoo.com" + href
            print(target)
            movielink2Array.append(target)
# Run the crawl only when this file is executed as a script, not on import.
if '__main__' == __name__:
    start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement