Advertisement
Guest User

Untitled

a guest
Mar 26th, 2019
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.84 KB | None | 0 0
  1. def parse_base_url():
  2. # Use requests.get to get the informtaion from the web page and store in the variable page
  3. page = requests.get(base_url)
  4. # if the status_code is 200 then the request was made successfully
  5. if page.status_code == 200:
  6. # BeautifulSoup gets the text from the page and lxml parses the information.
  7. bs = BeautifulSoup(page.text, 'lxml')
  8.  
  9. # Get previous 10 players of the year using the class 'multicol', and the links from the unordered list.
  10. last_ten_players = bs.find('table', class_='multicol').find('ul').find_all('li')[:-11:-1][::-1]
  11.  
  12. # Create the names of the data columns for pandas and assign each column an empty list.
  13. data = {
  14. 'Year': [],
  15. 'Country': [],
  16. 'Player': [],
  17. 'Team': [],
  18. }
  19. # for loop to iterate over and parse the data we want from the 10 players
  20. for parser in last_ten_players:
  21.  
  22. year = parser.find('span').previousSibling.split()[0]
  23. # If there is a year append the year to the 'Year' list.
  24. if year:
  25. data['Year'].append(year)
  26. # else add the word 'none' to the 'Year' column
  27. else:
  28. data['Year'].append('none')
  29. # find all the anchor tags <a> and return the 1st <a> tag at index 0
  30. country = parser.findAll('a')[0]['title']
  31. # If there is a country append the country to the 'Country' list.
  32. if country:
  33. data['Country'].append(country)
  34. # else add the word 'none' to the 'Country' column
  35. else:
  36. data['Country'].append('none')
  37. # find all the anchor tags <a> and return the 2nd <a> tag at index 1
  38. player = parser.findAll('a')[1]
  39. # If there is a player append the player to the 'Player' list.
  40. if player:
  41. data['Player'].append(player.text)
  42. # else add the word 'none' to the 'Player' column
  43. else:
  44. data['Player'].append('none')
  45. # find all the anchor tags <a> and return the 3rd <a> tag at index 2
  46. team = parser.findAll('a')[2]
  47. # If there is a year team append the team to the 'Team' list.
  48. if team:
  49. data['Team'].append(team.text)
  50. # else add the word 'none' to the 'Team' column
  51. else:
  52. data['Team'].append('none')
  53.  
  54. # Store data to dataframe using pandas with the following names for the columns.
  55. df = pd.DataFrame(data, columns=['Year','Country','Player', 'Team'])
  56. # Start the index at 1 instead of 0
  57. df.index = df.index + 1
  58. # print dataframe
  59. print(df)
  60. # store and save the data in the dataframe to a csv file
  61. df.to_csv('players_of_the_year.csv', sep=',', index=False, encoding='utf-8')
  62.  
  63. parse_base_url()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement