Advertisement
furas

Python - requests - premierleague.com - (Stackoverflow)

Mar 22nd, 2025 (edited)
295
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.81 KB | None | 0 0
  1. # date: 2025.03.22
  2. # author: furas
  3.  
  4. # [python - I am trying to access the next "page" of a table im scraping data from and unsure how to do it, new to HTML/JavaScrip - Stack Overflow](https://stackoverflow.com/questions/79525775/i-am-trying-to-access-the-next-page-of-a-table-im-scraping-data-from-and-unsur)
  5.  
  6. # [Most Goals Scored - Premier League Player Stats](https://www.premierleague.com/stats/top/players/goals)
  7.  
  8. # jq -r '.stats.content[] | (.rank|tostring) + "," + .owner.name.display + "," + .owner.currentTeam.name + "," + .owner.nationalTeam.country + "," + (.value|tostring)' page*.json > data.csv
  9. # https://jqlang.org/
  10.  
  11. import sys  # to get page number as argument `python main.py 23`
  12. import requests
  13. import json  # only to format JSON - to make it more readable
  14.  
  15.  
  16. def get_data(page):
  17.     """Read JSON data from server."""
  18.  
  19.     url = 'https://footballapi.pulselive.com/football/stats/ranked/players/goals'
  20.  
  21.     payload = {
  22.         'page': page,
  23.         'pageSize': 10,
  24.         'compSeasons': 719,
  25.         'comps': 1,
  26.         'compCodeForActivePlayer': 'EN_PR',
  27.         'altIds': True,
  28.     }
  29.  
  30.     response = requests.get(url, params=payload)
  31.  
  32.     data = response.json()
  33.  
  34.     return data
  35.  
  36.  
  37. def process_data(data):
  38.     """Get data from JSON."""
  39.  
  40.     for item in data['stats']['content']:
  41.         # jq -r '.stats.content[] | (.rank|tostring) + "," + .owner.name.display + "," + .owner.currentTeam.name + "," + .owner.nationalTeam.country + "," + (.value|tostring)' page* > data.csv
  42.         # https://jqlang.org/
  43.  
  44.         try:
  45.             rank = int(item['rank'])  # convert float to int
  46.             name = item['owner']['name']['display']
  47.             team = item['owner'].get('currentTeam', {'name':'-'})['name']  # some players don't have team
  48.             nationality = item['owner']['nationalTeam']['country']
  49.             goals = int(item['value'])  # convert float to int
  50.             print(f'{rank},{name},{team},{nationality},{goals}')
  51.         except Exception as ex:
  52.             print(f'Exception: {ex}')
  53.             print(json.dumps(item, indent=2))
  54.  
  55.  
  56. def main():
  57.     """Main code."""
  58.  
  59.     start_page = 0
  60.     end_page = 23
  61.  
  62.     if len(sys.argv) > 1:
  63.         end_page = int(sys.argv[1])
  64.  
  65.     print(f'Rank,Name,Team,Nationality,Goals')  # header for CSV
  66.  
  67.     for page in range(start_page, end_page+1):
  68.  
  69.         #print(f'--- {page} ---')
  70.  
  71.         data = get_data(page)  # get JSON data
  72.  
  73.         #filename = f'test_page_{page:03}.json'
  74.         #print(f'>>> writing JSON ... {filename}')
  75.         #with open(filename, 'w') as f:
  76.         #    #text = json.dumps(data, indent=2)  # write with indentations to make it more readable
  77.         #    #f.write(text)
  78.         #    json.dump(data, f, indent=2)       # in one line
  79.  
  80.         process_data(data)  #
  81.  
  82. if __name__ == '__main__':
  83.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement