Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # date: 2025.03.22
- # author: furas
- # [python - I am trying to access the next "page" of a table im scraping data from and unsure how to do it, new to HTML/JavaScrip - Stack Overflow](https://stackoverflow.com/questions/79525775/i-am-trying-to-access-the-next-page-of-a-table-im-scraping-data-from-and-unsur)
- # [Most Goals Scored - Premier League Player Stats](https://www.premierleague.com/stats/top/players/goals)
- # jq -r '.stats.content[] | (.rank|tostring) + "," + .owner.name.display + "," + .owner.currentTeam.name + "," + .owner.nationalTeam.country + "," + (.value|tostring)' page*.json > data.csv
- # https://jqlang.org/
- import sys # to get page number as argument `python main.py 23`
- import requests
- import json # only to format JSON - to make it more readable
def get_data(page, timeout=30):
    """Read one page of the goals ranking from the server as parsed JSON.

    Args:
        page: Value sent as the ``page`` query parameter (this script
            starts counting at 0).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = 'https://footballapi.pulselive.com/football/stats/ranked/players/goals'

    payload = {
        'page': page,
        'pageSize': 10,
        # NOTE(review): 719 / 1 / 'EN_PR' presumably select the season and
        # competition - confirm against the API before changing them.
        'compSeasons': 719,
        'comps': 1,
        'compCodeForActivePlayer': 'EN_PR',
        'altIds': True,
    }

    # Without an explicit timeout `requests` can wait forever on a stalled
    # connection, which would hang the whole multi-page download.
    response = requests.get(url, params=payload, timeout=timeout)

    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    return response.json()
def process_data(data):
    """Write one CSV row per entry of ``data['stats']['content']`` to stdout.

    Each row is ``rank,name,team,nationality,goals``. Fields are written with
    the ``csv`` module, so values containing commas or quotes are quoted
    instead of silently producing a malformed row. Players without a
    ``currentTeam`` entry get ``-`` as team.

    Items that cannot be converted are reported on stderr (the exception
    message followed by the item as indented JSON) and skipped, so stdout
    stays valid CSV when it is redirected to a file.

    Args:
        data: Parsed JSON with the entries under ``data['stats']['content']``.

    Raises:
        KeyError: If ``data`` has no ``stats`` / ``content`` keys.
    """
    import csv  # local import: the file's import header does not provide it

    # '\n' matches what print() emitted for each row before.
    writer = csv.writer(sys.stdout, lineterminator='\n')

    # jq -r '.stats.content[] | (.rank|tostring) + "," + .owner.name.display + "," + .owner.currentTeam.name + "," + .owner.nationalTeam.country + "," + (.value|tostring)' page* > data.csv
    # https://jqlang.org/
    for item in data['stats']['content']:
        try:
            rank = int(item['rank'])  # convert float to int
            owner = item['owner']
            name = owner['name']['display']
            team = owner.get('currentTeam', {'name': '-'})['name']  # some players don't have team
            nationality = owner['nationalTeam']['country']
            goals = int(item['value'])  # convert float to int
        except Exception as ex:
            # Best-effort: show the offending item and carry on with the rest.
            print(f'Exception: {ex}', file=sys.stderr)
            print(json.dumps(item, indent=2), file=sys.stderr)
        else:
            # Kept outside the try so output errors are not swallowed above.
            writer.writerow([rank, name, team, nationality, goals])
def main():
    """Download the ranking pages and print them as CSV.

    Pages 0 through the last page (inclusive) are fetched one by one. The
    last page defaults to 23 and can be overridden with the first
    command-line argument, e.g. ``python main.py 5``.
    """
    first_page = 0

    cli_args = sys.argv[1:]
    last_page = int(cli_args[0]) if cli_args else 23

    print('Rank,Name,Team,Nationality,Goals')  # header for CSV

    page = first_page
    while page <= last_page:
        # To inspect a raw response, dump the fetched JSON to a file here
        # with json.dump(..., indent=2) before processing it.
        process_data(get_data(page))
        page += 1
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement