Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os, datetime, pickle, time
- from robobrowser import RoboBrowser
- import pandas as pd
- import re
- import matplotlib.pyplot as plt
- import matplotlib.colors as datacolors
- import matplotlib.cm as cm
- import numpy as np
- from scipy.stats import gaussian_kde
- import base64
- from io import BytesIO
# FTP account used by login(); replace the placeholders with real credentials.
CREDENTIALS = ('username', 'password')
# Groups of stat names; presumably a display/column ordering for player stats —
# not referenced in this chunk, verify against the rest of the file.
ORDERED_SKILLS = [['ID', 'Player', 'Nat', 'Deadline', 'Current Bid'], ['Rating', 'Exp', 'Talents', 'BT'], ['Bat', 'Bowl', 'Keep', 'Field'], ['End', 'Tech', 'Pow']]
# Skill words in ascending order; the list index is the numeric level used
# when graphing (see skill_age_graph).
SKILL_LEVELS = ['atrocious', 'dreadful', 'poor', 'ordinary', 'average', 'reasonable', 'capable', 'reliable', 'accomplished', 'expert', 'outstanding', 'spectacular', 'exceptional', 'world class', 'elite', 'legendary']
class Player(): #player class, generates a player object when given a player ID
    """Snapshot of a single FTP player, captured at construction time.

    Downloads the player's pop-up page through the module-level ``browser``
    session and stores the parsed stats dict in ``self.stats``.  When
    ``market_info`` (a row dict from get_current_transfers) is supplied, the
    current bid, buyer and auction deadline are merged into the stats.
    """

    def __init__(self, ID, market_info=None):
        self.class_version = '1.0'  # bump if the pickled attribute layout changes
        self.ID = ID
        # Capture time as a Unix-timestamp string; doubles as the pickle
        # filename in save_player_instance.
        self.instancetime = str(int(time.time()))
        downloaded_page = download_player_page(self.ID)
        self.stats = stats_from_page(downloaded_page)
        if market_info is not None:  # identity check, not != None
            # 'Current Bid' looks like '<amount> <bidding team name>'
            bid, team_bid = market_info['Current Bid'].split(' ', 1)
            # Site renders the deadline with date and time run together,
            # hence the unusual '%Y%H:%M' format.
            deadline = datetime.datetime.strptime(market_info['Deadline'], '%d %b %Y%H:%M')
            self.stats['Current Bid'] = bid
            self.stats['Current Buyer'] = team_bid
            self.stats['Deadline'] = deadline
def stats_from_page(page): #parses html of a players page and returns their stats as a dictionary
    """Parse the HTML of a player's page into a {stat name: stat value} dict.

    The first table on the page alternates name/value columns, so each pair
    of columns (2k, 2k+1) is zipped row-wise into the result.

    Fixes: the original looped ``range(0, len(info[0]), 2)``, i.e. over the
    ROW count, while indexing COLUMNS — column pairs beyond the row count
    were silently dropped.  The bound is now the column count, and the bare
    ``except`` is narrowed to KeyError (odd column count / non-int labels).
    """
    info = pd.read_html(page)  # list of DataFrames, one per <table>
    stats_table = info[0]
    infodic = {}
    for x in range(0, len(stats_table.columns), 2):
        try:
            for name, value in zip(stats_table[x], stats_table[x + 1]):
                infodic[name] = value
        except KeyError:
            # last name-column has no matching value-column, or the table
            # parsed with non-integer column labels — skip that pair
            pass
    return infodic
def download_player_page(ID): #downloads the html of a players page for a given ID
    """Fetch and return the raw HTML of the pop-up page for player ``ID``.

    Relies on the module-level ``browser`` session created by ``login``.
    """
    global browser
    print('Downloading player ' + str(ID))
    url = 'http://www.fromthepavilion.org/playerpopup.htm?playerId={}&showExtraSkills=true&club=true'.format(ID)
    browser.open(url)
    return str(browser.parsed)
def login(credentials): #creates a robobrowser object and logs into ftp with given credentials
    """Create a RoboBrowser session and authenticate against FTP.

    ``credentials`` is a (username, password) pair.  Returns the logged-in
    browser, which the page-download helpers expect as a module global.
    """
    username, password = credentials
    print('Logging in as {}...'.format(username))
    session = RoboBrowser(history=True)
    session.open('http://www.fromthepavilion.org/')
    login_form = session.get_form(action='securityCheck.htm')
    login_form['j_username'] = username
    login_form['j_password'] = password
    session.submit_form(login_form)
    return session
def get_current_transfers(pages=None): #downloads the first page of transfer listings
    """Return the players listed on page 1 of the transfer market.

    Each entry is a dict of the listing-table columns plus an 'ID' key
    scraped from the playerId=... links, which appear in the same order as
    the table rows.

    ``pages`` is unused; kept for backward compatibility, with the original
    mutable default ``[1]`` replaced by None.
    """
    global browser
    print('Downloading transfer listings...')
    browser.open('http://www.fromthepavilion.org/transfer.htm')
    form = browser.get_form(action='/transfer.htm')
    browser.submit_form(form)
    page_html = str(browser.parsed)  # stringify once; reused for table and IDs
    raw_player_info = pd.read_html(page_html)[0]
    # Raw regex with a capture group replaces the original fragile
    # t[9:-1] slice of the full 'playerId=NNN&' match.
    player_ids = re.findall(r'playerId=(\d+)&', page_html)
    player_data = []
    for row in range(len(raw_player_info)):
        playerdic = {'ID': player_ids[row]}
        for column in raw_player_info.keys():
            playerdic[column] = raw_player_info[column][row]
        player_data.append(playerdic)
    return player_data
def save_player_instance(player, database_dir='players/'): #saves an instance of a player
    """Pickle ``player`` to ``database_dir/<ID>/<instancetime>``.

    One directory per player ID; each capture is a separate file named by
    its Unix timestamp, so a player's history accumulates over time.
    """
    playerdir = os.path.join(database_dir, str(player.ID))
    # makedirs creates both levels in one race-free call, replacing the
    # original pair of exists()/mkdir() checks.
    os.makedirs(playerdir, exist_ok=True)
    filename = os.path.join(playerdir, str(player.instancetime))
    with open(filename, 'wb') as playerfile:
        pickle.dump(player, playerfile)
def load_player_instance(ID, instancetime='recent', database_dir='players/'): #loads an instance of a player, defaulting to most recent
    """Unpickle one saved capture of a player, the most recent by default.

    ``instancetime`` is either the string 'recent' or a specific timestamp
    string as written by save_player_instance.

    Fixes: the path was built as ``database_dir + '/' + ID + '/'``, yielding
    a double slash ('players//7/') inconsistent with the save path; it now
    uses os.path.join like save_player_instance.
    """
    playerdir = os.path.join(database_dir, str(ID))
    if instancetime == 'recent':
        # capture files are named by integer Unix timestamps
        instancetime = str(max(int(x) for x in os.listdir(playerdir) if x.isdigit()))
    filename = os.path.join(playerdir, instancetime)
    # NOTE: pickle is only safe because these files are written locally by
    # save_player_instance — never point this at untrusted data.
    with open(filename, 'rb') as playerfile:
        return pickle.load(playerfile)
def load_players(players, database_dir='players/'): #loads players from a list of IDs or all of them
    """Load the most recent saved instance of each requested player.

    ``players`` is either a list of player IDs or the string 'all', which
    loads every numeric player directory found under ``database_dir``.
    """
    if players == 'all':
        ids = [int(name) for name in os.listdir(database_dir) if name.isdigit()]
        return [load_player_instance(pid, database_dir=database_dir) for pid in ids]
    loaded = []
    for pid in players:
        loaded.append(load_player_instance(pid, database_dir=database_dir))
    return loaded
def scrape_transfer_market(pages=-1): #downloads pages of the transfer market in a loop, saving every player seen
    """Repeatedly scrape and save the first transfer-market page.

    Downloads the current listings, saves a Player instance for each row,
    then sleeps until the last listed auction's deadline has passed before
    refreshing.  ``pages=-1`` loops forever; a positive value stops after
    that many iterations.
    """
    global browser  # CREDENTIALS is only read, so it needs no global declaration
    while pages != 0:
        try:
            current_page = get_current_transfers()
        except Exception:
            # session likely expired (or browser never created) — log in
            # again and retry once; narrowed from the original bare except
            browser = login(CREDENTIALS)
            current_page = get_current_transfers()
        current_players = [Player(t['ID'], market_info=t) for t in current_page]
        for player in current_players:
            save_player_instance(player)
        print('Saved {} players! {} pages remaining.'.format(len(current_players), pages))
        # Wait until the last auction on the page closes, plus 1s of slack.
        # Deadline is naive UTC (see Player), so compare against utcnow().
        time_until_refresh = round((current_players[-1].stats['Deadline'] - datetime.datetime.utcnow()).total_seconds()) + 1
        for remaining in range(time_until_refresh, 0, -1):
            if remaining % 100 == 0:
                print('{} seconds remaining until the next page is downloaded.'.format(remaining))
            time.sleep(1)
        pages -= 1
def skill_age_graph(players, skill, colors='density', saveas=None, returnas=None): #graphs player data. see examples
    # Scatter-plot of one skill (y-axis) against age (x-axis) for a list of
    # Player instances.  ``colors`` selects the point colouring: 'density'
    # (gaussian KDE of the scatter), 'wage' or 'bid' (normalized money values).
    # NOTE(review): any other ``colors`` value leaves z/ticks/colorticks unset
    # and raises NameError at the colorbar call below — confirm callers only
    # ever pass the three supported modes.
    plot, ax = plt.subplots()
    plot.set_size_inches(10, 7.5)
    plt.ylabel('{}'.format(skill))
    plt.xlabel('Age')
    if skill in ['Rating', 'Wage', 'Current Bid']:
        # numeric stats: strip every non-digit (commas, currency symbols)
        # and plot the raw numbers with a 10% margin on the y-axis
        graph_data = [[int(re.sub("\D", "", x.stats[skill])), int(x.stats['Age'])] for x in players]
        levels, age = zip(*graph_data)
        levels, age = np.array(levels), np.array(age)
        plt.ylim(min([int(x) for x in levels]) * 0.9, max([int(x) for x in levels]) * 1.1)
    else:
        # named skills: the y value is the word's index in SKILL_LEVELS,
        # and the y-axis is labelled with the skill words themselves
        graph_data = [[SKILL_LEVELS.index(x.stats[skill]), int(x.stats['Age'])] for x in players]
        levels, age = zip(*graph_data)
        levels, age = np.array(levels), np.array(age)
        plt.ylim(-1, 16)
        plt.yticks(np.arange(len(SKILL_LEVELS)), SKILL_LEVELS, rotation=45)
    max_age = int(max([x[1] for x in graph_data])) + 2
    plt.xlim(15, max_age)
    plt.xticks(np.arange(16, max_age), range(16, max_age))
    if colors == 'density':
        xy = np.vstack([age,levels]) #calculate point density
        z = gaussian_kde(xy)(xy)
        idx = z.argsort() # sort points by density, so the most dense points are plotted last and appear on top
        colorvalues = idx
        age, levels, z = age[idx], levels[idx], z[idx]
        colorticks = ['least dense', 'most dense']
        ticks = [0, 1]
    elif colors == 'wage':
        # wages as plain integers, normalized to [0, 1] for the colormap
        colorvalues = [int(re.sub("\D", "", x.stats['Wage'])) for x in players]
        largest_value = max(colorvalues)
        normalized_wages = datacolors.Normalize(min(colorvalues), max(colorvalues))
        z = normalized_wages(colorvalues)
    elif colors == 'bid':
        colorvalues = [int(re.sub("\D", "", x.stats['Current Bid'])) for x in players]
        largest_value = max(colorvalues)
        # clip the top 0.5% of bids so a few huge sales don't flatten the scale
        quantile = np.quantile(colorvalues, 0.995)
        for i in range(len(colorvalues)):
            if colorvalues[i] > quantile:
                colorvalues[i] = quantile
        normalized_bids = datacolors.Normalize(0, max(colorvalues))
        z = normalized_bids(colorvalues)
    if colors in ['wage', 'bid']:
        colors = colors + ' percentile' #for colorbar title
        # colorbar labelled with min / chosen percentiles / max of the money values
        percentiles = [20, 40, 60, 80, 99.5]
        colorticks = [str(int(min(colorvalues))) + ' (min)']
        colorticks = colorticks + ['{} ({}th percentile)'.format(int(np.percentile(colorvalues, x)), x) for x in percentiles]
        colorticks += [str(int(largest_value)) + ' (max)']
        ticks = [0] + [x / 100 if x < 90 else 0.95 for x in percentiles] + [1]
        # plot cheaper points first so the most expensive sit on top
        # (the density branch already did its own sort above)
        age, levels, z = zip(*sorted([(age[i], levels[i], z[i]) for i in range(len(z))], key=lambda pair: pair[2]))
    points = plt.scatter(age, levels, c=z, s=50, edgecolor='')
    plt.title('FTP Recent Transfers: {} skill (n = {})'.format(skill, len(players)))
    colorbar = plot.colorbar(points, ticks=ticks)
    colorbar.ax.set_yticklabels(colorticks)
    colorbar.set_label(colors, rotation=270)
    plt.tight_layout()
    plt.show()
    if saveas == 'html' or returnas == 'html':
        # embed the figure in an <img> tag as base64-encoded PNG
        tmpfile = BytesIO()
        plt.savefig(tmpfile, format='png')
        encoded = base64.b64encode(tmpfile.getvalue()).decode('utf-8')
        html = '<img src=\'data:image/png;base64,{}\'>'.format(encoded)
        # NOTE(review): test.html is written even when only returnas=='html'
        # was requested — confirm this side effect is intended
        with open('test.html', 'w') as f:
            f.write(html)
        if returnas == 'html':
            return html
- #scrape market
- #browser = login(CREDENTIALS)
- #scrape_transfer_market()
- #graph stuff
- #skill_age_graph(load_players('all'), 'Bat.', colors='density', saveas='html')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement