Advertisement
Guest User

Untitled

a guest
Jan 17th, 2020
340
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.63 KB | None | 0 0
import base64
import datetime
import os
import pickle
import re
import time
from io import BytesIO, StringIO

import matplotlib.cm as cm
import matplotlib.colors as datacolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from robobrowser import RoboBrowser
from scipy.stats import gaussian_kde
  12.  
  13. CREDENTIALS = ('username', 'password')
  14.  
  15.  
  16.  
  17.  
  18. ORDERED_SKILLS = [['ID', 'Player', 'Nat', 'Deadline', 'Current Bid'], ['Rating', 'Exp', 'Talents', 'BT'], ['Bat', 'Bowl', 'Keep', 'Field'], ['End', 'Tech', 'Pow']]
  19. SKILL_LEVELS = ['atrocious', 'dreadful', 'poor', 'ordinary', 'average', 'reasonable', 'capable', 'reliable', 'accomplished', 'expert', 'outstanding', 'spectacular', 'exceptional', 'world class', 'elite', 'legendary']
  20.  
  21.  
  22. class Player(): #player class, generates a player object when given a player ID
  23. def __init__(self, ID, market_info=None):
  24. self.class_version = '1.0'
  25. self.ID = ID
  26. self.instancetime = str(int(time.time()))
  27.  
  28. downloaded_page = download_player_page(self.ID)
  29. stats = stats_from_page(downloaded_page)
  30. self.stats = stats
  31.  
  32. if market_info != None:
  33. bid, team_bid = market_info['Current Bid'].split(' ', 1)
  34. deadline = datetime.datetime.strptime(market_info['Deadline'], '%d %b %Y%H:%M')
  35.  
  36. self.stats['Current Bid'] = bid
  37. self.stats['Current Buyer'] = team_bid
  38. self.stats['Deadline'] = deadline
  39.  
  40.  
  41. def stats_from_page(page): #parses html of a players page and returns their stats as a dictionary (the stats are an object already in the website? idk how that works
  42. info = pd.read_html(page) #<<-- reads some structure in the html to get batting/bowling/rating/etc.
  43. infodic = {}
  44. for x in range(0, len(info[0]), 2):
  45. try:
  46. for stat in zip(info[0][x], info[0][x+1]):
  47. infodic[stat[0]] = stat[1]
  48. except:
  49. pass
  50. return infodic
  51.  
  52.  
  53. def download_player_page(ID): #downloads the html of a players page for a given ID
  54. global browser
  55. print('Downloading player ' + str(ID))
  56. browser.open('http://www.fromthepavilion.org/playerpopup.htm?playerId={}&amp;showExtraSkills=true&amp;club=true'.format(ID))
  57. player_info = str(browser.parsed)
  58. return player_info
  59.  
  60.  
  61. def login(credentials): #creates a robobrowser object and logs into ftp with given credentials, required to download other pages html
  62. print('Logging in as {}...'.format(credentials[0]))
  63. browser = RoboBrowser(history=True)
  64. browser.open('http://www.fromthepavilion.org/')
  65. form = browser.get_form(action='securityCheck.htm')
  66.  
  67. form['j_username'] = credentials[0]
  68. form['j_password'] = credentials[1]
  69.  
  70. browser.submit_form(form)
  71. return browser
  72.  
  73.  
  74. def get_current_transfers(pages=[1]): #downloads the first page of transfer listings
  75. global browser
  76. print('Downloading transfer listings...')
  77. browser.open('http://www.fromthepavilion.org/transfer.htm')
  78. form = browser.get_form(action='/transfer.htm')
  79. browser.submit_form(form)
  80. raw_player_info = pd.read_html(str(browser.parsed))[0]
  81. player_data = []
  82. player_ids = re.findall('playerId=\d+&', str(browser.parsed))
  83. player_ids = [t[9:-1] for t in player_ids]
  84. for player in range(len(raw_player_info)):
  85. playerdic = {}
  86. playerdic['ID'] = player_ids[player]
  87. for item in raw_player_info.keys():
  88. playerdic[item] = raw_player_info[item][player]
  89. player_data.append(playerdic)
  90. return player_data
  91.  
  92.  
  93. def save_player_instance(player, database_dir='players/'): #saves an instance of a player
  94. if not os.path.exists(database_dir):
  95. os.mkdir(database_dir)
  96.  
  97. playerdir = database_dir + str(player.ID) + '/'
  98. if not os.path.exists(playerdir):
  99. os.mkdir(playerdir)
  100.  
  101. filename = database_dir + str(player.ID) + '/' + str(player.instancetime)
  102. with open(filename, 'wb') as playerfile:
  103. pickle.dump(player, playerfile)
  104.  
  105. def load_player_instance(ID, instancetime='recent', database_dir='players/'): #loads an instance of a player, defaulting to most recent
  106. playerdir = database_dir + '/' + str(ID) + '/'
  107. if instancetime == 'recent':
  108. instancetime = str(max([int(x) for x in os.listdir(playerdir) if x.isdigit()]))
  109. filename = playerdir + instancetime
  110. with open(filename, 'rb') as playerfile:
  111. player = pickle.load(playerfile)
  112. return player
  113.  
  114.  
  115. def load_players(players, database_dir='players/'): #loads players from a list of IDs or all of them
  116. if players == 'all':
  117. return [load_player_instance(int(player), database_dir=database_dir) for player in os.listdir(database_dir) if player.isdigit()]
  118.  
  119. return [load_player_instance(player, database_dir=database_dir) for player in players]
  120.  
  121.  
  122. def scrape_transfer_market(pages=-1): #downloads first page of transfer market, creates player instances for players on the first page, saves them, and waits until the last player on the page has been sold before repeating (to leave running and download all players that go through the market)
  123. global browser, CREDENTIALS
  124. while pages != 0:
  125. try:
  126. current_page = get_current_transfers()
  127. except:
  128. browser = login(CREDENTIALS)
  129. current_page = get_current_transfers()
  130.  
  131. current_players = [Player(t['ID'], market_info=t) for t in current_page]
  132. for player in current_players:
  133. save_player_instance(player)
  134. print('Saved {} players! {} pages remaining.'.format(len(current_players), pages))
  135.  
  136. time_until_refresh = round((current_players[-1].stats['Deadline'] - datetime.datetime.utcnow()).total_seconds()) + 1
  137. current_time_until_refresh = time_until_refresh
  138. while current_time_until_refresh > 0:
  139. if current_time_until_refresh % 100 == 0:
  140. print('{} seconds remaining until the next page is downloaded.'.format(current_time_until_refresh))
  141. current_time_until_refresh -= 1
  142. time.sleep(1)
  143. pages -= 1
  144.  
  145. def skill_age_graph(players, skill, colors='density', saveas=None, returnas=None): #graphs player data. see examples
  146. plot, ax = plt.subplots()
  147.  
  148. plot.set_size_inches(10, 7.5)
  149. plt.ylabel('{}'.format(skill))
  150. plt.xlabel('Age')
  151.  
  152. if skill in ['Rating', 'Wage', 'Current Bid']:
  153. graph_data = [[int(re.sub("\D", "", x.stats[skill])), int(x.stats['Age'])] for x in players]
  154. levels, age = zip(*graph_data)
  155. levels, age = np.array(levels), np.array(age)
  156.  
  157. plt.ylim(min([int(x) for x in levels]) * 0.9, max([int(x) for x in levels]) * 1.1)
  158. else:
  159. graph_data = [[SKILL_LEVELS.index(x.stats[skill]), int(x.stats['Age'])] for x in players]
  160. levels, age = zip(*graph_data)
  161. levels, age = np.array(levels), np.array(age)
  162.  
  163. plt.ylim(-1, 16)
  164. plt.yticks(np.arange(len(SKILL_LEVELS)), SKILL_LEVELS, rotation=45)
  165.  
  166. max_age = int(max([x[1] for x in graph_data])) + 2
  167. plt.xlim(15, max_age)
  168. plt.xticks(np.arange(16, max_age), range(16, max_age))
  169.  
  170. if colors == 'density':
  171. xy = np.vstack([age,levels]) #calculate point density
  172. z = gaussian_kde(xy)(xy)
  173.  
  174. idx = z.argsort() # sort points by denisty, so the most dense points are plotted last and appear on top
  175. colorvalues = idx
  176. age, levels, z = age[idx], levels[idx], z[idx]
  177.  
  178. colorticks = ['least dense', 'most dense']
  179. ticks = [0, 1]
  180.  
  181. elif colors == 'wage':
  182. colorvalues = [int(re.sub("\D", "", x.stats['Wage'])) for x in players]
  183. largest_value = max(colorvalues)
  184. normalized_wages = datacolors.Normalize(min(colorvalues), max(colorvalues))
  185. z = normalized_wages(colorvalues)
  186.  
  187. elif colors == 'bid':
  188. colorvalues = [int(re.sub("\D", "", x.stats['Current Bid'])) for x in players]
  189. largest_value = max(colorvalues)
  190. quantile = np.quantile(colorvalues, 0.995)
  191. for i in range(len(colorvalues)):
  192. if colorvalues[i] > quantile:
  193. colorvalues[i] = quantile
  194. normalized_bids = datacolors.Normalize(0, max(colorvalues))
  195. z = normalized_bids(colorvalues)
  196.  
  197. if colors in ['wage', 'bid']:
  198. colors = colors + ' percentile' #for colorbar title
  199. percentiles = [20, 40, 60, 80, 99.5]
  200.  
  201.  
  202. colorticks = [str(int(min(colorvalues))) + ' (min)']
  203. colorticks = colorticks + ['{} ({}th percentile)'.format(int(np.percentile(colorvalues, x)), x) for x in percentiles]
  204. colorticks += [str(int(largest_value)) + ' (max)']
  205. ticks = [0] + [x / 100 if x < 90 else 0.95 for x in percentiles] + [1]
  206.  
  207. age, levels, z = zip(*sorted([(age[i], levels[i], z[i]) for i in range(len(z))], key=lambda pair: pair[2]))
  208. points = plt.scatter(age, levels, c=z, s=50, edgecolor='')
  209.  
  210. plt.title('FTP Recent Transfers: {} skill (n = {})'.format(skill, len(players)))
  211. colorbar = plot.colorbar(points, ticks=ticks)
  212. colorbar.ax.set_yticklabels(colorticks)
  213. colorbar.set_label(colors, rotation=270)
  214. plt.tight_layout()
  215. plt.show()
  216.  
  217. if saveas == 'html' or returnas == 'html':
  218. tmpfile = BytesIO()
  219.  
  220. plt.savefig(tmpfile, format='png')
  221. encoded = base64.b64encode(tmpfile.getvalue()).decode('utf-8')
  222.  
  223. html = '<img src=\'data:image/png;base64,{}\'>'.format(encoded)
  224.  
  225. with open('test.html', 'w') as f:
  226. f.write(html)
  227.  
  228. if returnas == 'html':
  229. return html
  230.  
  231.  
  232. #scrape market
  233.  
  234. #browser = login(CREDENTIALS)
  235. #scrape_transfer_market()
  236.  
  237.  
  238. #graph stuff
  239.  
  240. #skill_age_graph(load_players('all'), 'Bat.', colors='density', saveas='html')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement