Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json
import re
import sys
from copy import copy
from urllib.parse import urljoin

import lxml.html
import requests as req
class MPG():
    """Scraper and query helper for a monpetitgazon.com league.

    An instance holds a ``requests`` session and the league password used to
    build league-specific page URLs.  Player statistics gathered by
    :meth:`crawl_days` are cached in ``mpg.json`` by :meth:`do_load` and can
    then be filtered with :meth:`query` or printed with
    :meth:`print_category`.
    """

    # Captures the trailing "<home> - <away>" part of a match page title.
    # Team names may contain hyphens but no whitespace.
    _TITLE_TEAMS_RE = re.compile(r".* ([\w-]+ - [\w-]+$)")

    def __init__(self, ligue_mdp):
        """Store the league password and prepare the HTTP session.

        :param ligue_mdp: value sent as the ``mdp`` query parameter of every
            league page URL.
        """
        self.ligue_mdp = ligue_mdp
        self.cookies = None
        self.session = req.Session()
        self.mpg_url = 'http://www.monpetitgazon.com'
        self.mpg_all_url = self.mpg_url + '/{page}.php?mdp=' + self.ligue_mdp

    def login(self, email, pwd):
        """Post the connection form of the site through the shared session.

        The home page is fetched first and its cookies are kept in
        ``self.cookies``.  Both cookie sets are printed as diagnostic output.
        """
        self.cookies = self.session.get(self.mpg_url).cookies
        print(*self.cookies.values())
        params = {
            'connemail': email,
            'connpass': pwd,
            'persistent': '1',
            'typeForm': 'conn',
        }
        answer = self.session.post(self.mpg_url, data=params)
        print(answer.cookies)

    def get_classement(self):
        """Yield one dict per team row of the league ranking page.

        Each dict has the keys ``name``, ``rank``, ``points``, ``played``,
        ``won``, ``null``, ``lost``, ``for``, ``against`` and ``diff``; all
        but ``name`` are converted with ``int``.

        :raises requests.HTTPError: if the page request fails.
        """
        class_url = self.mpg_all_url.format(page="classement")
        classement = self.session.get(class_url)
        classement.raise_for_status()
        tree = lxml.html.fromstring(classement.text)
        # (key, index of the <td> holding the value); column 1 is the name.
        columns = (
            ('rank', 0),
            ('points', 2),
            ('played', 3),
            ('won', 4),
            ('null', 5),
            ('lost', 6),
            ('for', 7),
            ('against', 8),
            ('diff', 9),
        )
        for node in tree.xpath('//tr[td/div[@class="equipe"]/b/a]'):
            cells = node.xpath('td')
            row = {'name': node.xpath('td/div/b/a')[0].text}
            for key, index in columns:
                row[key] = int(cells[index].text)
            yield row

    def get_matchs_actuels(self):
        """Yield the matches listed on the default calendar page."""
        day_url = self.mpg_all_url.format(page="calendrier-resultat")
        yield from self.get_matchs_url(day_url)

    def get_matchs(self, day):
        """Yield the matches of calendar page number ``day``."""
        day_url = self.mpg_all_url.format(page="calendrier-resultat")
        day_url += '&num=' + str(day)
        yield from self.get_matchs_url(day_url)

    def get_matchs_url(self, day_url):
        """Yield ``{'domicile', 'score', 'extérieur'}`` dicts for ``day_url``.

        The URL is printed before it is requested (diagnostic output).

        :raises requests.HTTPError: if the page request fails.
        """
        print(day_url)
        response = self.session.get(day_url)
        response.raise_for_status()
        tree = lxml.html.fromstring(response.text)
        for node in tree.xpath('//tr[td[@class="equipeDom"]/b/a]'):
            match = {}
            match['domicile'] = node.xpath('td[@class="equipeDom"]/b/a')[0].text
            try:
                # The score cell wraps its text in a link when one exists.
                match['score'] = node.xpath('td[@class="score"]/a')[0].text
            except IndexError:
                match['score'] = node.xpath('td[@class="score"]')[0].text
            match['extérieur'] = node.xpath('td[@class="equipeExt"]/b/a')[0].text
            yield match

    @classmethod
    def _parse_teams(cls, title):
        """Return the team names found in a match page ``title``.

        When the title ends with ``"<home> - <away>"`` the two names are
        split on the `` - `` separator only, so hyphenated names such as
        ``Saint-Etienne`` stay intact.  Titles that do not match that shape
        fall back to the legacy behaviour: split on every hyphen and strip.
        """
        found = cls._TITLE_TEAMS_RE.search(title)
        if found:
            return found.group(1).split(" - ")
        return [part.strip() for part in title.split("-")]

    def get_notes(self, match_id):
        """Yield ``(team_name, players_generator)`` for home then away team.

        The players generators come from :meth:`gen_team_from_tree` and are
        lazy.  The page title is printed when a parsed team name contains
        ``"notes"``, which signals a title that was not parsed as expected.

        :raises requests.HTTPError: if the page request fails.
        """
        match_url = "{base}/DetailMatchChampionnat2.php?idmatch={id}"
        match_url = match_url.format(base=self.mpg_url, id=match_id)
        m_page = req.get(match_url)
        # Same failure policy as the other page fetchers of this class.
        m_page.raise_for_status()
        m_page.encoding = 'UTF-8'
        tree = lxml.html.fromstring(m_page.text)
        affiche = tree.xpath('//title')[0].text
        teams = self._parse_teams(affiche)
        for team in teams:
            if "notes" in team:
                print(affiche)
        yield teams[0], self.gen_team_from_tree(tree)
        yield teams[1], self.gen_team_from_tree(tree, home=False)

    def gen_team_from_tree(self, tree, home=True):
        """Yield one ``{'id', 'mark', 'name', 'buts'}`` dict per player.

        :param tree: parsed match page.
        :param home: select the ``teamhome`` block when true, ``teamaway``
            otherwise.

        ``mark`` is converted with ``int`` when the note element has text;
        otherwise the raw empty value (``None`` or ``""``) is kept.
        ``buts`` is the number of images found in the player's ``but`` div.
        """
        team_class = "teamhome" if home else "teamaway"
        path = '//div/div[@class="{team}"]/div/div'.format(team=team_class)
        for p_node in tree.xpath(path):
            p_mark = p_node.xpath('div[@class="note"]/p')[0].text
            if p_mark:
                p_mark = int(p_mark)
            yield {
                'id': p_node.get('id'),
                'mark': p_mark,
                'name': p_node.xpath('p')[0].text,
                'buts': len(p_node.xpath('div[@class="but"]/img')),
            }

    def crawl_days(self, first_day=1, last_day=38):
        """Crawl every match of the given days and aggregate per player.

        :param first_day: first day number to crawl (inclusive).
        :param last_day: last day number to crawl (inclusive).
        :returns: dict keyed by player name; each value holds ``marks``
            (one entry per appearance, in crawl order), ``team`` (team of
            the first appearance), ``poste`` (from the blog listing, or
            ``"unknown"``) and ``buts`` (total goals).
        """
        t_dict = {}
        postes = dict(self.get_players_url_and_position())
        for day in range(first_day, last_day + 1):
            for team, players in self.crawl_matches_of_day(day):
                for player in players:
                    name = player["name"]
                    known = t_dict.get(name)
                    if known is not None:
                        known["marks"].append(player["mark"])
                        known["buts"] += player["buts"]
                        continue
                    t_dict[name] = {
                        'marks': [player["mark"]],
                        'team': team,
                        'poste': postes.get(name, "unknown"),
                        'buts': player['buts'],
                    }
        return t_dict

    def crawl_matches_of_day(self, day):
        """Yield the :meth:`get_notes` results of every match of ``day``.

        Match ids are taken from the ``idmatch=`` part of the score links.

        :raises requests.HTTPError: if a page request fails.
        """
        day_url = "{base}/calendrier-resultat-championnat.php?num={day}"
        day_url = day_url.format(base=self.mpg_url, day=day)
        response = req.get(day_url)
        response.raise_for_status()
        tree = lxml.html.fromstring(response.text)
        for link in tree.xpath('//td[@class="score"]/a'):
            m_id = re.sub(r"^.*idmatch=", "", link.get('href'))
            yield from self.get_notes(m_id)

    @staticmethod
    def _fetch_blog_tree(url):
        """Download ``url``, decode it as UTF-8 and return the parsed tree."""
        response = req.get(url)
        response.encoding = 'UTF-8'
        response.raise_for_status()
        return lxml.html.fromstring(response.text)

    @staticmethod
    def _players_from_tree(tree):
        """Yield ``(name, position)`` for every ``<figcaption>`` of ``tree``."""
        for caption in tree.xpath('//figcaption'):
            name = caption.xpath('h2/a')[0].text
            position = caption.xpath('h6/a')[0].text
            yield name, position

    def get_players_url_and_position(self):
        """Yield ``(name, position)`` pairs from the blog team pages.

        The main team page is downloaded once and used both for its own
        players and for the sub-menu links to the other pages.

        :raises requests.HTTPError: if a page request fails.
        """
        blog_url = "http://blog.monpetitgazon.com/"
        tree = self._fetch_blog_tree(blog_url + "team")
        yield from self._players_from_tree(tree)
        for page in tree.xpath('//ul[@class="sub-menu"]/li/a'):
            href = page.get("href")
            if not href:
                continue
            # urljoin gives the same URL as concatenation for plain relative
            # links and also copes with absolute or root-relative ones.
            page_url = urljoin(blog_url, href)
            yield from self.get_players_url_and_position_from_url(page_url)

    def get_players_url_and_position_from_url(self, url):
        """Yield ``(name, position)`` pairs found on the page at ``url``.

        :raises requests.HTTPError: if the page request fails.
        """
        yield from self._players_from_tree(self._fetch_blog_tree(url))

    def do_load(self):
        """Fill ``self.mega_d`` from ``mpg.json``, crawling if needed.

        When the cache file is missing, unreadable or not valid JSON, the
        data is rebuilt with :meth:`crawl_days` and written back to
        ``mpg.json``.
        """
        try:
            with open("mpg.json", "r") as d_json:
                self.mega_d = json.load(d_json)
        except (OSError, ValueError):
            # OSError: file missing/unreadable; ValueError: malformed JSON.
            # Anything else (e.g. KeyboardInterrupt) must not trigger a crawl.
            self.mega_d = self.crawl_days()
            with open("mpg.json", "w") as d_json:
                json.dump(self.mega_d, d_json)

    def query(self, poste, matches=0, last=0, buts=0, team=""):
        """Yield ``(name, info)`` for the players matching every filter.

        :param poste: required position; a falsy value accepts any.
        :param matches: minimum number of recorded marks.
        :param last: keep only the ``last`` most recent marks in the result
            (players with fewer marks are skipped); ``0`` keeps them all.
        :param buts: minimum number of goals.
        :param team: required team; a falsy value accepts any.

        ``info`` is a shallow copy of the stored entry, so trimming its
        ``marks`` does not alter ``self.mega_d``.
        """
        for player, info in self.mega_d.items():
            marks = info["marks"]
            if poste and info["poste"] != poste:
                continue
            if len(marks) < last or len(marks) < matches:
                continue
            if info["buts"] < buts:
                continue
            if team and team != info["team"]:
                continue
            selected = copy(info)
            # marks[-0:] is the whole list, so last=0 keeps every mark.
            selected["marks"] = marks[-last:]
            yield player, selected

    @staticmethod
    def _mean_mark(marks):
        """Return the mean of the numeric entries of ``marks``.

        Unrated appearances are stored as ``None`` or ``""`` and are left
        out of the mean; ``0.0`` is returned when nothing is rated.
        """
        rated = [mark for mark in marks if isinstance(mark, (int, float))]
        return sum(rated) / len(rated) if rated else 0.0

    def print_category(self, category, matches=0, last=0, top=0, buts=0, ratio=False, of=sys.stdout, team=""):
        """Print a ranking of the players selected by :meth:`query`.

        :param category: position passed to :meth:`query` as ``poste``.
        :param matches: see :meth:`query`.
        :param last: see :meth:`query`.
        :param top: maximum number of lines; ``0`` prints everything.
        :param buts: see :meth:`query`.
        :param ratio: rank by goals per recorded mark instead of mean mark.
        :param of: file object receiving the output.
        :param team: see :meth:`query`.

        Each line holds, tab-separated: rank, padded name, padded team,
        number of marks, goals and the ranking value (highest first).
        Nothing is printed when no player matches.
        """
        players = dict(self.query(category, matches, last, buts=buts, team=team))
        if not players:
            return
        name_width = max(len(name) for name in players)
        team_width = max(len(info["team"]) for info in players.values())
        if ratio:
            def criteria(info):
                return info["buts"] / len(info["marks"])
        else:
            def criteria(info):
                return self._mean_mark(info["marks"])
        scored = [
            (criteria(info),
             (name, info["team"], len(info["marks"]), info["buts"]))
            for name, info in players.items()
        ]
        # reversed(sorted(...)) rather than reverse=True: keeps the original
        # ordering of players with equal values.
        ranking = list(reversed(sorted(scored, key=lambda entry: entry[0])))
        if top > 0:
            ranking = ranking[:top]
        for position, (value, (name, club, played, goals)) in enumerate(ranking, 1):
            print(position,
                  name.ljust(name_width),
                  club.ljust(team_width),
                  played,
                  goals,
                  value,
                  sep='\t',
                  file=of)
if __name__ == '__main__':
    # Ad-hoc report: load (or crawl) the statistics, show the current
    # fixtures, dump a few players and rank the goalkeepers.
    # Keep ``mpg`` bound at module level: code elsewhere in this file may
    # look it up as a global.
    mpg = MPG('1zzAqTvvnb')
    mpg.do_load()
    print(*mpg.get_matchs_actuels())

    d = mpg.mega_d
    for joueur in ("Rodrigue Ninga", "Ismael Traoré", "Pujol"):
        print(joueur, d[joueur])

    # Other rankings can be produced the same way, e.g.
    # print_category("Milieu", matches=6) or
    # print_category("Defenseur", matches=6).
    print("=" * 80)
    mpg.print_category("Gardien", matches=6, last=0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement