Advertisement
Guest User

Untitled

a guest
Nov 30th, 2015
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.62 KB | None | 0 0
  1. import requests as req
  2. import lxml.html
  3. import re
  4. import json
  5. import sys
  6. from copy import copy
  7.  
  8. class MPG():
  9.     def __init__(self, ligue_mdp):
  10.         self.ligue_mdp = ligue_mdp
  11.         self.cookies = None
  12.         self.session = req.Session()
  13.         self.mpg_url = 'http://www.monpetitgazon.com'
  14.         self.mpg_all_url = self.mpg_url + '/{page}.php?mdp=' + self.ligue_mdp
  15.  
  16.     def login(self, email, pwd):
  17.         self.cookies = self.session.get(self.mpg_url).cookies
  18.         print(*self.cookies.itervalues())
  19.         params = {'connemail': email,
  20.                   'connpass': pwd,
  21.                   'persistent':'1',
  22.                   'typeForm':'conn'}
  23.         a = self.session.post(self.mpg_url, data=params)
  24.         print(a.cookies)
  25.  
  26.         #print(self.session.get("http://www.monpetitgazon.com/?redirect=").text)
  27.  
  28.     def get_classement(self):
  29.         class_url = self.mpg_all_url.format(page="classement")
  30.         classement = self.session.get(class_url)
  31.         classement.raise_for_status()
  32.         tree = lxml.html.fromstring(classement.text)
  33.         #print(classement.text)
  34.         nodes_list = tree.xpath('//tr[td/div[@class="equipe"]/b/a]')
  35.         for node in nodes_list:
  36.             d={}
  37.             name = node.xpath('td/div/b/a')[0].text
  38.             d['name'] = name
  39.             col = node.xpath('td')
  40.             d['rank'] = int(col[0].text)
  41.             d['points'] = int(col[2].text)
  42.             d['played'] = int(col[3].text)
  43.             d['won'] = int(col[4].text)
  44.             d['null'] = int(col[5].text)
  45.             d['lost'] = int(col[6].text)
  46.             d['for'] = int(col[7].text)
  47.             d['against'] = int(col[8].text)
  48.             d['diff'] = int(col[9].text)
  49.             yield d
  50.  
  51.     def get_matchs_actuels(self):
  52.         day_url = self.mpg_all_url.format(page="calendrier-resultat")
  53.         yield from self.get_matchs_url(day_url)
  54.  
  55.     def get_matchs(self, day):
  56.         day_url = self.mpg_all_url.format(page="calendrier-resultat")
  57.         day_url += '&num=' + str(day)
  58.         yield from self.get_matchs_url(day_url)
  59.  
  60.     def get_matchs_url(self, day_url):
  61.         print(day_url)
  62.         day = self.session.get(day_url)
  63.         day.raise_for_status()
  64.         tree = lxml.html.fromstring(day.text)
  65.         #print(day.text)
  66.         nodes_list = tree.xpath('//tr[td[@class="equipeDom"]/b/a]')
  67.         nodes_list2 = tree.xpath('//tr/td[@class="equipeDom"]/b/a')
  68.         for node in nodes_list:
  69.             d={}
  70.             d['domicile'] = node.xpath('td[@class="equipeDom"]/b/a')[0].text
  71.             try:
  72.                 d['score'] = node.xpath('td[@class="score"]/a')[0].text
  73.             except IndexError:
  74.                 d['score'] = node.xpath('td[@class="score"]')[0].text
  75.  
  76.             d['extérieur'] = node.xpath('td[@class="equipeExt"]/b/a')[0].text
  77.             yield d
  78.  
  79.     def get_notes(self, match_id):
  80.         match_url = "{base}/DetailMatchChampionnat2.php?idmatch={id}"
  81.         match_url = match_url.format(base=self.mpg_url, id=match_id)
  82.         m_page = req.get(match_url)
  83.         m_page.encoding = 'UTF-8'
  84.         tree = lxml.html.fromstring(m_page.text)
  85.         affiche = tree.xpath('//title')[0].text
  86.         teams = [team.strip()
  87.                  for team in
  88.                  re.sub(r".* ([\w-]+ - [\w-]+$)",r"\1",affiche).split("-")]
  89.         for t in teams:
  90.             if "notes" in t:
  91.                 print(affiche)
  92.         home_gen = self.gen_team_from_tree(tree)
  93.         yield teams[0], home_gen
  94.         away_gen = self.gen_team_from_tree(tree, home=False)
  95.         if match_id =="805433":
  96.             print(*self.gen_team_from_tree(tree, home=False))
  97.  
  98.  
  99.         yield teams[1], away_gen
  100.  
  101.     def gen_team_from_tree(self, tree, home=True):
  102.         team_class = "teamhome" if home else "teamaway"
  103.         path = '//div/div[@class="{team}"]/div/div'.format(team=team_class)
  104.         nodes_home = tree.xpath(path)
  105.         for p_node in nodes_home:
  106.             player_info = {}
  107.             p_id = p_node.get('id')
  108.             p_mark = p_node.xpath('div[@class="note"]/p')[0].text
  109.             p_name = p_node.xpath('p')[0].text
  110.             p_buts = len(p_node.xpath('div[@class="but"]/img'))
  111.             if p_mark:
  112.                 p_mark = int(p_mark)
  113.                 player_info = {
  114.                                 'id' : p_id,
  115.                                 'mark' : p_mark,
  116.                                 'name' : p_name,
  117.                                 'buts' : p_buts
  118.                               }
  119.                 yield player_info
  120.  
  121.     def crawl_days(self):
  122.         t_dict = {}
  123.         postes = dict(self.get_players_url_and_position())
  124.         for i in range(1, 39):
  125.             for team, gen_p in self.crawl_matches_of_day(i):
  126.                 for p in gen_p:
  127.                     p_id = p["id"]
  128.                     name = p["name"]
  129.                     if "Traoré" in name:
  130.                         print(team, p)
  131.                     if name in t_dict:
  132.                         t_dict[name]["marks"].append(p["mark"])
  133.                         t_dict[name]["buts"] += p["buts"]
  134.                         continue
  135.                     poste = postes[name] if name in postes else "unknown"
  136.                     t_dict[name] = {'marks' : [p["mark"]],
  137.                                     'team'  : team,
  138.                                     'poste' : poste,
  139.                                     'buts' : p['buts']
  140.                                    }
  141.         return t_dict
  142.  
  143.     def crawl_matches_of_day(self, day):
  144.         day_global_url = "http://www.monpetitgazon.com/calendrier-resultat-championnat.php?num={day}"
  145.         day_url = day_global_url.format(day=day)
  146.         day = req.get(day_url)
  147.         day.raise_for_status()
  148.         tree = lxml.html.fromstring(day.text)
  149.         matches_nodes = tree.xpath('//td[@class="score"]/a')
  150.         for m in matches_nodes:
  151.             m_id = re.sub(r"^.*idmatch=","",m.get('href'))
  152.             yield from self.get_notes(m_id)
  153.  
  154.     def get_players_url_and_position(self):
  155.         blog_url = "http://blog.monpetitgazon.com/"
  156.         players = req.get(blog_url + "team" )
  157.         players.encoding = 'UTF-8'
  158.         players.raise_for_status()
  159.         tree = lxml.html.fromstring(players.text)
  160.         page_nodes = tree.xpath('//ul[@class="sub-menu"]/li/a')
  161.         yield from self.get_players_url_and_position_from_url(blog_url + "team")
  162.         for page in page_nodes:
  163.             page_url = blog_url + page.get("href")
  164.             yield from self.get_players_url_and_position_from_url(page_url)
  165.  
  166.     def get_players_url_and_position_from_url(self, url):
  167.         players = req.get(url)
  168.         players.encoding = 'UTF-8'
  169.         players.raise_for_status()
  170.         tree = lxml.html.fromstring(players.text)
  171.         player_nodes = tree.xpath('//figcaption')
  172.         for player in player_nodes:
  173.             name = player.xpath('h2/a')[0].text
  174.             position = player.xpath('h6/a')[0].text
  175.             yield name, position
  176.  
  177.  
  178.  
  179.     def do_load(self):
  180.         try:
  181.             with open("mpg.json","r") as d_json:
  182.                 self.mega_d = json.load(d_json)
  183.         except:
  184.             self.mega_d = self.crawl_days()
  185.             with open("mpg.json","w") as d_json:
  186.                 json.dump(self.mega_d, d_json)
  187.  
  188.     def query(self, poste, matches=0, last=0, buts=0, team=""):
  189.         d = self.mega_d
  190.         for player in self.mega_d:
  191.             if poste and d[player]["poste"] != poste:
  192.                 continue
  193.             if len(d[player]["marks"]) < last :
  194.                 continue
  195.             if len(d[player]["marks"]) < matches:
  196.                 continue
  197.             if d[player]["buts"] < buts:
  198.                 continue
  199.             if team and team != d[player]["team"]:
  200.                 continue
  201.             p = copy(d[player])
  202.             p["marks"] = p["marks"][-last:]
  203.             yield player, p
  204.  
  205.     def print_category(self, category, matches=0, last=0, top=0, buts=0, ratio=False, of=sys.stdout, team=""):
  206.         players = dict(mpg.query(category, matches, last, buts=buts, team=team))
  207.         column = max(len(n) for n in players.keys())
  208.         column2 = max(len(v["team"])for v in players.values())
  209.         if ratio:
  210.             criteria = lambda x : x["buts"]/len(x["marks"])
  211.         else :
  212.             criteria = lambda x : sum(x["marks"])/len(x["marks"])
  213.         reverse_dict = [(criteria(info),
  214.                         (name, info["team"], len(info["marks"]), info["buts"]))
  215.                         for name, info in players.items()]
  216.  
  217.         if not top:
  218.             top=len(reverse_dict)
  219.  
  220.         count = 0
  221.  
  222.         for k in reversed(sorted(reverse_dict, key=lambda x:x[0])):
  223.             if count == top:
  224.                 break
  225.             count = count + 1
  226.             out1 = k[1][0] + ' '*(column - len(k[1][0]))
  227.             out2 = k[1][1] + ' '*(column2 - len(k[1][1]))
  228.             print(count,
  229.                   out1,
  230.                   out2,
  231.                   k[1][2],
  232.                   k[1][3],
  233.                   k[0],
  234.                   sep='\t',
  235.                   file=of)
  236.  
  237.  
  238.  
  239.  
  240.  
  241. if __name__=='__main__':
  242.     mpg = MPG('1zzAqTvvnb')
  243.     mpg.do_load()
  244.     print(*mpg.get_matchs_actuels())
  245.     d = mpg.mega_d
  246.     joueurs = ["Rodrigue Ninga", "Ismael Traoré", "Pujol"]
  247.     for j in joueurs:
  248.         print(j, d[j])
  249.     #print(sum(d["Dja Djédjé"]['marks'])/len(d["Dja Djédjé"]['marks']))
  250.     #print("="*80)
  251.     #mpg.print_category("Milieu", matches=6, buts=0)
  252.     #print("="*80)
  253.     #mpg.print_category("Milieu", matches=6)
  254.     print("="*80)
  255.     #mpg.print_category("Defenseur", matches=6)
  256.     #print("="*80)
  257.     mpg.print_category("Gardien", matches=6, last=0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement