Advertisement
Guest User

SoccerStatsApi.py

a guest
Oct 24th, 2019
137
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.11 KB | None | 0 0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from datetime import datetime

import browser_cookie3
import lxml.html as lh
import pymysql
import requests
from bs4 import BeautifulSoup
  9.  
  10.  
  11. class SoccerStatsApi:
  12.     def __init__(self, home_url):
  13.         """ api initialization
  14.        """
  15.         self.home_url = home_url
  16.         self.g_data = list()
  17.         self.page_data = None
  18.         self.matches = list()
  19.         self.league_name = None
  20.         self.home_name = None
  21.         self.away_name = None
  22.         self.stats_url = None
  23.         self.tree = None
  24.         self.table_id = None
  25.         self.inquiry_day = datetime.now().strftime("%Y-%m-%d")
  26.         self.inquiry_time = datetime.now().strftime("%H:%M:%S")
  27.         self.total_number_of_matches = None
  28.         self.number_of_matches_with_stat = None
  29.         self.main_id = None
  30.  
  31.     def page_load(self):
  32.         r = requests.get(self.home_url, cookies=browser_cookie3.load())
  33.         self.g_data = BeautifulSoup(r.content, 'lxml').find_all("tr", {"class": "trow8"})
  34.  
  35.     def get_matches_list(self):
  36.         """ upload the current daily match list
  37.        """
  38.         self.matches.clear()
  39.         self.total_number_of_matches = len(self.g_data)
  40.         for item in self.g_data:
  41.             stat_link = SoccerStatsApi.get_stats_link(self, item)
  42.             if stat_link != b'':
  43.                 current_match = list()
  44.                 current_match.append(item.contents[0].contents[2].text.strip())     # 0 - start
  45.                 current_match.append(item.contents[2].text)                         # 1 - country
  46.                 current_match.append(item.contents[3].text.strip())                 # 2 - home_name
  47.                 current_match.append(item.contents[6].text.strip())                 # 3 - away_name
  48.                 current_match.append(stat_link)                                     # 4 - stats link
  49.  
  50.                 try:
  51.                     self.matches.append(current_match)
  52.                 except:
  53.                     """ continue in case of failure """
  54.                     pass
  55.  
  56.         self.number_of_matches_with_stat = len(self.matches)
  57.         return self.matches
  58.  
  59.     def get_stats_link(self, act_data):
  60.         self.stats_url = ''
  61.         field_list = (7, 8, 9)
  62.         for field in field_list:
  63.             data = act_data.contents[field]
  64.             if len(data) != 0:
  65.                 if data.contents[0].text == 'stats':
  66.                     self.stats_url = 'http://www.soccerstats.com/' + data.contents[0].attrs['href']
  67.                     # result = table_content(stat_url.encode(encoding='UTF-8', errors='strict'), table_id)
  68.  
  69.         return self.stats_url.encode(encoding='UTF-8', errors='strict')
  70.  
  71.     def get_table_contents(self, match_data):
  72.         """ find table content
  73.        """
  74.         self.home_name = match_data[2]
  75.         self.away_name = match_data[3]
  76.         self.stats_url = match_data[4]
  77.         current_table = list()
  78.  
  79.         response = requests.get(self.stats_url, cookies=browser_cookie3.load())
  80.         if response.ok:
  81.             tree = lh.fromstring(response.text)
  82.             table_index = Table_index_find(tree).index
  83.             self.league_name = tree.xpath('//*[@id="content"]/div[1]/div[1]/table/tr/td[3]/font')[0].text
  84.             current_table.append(self.league_name)
  85.             for table_id in range(6):
  86.                 table_xpath = '//*[@id="container"]/div[5]/table[1]/tr[1]/td/table[' + table_index + ']/tr/td[' + \
  87.                               str(table_id+1) + ']/table'
  88.                 table = tree.xpath(table_xpath)
  89.                 home_data_list = None
  90.                 away_data_list = None
  91.  
  92.                 for i in range(2, len(table[0])):
  93.                     table_children = table[0][i].getchildren()
  94.                     team = table_children[1].text.strip()
  95.                     gp = int(table_children[2][0].text)
  96.                     pts = int(table_children[3][0].text.replace('\r\n', ''))
  97.  
  98.                     if team == self.home_name:
  99.                         home_data_list = [i-1, gp, pts]
  100.                     if team == self.away_name:
  101.                         away_data_list = [i-1, gp, pts]
  102.                 current_table.append((home_data_list, away_data_list))
  103.         return current_table
  104.  
  105.     def sql_upgrade(self):
  106.         """ main table upgrade
  107.        """
  108.         connection = pymysql.connect(host="192.168.2.74", port=3306, user="soccer", passwd="stat",
  109.                                      database="soccerstat")
  110.         cursor = connection.cursor()
  111.         # insert data
  112.         fields = "INSERT INTO main(I_Date, I_Time, Number_of_Matches, Matches_with_Stat) VALUES (%s, %s, %s, %s)"
  113.         values = (self.inquiry_day, self.inquiry_time, self.total_number_of_matches, self.number_of_matches_with_stat)
  114.         cursor.execute(fields, values)
  115.         connection.commit()
  116.         self.main_id = cursor.lastrowid
  117.  
  118.         for match in self.matches:
  119.             data = self.get_table_contents(match)
  120.             if len(data) > 0:
  121.                 fields = "INSERT INTO Matches(Main_ID, Start_Time, National, League, Home, Away) " \
  122.                          "VALUES (%s, %s, %s, %s, %s, %s)"
  123.                 values = (self.main_id, match[0], match[1], data[0], match[2], match[3])
  124.                 cursor.execute(fields, values)
  125.                 connection.commit()
  126.                 match_id = cursor.lastrowid
  127.                 # print(match_id)
  128.  
  129.                 # ide jön még a tábla feltöltés
  130.                 fields = "INSERT INTO tables(Match_ID, Main_ID, H_Le_Ra, H_Le_Gp, H_Le_Pt, H_La_Ra, H_La_Gp, H_La_Pt, " \
  131.                          "H_Ho_Ra, H_Ho_Gp, H_Ho_Pt, H_Aw_Ra, H_Aw_Gp, H_Aw_Pt, H_Of_Ra, H_Of_Gp, H_Of_Gf, H_De_Ra," \
  132.                          " H_De_Gp, H_De_Ga, A_Le_Ra, A_Le_Gp, A_Le_Pt, A_La_Ra, A_La_Gp, A_La_Pt, A_Ho_Ra, A_Ho_Gp," \
  133.                          " A_Ho_Pt, A_Aw_Ra, A_Aw_Gp, A_Aw_Pt, A_Of_Ra, A_Of_Gp, A_Of_Gf, A_De_Ra, A_De_Gp, A_De_Ga)" \
  134.                          " VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s," \
  135.                          " %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
  136.                 values = list()
  137.                 values.append(match_id)
  138.                 values.append(self.main_id)
  139.                 for i in data[1:]:
  140.                     for j in i:
  141.                         for k in j:
  142.                             values.append(k)
  143.                 cursor.execute(fields, values)
  144.                 connection.commit()
  145.         connection.close()
  146.  
  147.  
  148. class Table_index_find():
  149.     """ table index
  150.    """
  151.     def __init__(self, tree):
  152.         self.tree = tree
  153.         path = '//*[@id="container"]/div[5]/table[1]/tr[1]/td'
  154.         tables = tree.xpath(path)[0]
  155.         label = tables[0].getchildren()[0].getchildren()[0].getchildren()[0].text
  156.         if label == 'Tables':
  157.             self.index = '2'
  158.         else:
  159.             self.index = '1'
  160.  
  161.     def __str__(self):
  162.         return self.index
  163.  
  164.  
  165. if __name__ == '__main__':
  166.     soccer = SoccerStatsApi("http://www.soccerstats.com/matches.asp?matchday=6")
  167.     soccer.page_load()
  168.     soccer.get_matches_list()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement