Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/ python
- # -*- coding: UTF-8 -*-
- import requests
- import browser_cookie3
- from bs4 import BeautifulSoup
- import lxml.html as lh
- from datetime import datetime
- import pymysql
class SoccerStatsApi:
    """Scrape a soccerstats.com match-list page and store the results in MySQL.

    Intended call order: ``page_load()`` fetches the match rows,
    ``get_matches_list()`` parses them into ``self.matches`` and
    ``sql_upgrade()`` writes the matches and their table data to the database.
    """

    def __init__(self, home_url):
        """Initialise the scraper state.

        Args:
            home_url: URL of the match-list page that ``page_load`` fetches.
        """
        self.home_url = home_url
        self.g_data = list()
        self.page_data = None
        self.matches = list()
        self.league_name = None
        self.home_name = None
        self.away_name = None
        self.stats_url = None
        self.tree = None
        self.table_id = None
        # Take one timestamp so the day and the time can never straddle midnight.
        now = datetime.now()
        self.inquiry_day = now.strftime("%Y-%m-%d")
        self.inquiry_time = now.strftime("%H:%M:%S")
        self.total_number_of_matches = None
        self.number_of_matches_with_stat = None
        self.main_id = None

    def page_load(self):
        """Download ``home_url`` and keep every ``<tr class="trow8">`` row in ``g_data``.

        The request is sent with the cookies returned by ``browser_cookie3.load()``.
        """
        r = requests.get(self.home_url, cookies=browser_cookie3.load())
        self.g_data = BeautifulSoup(r.content, 'lxml').find_all("tr", {"class": "trow8"})

    def get_matches_list(self):
        """Rebuild ``self.matches`` from the rows loaded by ``page_load``.

        Only rows that carry a 'stats' link are kept. Each kept match is a list:
        ``[start, country, home_name, away_name, stats_link]``.

        A row whose cells do not have the expected shape is skipped instead of
        aborting the whole run.

        Returns:
            The list stored in ``self.matches``.
        """
        self.matches.clear()
        self.total_number_of_matches = len(self.g_data)
        for item in self.g_data:
            try:
                stat_link = self.get_stats_link(item)
                if stat_link == b'':
                    continue
                current_match = [
                    item.contents[0].contents[2].text.strip(),  # 0 - start
                    item.contents[2].text,                      # 1 - country
                    item.contents[3].text.strip(),              # 2 - home_name
                    item.contents[6].text.strip(),              # 3 - away_name
                    stat_link,                                  # 4 - stats link
                ]
            except (AttributeError, IndexError, KeyError):
                # Malformed row: continue with the next one.
                continue
            self.matches.append(current_match)
        self.number_of_matches_with_stat = len(self.matches)
        return self.matches

    def get_stats_link(self, act_data):
        """Return the UTF-8 encoded stats URL of one match row.

        Cells 7, 8 and 9 of the row are inspected; a cell whose first child has
        the text 'stats' supplies the link. If several cells match, the last
        one wins.

        Args:
            act_data: one row element from ``g_data``.

        Returns:
            The absolute stats URL as ``bytes``, or ``b''`` when the row has no
            'stats' link.
        """
        self.stats_url = ''
        for field in (7, 8, 9):
            data = act_data.contents[field]
            # Explicit length check kept on purpose: an empty cell has no contents[0].
            if len(data) != 0 and data.contents[0].text == 'stats':
                self.stats_url = 'http://www.soccerstats.com/' + data.contents[0].attrs['href']
        return self.stats_url.encode(encoding='UTF-8', errors='strict')

    def get_table_contents(self, match_data):
        """Fetch the stats page of one match and extract both teams' table rows.

        Args:
            match_data: a match list as produced by ``get_matches_list``
                (index 2 = home name, 3 = away name, 4 = stats link).

        Returns:
            ``[league_name, (home, away), ...]`` with one ``(home, away)`` tuple
            for each of the six tables on the page. ``home`` / ``away`` are
            ``[rank, gp, pts]`` lists, or ``None`` when the team name was not
            found in that table. An empty list is returned when the HTTP
            request did not succeed.
        """
        self.home_name = match_data[2]
        self.away_name = match_data[3]
        self.stats_url = match_data[4]
        current_table = list()
        response = requests.get(self.stats_url, cookies=browser_cookie3.load())
        if response.ok:
            tree = lh.fromstring(response.text)
            table_index = Table_index_find(tree).index
            self.league_name = tree.xpath('//*[@id="content"]/div[1]/div[1]/table/tr/td[3]/font')[0].text
            current_table.append(self.league_name)
            for table_id in range(6):
                table_xpath = '//*[@id="container"]/div[5]/table[1]/tr[1]/td/table[{}]/tr/td[{}]/table'.format(
                    table_index, table_id + 1)
                table = tree.xpath(table_xpath)
                home_data_list = None
                away_data_list = None
                # Team rows start at index 2, so ``i - 1`` is the 1-based position in the table.
                for i in range(2, len(table[0])):
                    table_children = table[0][i].getchildren()
                    team = table_children[1].text.strip()
                    gp = int(table_children[2][0].text)
                    pts = int(table_children[3][0].text.replace('\r\n', ''))
                    if team == self.home_name:
                        home_data_list = [i - 1, gp, pts]
                    if team == self.away_name:
                        away_data_list = [i - 1, gp, pts]
                current_table.append((home_data_list, away_data_list))
        # Returned at function level so a failed request yields [] (the caller checks len()).
        return current_table

    @staticmethod
    def _table_row_values(match_id, main_id, data):
        """Build the parameter list for one INSERT into ``tables``.

        The INSERT lists every home-team column (``H_*``) first and every
        away-team column (``A_*``) afterwards, so the values are emitted in
        that same order: ids, all home triples, then all away triples.
        A missing team entry (``None``) contributes three ``None`` values.
        """
        missing = [None, None, None]
        home_values = []
        away_values = []
        for home_stats, away_stats in data[1:]:
            home_values.extend(missing if home_stats is None else home_stats)
            away_values.extend(missing if away_stats is None else away_stats)
        return [match_id, main_id] + home_values + away_values

    def sql_upgrade(self):
        """Write the current inquiry, its matches and their table data to MySQL.

        One row goes into ``main``; then, for every match in ``self.matches``
        whose stats page could be read, one row goes into ``Matches`` and one
        into ``tables``. The connection is closed even when a statement fails.
        """
        # NOTE(review): host and credentials are hard-coded here; consider moving them to configuration.
        connection = pymysql.connect(host="192.168.2.74", port=3306, user="soccer", passwd="stat",
                                     database="soccerstat")
        try:
            cursor = connection.cursor()
            # insert data
            fields = "INSERT INTO main(I_Date, I_Time, Number_of_Matches, Matches_with_Stat) VALUES (%s, %s, %s, %s)"
            values = (self.inquiry_day, self.inquiry_time, self.total_number_of_matches,
                      self.number_of_matches_with_stat)
            cursor.execute(fields, values)
            connection.commit()
            self.main_id = cursor.lastrowid
            for match in self.matches:
                data = self.get_table_contents(match)
                if len(data) > 0:
                    fields = "INSERT INTO Matches(Main_ID, Start_Time, National, League, Home, Away) " \
                             "VALUES (%s, %s, %s, %s, %s, %s)"
                    values = (self.main_id, match[0], match[1], data[0], match[2], match[3])
                    cursor.execute(fields, values)
                    connection.commit()
                    match_id = cursor.lastrowid
                    # Fill the per-match tables row.
                    fields = "INSERT INTO tables(Match_ID, Main_ID, H_Le_Ra, H_Le_Gp, H_Le_Pt, H_La_Ra, H_La_Gp, H_La_Pt, " \
                             "H_Ho_Ra, H_Ho_Gp, H_Ho_Pt, H_Aw_Ra, H_Aw_Gp, H_Aw_Pt, H_Of_Ra, H_Of_Gp, H_Of_Gf, H_De_Ra," \
                             " H_De_Gp, H_De_Ga, A_Le_Ra, A_Le_Gp, A_Le_Pt, A_La_Ra, A_La_Gp, A_La_Pt, A_Ho_Ra, A_Ho_Gp," \
                             " A_Ho_Pt, A_Aw_Ra, A_Aw_Gp, A_Aw_Pt, A_Of_Ra, A_Of_Gp, A_Of_Gf, A_De_Ra, A_De_Gp, A_De_Ga)" \
                             " VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s," \
                             " %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                    values = self._table_row_values(match_id, self.main_id, data)
                    cursor.execute(fields, values)
                    connection.commit()
        finally:
            connection.close()
class Table_index_find:
    """Work out which nested table index a stats page uses.

    The constructor looks at the first element matched by a fixed XPath under
    the ``container`` element, follows the first child four levels down and
    reads that element's text. ``index`` is the string '2' when this text is
    'Tables' and '1' otherwise.
    """

    def __init__(self, tree):
        """Inspect ``tree`` (a parsed page offering ``xpath``) and set ``index``."""
        self.tree = tree
        container_cell = tree.xpath('//*[@id="container"]/div[5]/table[1]/tr[1]/td')[0]
        # First child of each level, four levels deep.
        heading = container_cell[0][0][0][0].text
        self.index = '2' if heading == 'Tables' else '1'

    def __str__(self):
        """Return the detected index string."""
        return self.index
if __name__ == '__main__':
    # Script entry: load the match-list page and parse its rows.
    matches_url = "http://www.soccerstats.com/matches.asp?matchday=6"
    soccer = SoccerStatsApi(matches_url)
    soccer.page_load()
    soccer.get_matches_list()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement