Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class ReplayData:
    """Extract team, rating and result data from a single VGC replay log.

    The log is any iterable of text lines (an open text file or a list of
    strings).  Call read_data() once to parse it, then store_data() to get
    the flat record for the dataset.
    """

    # Prefixes of a "player joined" line, longest first.  "\u2606" is the
    # marker as proper text; the second entry is the same three UTF-8 bytes
    # read as Latin-1 (presumably what a byte-wise chr() decode of the
    # download yields -- TODO confirm against a real log); the bare
    # "\u00e2" form is the prefix the original code matched and is kept as
    # a fallback.
    _JOIN_PREFIXES = ("|j|\u2606", "|j|\u00e2\u0098\u0086", "|j|\u00e2")

    # Tier line a replay must contain to be accepted by store_data().
    _TIER_LINE = "|tier|[Gen 7] VGC 2019 Ultra Series"

    # Text that follows the player name in a "|raw|" rating line.
    _RATING_MARKER = "'s rating: "

    def __init__(self, replay_log, filename):
        """Remember the log and reset every extracted field.

        Args:
            replay_log: iterable yielding the lines of the replay log.
            filename: label stored in the first column of the record.
        """
        self.file = replay_log
        self.filename = filename
        self.p1_wins = False
        self.p2_wins = False
        self.p1 = ""
        self.p2 = ""
        self.p1_pokemon = []
        self.p2_pokemon = []
        self.p1_teamsize = 0
        self.p2_teamsize = 0
        self.rated_battle = False
        self.p1_rating = 0
        self.p2_rating = 0
        self.correct_format = False

    @classmethod
    def _player_from_join(cls, line):
        """Return the player name of a player-join line, or None."""
        for prefix in cls._JOIN_PREFIXES:
            if line.startswith(prefix):
                return line[len(prefix):]
        return None

    @staticmethod
    def _species_from_poke(line, prefix):
        """Return the species named on a team-preview line.

        The species is the text after ``prefix`` up to the first ',' or
        '|', so a line without a comma no longer raises IndexError.
        """
        details = line[len(prefix):].split('|', 1)[0]
        return details.split(',', 1)[0]

    @classmethod
    def _rating_from_raw(cls, text, player):
        """Return the rating announced for ``player`` in ``text``, or None.

        ``text`` must start with "<player>'s rating: " followed by digits.
        A real prefix test is used because str.lstrip() strips a set of
        characters, not a prefix.
        """
        marker = player + cls._RATING_MARKER
        if not player or not text.startswith(marker):
            return None
        digits = ""
        for char in text[len(marker):]:
            if char not in "0123456789":
                break
            digits += char
        return int(digits) if digits else None

    # reads a replay log and extracts data
    def read_data(self):
        """Parse every line of the log and fill in the instance fields."""
        for line in self.file:
            # Drop the line terminator; '\r' too, so exact comparisons
            # such as "|rated|" also work on CRLF input.
            line = line.rstrip('\r\n')

            # chat messages may contain anything, never parse them
            if "|c|" in line:
                continue

            # checks if the format is correct
            if self._TIER_LINE in line:
                self.correct_format = True

            # extract player names: the first two distinct players to join
            # are taken as p1 and p2; later (re)joins never overwrite them
            name = self._player_from_join(line)
            if name:
                if not self.p1:
                    self.p1 = name
                elif not self.p2 and name != self.p1:
                    self.p2 = name
                continue

            # extract Pokemon
            if line.startswith("|poke|p1|"):
                self.p1_pokemon.append(
                    self._species_from_poke(line, "|poke|p1|"))
            elif line.startswith("|poke|p2|"):
                self.p2_pokemon.append(
                    self._species_from_poke(line, "|poke|p2|"))

            # extract winner
            if line.startswith("|win|"):
                winner = line[len("|win|"):]
                if winner == self.p1:
                    self.p1_wins = True
                if winner == self.p2:
                    self.p2_wins = True

            # check if it's a rated battle
            if line == "|rated|":
                self.rated_battle = True
            if "score could not be retrieved" in line:
                self.rated_battle = False

            # extract rating
            if line.startswith("|raw|"):
                text = line[len("|raw|"):]
                p1_rating = self._rating_from_raw(text, self.p1)
                if p1_rating is not None:
                    self.p1_rating = p1_rating
                p2_rating = self._rating_from_raw(text, self.p2)
                if p2_rating is not None:
                    self.p2_rating = p2_rating

    # stores the correct data in a list
    def store_data(self):
        """Return the dataset record for this replay.

        A replay is valid when both teams have exactly six Pokemon and the
        expected tier line was seen.  The record is then::

            [filename, 6 x p1 species, 6 x p2 species,
             p1 rating, p2 rating, "win" | "loss"]

        Both ratings are None when the battle is not rated; a battle in
        which either rating is below 1000 is treated as not rated.  The
        last column is the result from p1's point of view.

        Returns:
            The record as a list, or an empty list (after printing a
            message) for an invalid replay.
        """
        if self.p1_rating < 1000 or self.p2_rating < 1000:
            self.rated_battle = False

        full_teams = (len(self.p1_pokemon) == 6
                      and len(self.p2_pokemon) == 6)
        if not (full_teams and self.correct_format):
            print("invalid replay:", self.filename)
            return []

        data = [self.filename]
        data.extend(self.p1_pokemon)
        data.extend(self.p2_pokemon)
        if self.rated_battle:
            data.extend([self.p1_rating, self.p2_rating])
        else:
            data.extend([None, None])
        data.append("win" if self.p1_wins else "loss")
        return data
- ___________________________________________________________
- import ReplayDataReader
- import os
- from urllib.request import Request, urlopen
- import io
- import xlsxwriter
class ReplayReader9000:
    """Download a list of VGC replays, parse them and export the dataset.

    ``replays`` is any iterable of replay URLs, one per item (for example
    an open text file with one URL per line).  Two workflows exist:

    * open_replays() + read_replays(): download and parse in memory;
    * write() + read(): download to text files, then parse those files.

    Either way the parsed records end up in ``self.data`` and can be
    exported with write_excel().
    """

    # Directory that write() fills and read() consumes.
    _LOG_DIR = "C:/Users/RoelH/PycharmProjects/DataMiningPokemonVGC/VGCreplays/"

    def __init__(self, replays):
        """Store the URL iterable and start with empty results."""
        self.replays = replays
        self.data = []
        self.replay_logs = []

    def _replay_urls(self):
        """Yield each replay URL stripped of whitespace, skipping blanks.

        A blank line would otherwise turn into the request ".log".
        """
        for replay in self.replays:
            url = replay.strip()
            if url:
                yield url

    @staticmethod
    def _fetch_log(url):
        """Download ``url`` + ".log" and return its body as text.

        The bytes are decoded as Latin-1, which maps every byte to the
        code point of the same value -- exactly what the original
        ``''.join(chr(i) for i in raw)`` did, so the parser sees the same
        characters as before.
        """
        req = Request(url + ".log", headers={'User-Agent': 'Mozilla/5.0'})
        with urlopen(req) as response:
            raw_log = response.read()
        return raw_log.decode("latin-1")

    # opens vgc replay urls and writes the log in txt files
    def write(self):
        """Download every replay and save it as "vgc replay<N>.txt"."""
        for index, url in enumerate(self._replay_urls(), start=1):
            text = self._fetch_log(url)
            file_name = os.path.join(
                self._LOG_DIR, "vgc replay" + str(index) + ".txt")
            with io.open(file_name, "w", encoding="utf-8") as log_file:
                log_file.write(text)

    # reads the log txt files
    def read(self):
        """Parse every ".txt" log in the log directory into self.data.

        Files are opened from the directory that was listed, with the
        UTF-8 encoding write() used, and are closed after parsing.
        """
        for filename in os.listdir(self._LOG_DIR):
            if not filename.endswith(".txt"):
                continue
            path = os.path.join(self._LOG_DIR, filename)
            with io.open(path, "rt", encoding="utf-8") as replay_log:
                replay = ReplayDataReader.ReplayData(replay_log, filename)
                # the parser iterates the file, so it must still be open
                replay.read_data()
            self.data.append(replay.store_data())

    # open the replay replay urls to readable text
    def open_replays(self):
        """Download every replay and keep its lines in self.replay_logs.

        Each log is stored as a list of lines that keep their '\\n'.  The
        final line is kept even when the log does not end with a newline.
        """
        for url in self._replay_urls():
            text = self._fetch_log(url)
            # io.StringIO breaks lines on '\n' only; str.splitlines()
            # would also break on characters such as '\x85', which do
            # occur in Latin-1 decoded text.
            self.replay_logs.append(list(io.StringIO(text)))

    # reads the replay data
    def read_replays(self):
        """Parse the downloaded logs into self.data.

        Replays are labelled "replay<N>"; N only advances after a valid
        replay, so the labels of valid replays are consecutive.
        """
        index = 1
        for replay_log in self.replay_logs:
            replay = ReplayDataReader.ReplayData(
                replay_log, "replay" + str(index))
            replay.read_data()
            replay_data = replay.store_data()
            self.data.append(replay_data)
            if replay_data:
                index += 1

    # writes the dataset in an excel file
    def write_excel(self, filename="VGC_datamining_project.csv"):
        """Write the header row and every valid record to a workbook.

        Args:
            filename: output path.  NOTE(review): xlsxwriter writes XLSX
                content whatever the extension; the default keeps the
                original ".csv" name for backward compatibility -- pass a
                ".xlsx" name for a file spreadsheet programs open cleanly.
        """
        headers = (["replays"]
                   + ["p1 pokemon"] * 6
                   + ["p2 pokemon"] * 6
                   + ["p1 rating", "p2 rating", "p1 result"])

        workbook = xlsxwriter.Workbook(filename)
        sheet = workbook.add_worksheet()

        # write headers (row 0 is spreadsheet row 1)
        sheet.write_row(0, 0, headers)

        # write data to excel; empty records mark invalid replays
        row = 1
        for replay_data in self.data:
            if replay_data:
                sheet.write_row(row, 0, replay_data[:len(headers)])
                row += 1
        workbook.close()
def main():
    """Download the replays listed in Replays.txt and export the dataset.

    Replays.txt holds one replay URL per line.  The file is only needed
    while the replays are being downloaded, so it is closed right after.
    """
    with open("Replays.txt", "rt") as replays:
        program = ReplayReader9000(replays)
        program.open_replays()
    program.read_replays()
    program.write_excel()


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement