Advertisement
Guest User

Untitled

a guest
Nov 17th, 2019
115
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.92 KB | None | 0 0
  1.  
  2.  
  3. class ReplayData:
  4. """extracts data from a single VGC replay"""
  5.  
  6. def __init__(self, replay_log, filename):
  7. self.file = replay_log
  8. self.filename = filename
  9. self.p1_wins = False
  10. self.p2_wins = False
  11. self.p1 = ""
  12. self.p2 = ""
  13. self.p1_pokemon = []
  14. self.p2_pokemon = []
  15. self.p1_teamsize = 0
  16. self.p2_teamsize = 0
  17. self.rated_battle = False
  18. self.p1_rating = 0
  19. self.p2_rating = 0
  20. self.correct_format = False
  21.  
  22. # reads a replay log and extracts data
  23. def read_data(self):
  24. # read file line by line
  25. for line in self.file:
  26. line = line.rstrip('\n')
  27.  
  28. # check if the line is a chat message
  29. if "|c|" in line:
  30. continue
  31.  
  32. # checks if the format is correct
  33. if "|tier|[Gen 7] VGC 2019 Ultra Series" in line:
  34. self.correct_format = True
  35.  
  36. # extract player names
  37. if "|j|☆" in line:
  38. line = line.replace('|j|☆','')
  39. if not self.p1:
  40. self.p1 = line
  41. else:
  42. self.p2 = line
  43.  
  44. # extract Pokemon
  45. if "|poke|p1|" in line or "|poke|p2|" in line:
  46. mon = ''
  47. index = 0
  48. while line[index] is not ',':
  49. mon += line[index]
  50. index += 1
  51. if "|poke|p1|" in mon:
  52. mon = mon.replace('|poke|p1|','')
  53. self.p1_pokemon.append(mon)
  54. if "|poke|p2|" in mon:
  55. mon = mon.replace('|poke|p2|','')
  56. self.p2_pokemon.append(mon)
  57.  
  58. # extract winner
  59. if "|win|" in line:
  60. line = line.replace('|win|','')
  61. if line == self.p1:
  62. self.p1_wins = True
  63. if line == self.p2:
  64. self.p2_wins = True
  65.  
  66. # check if it's a rated battle
  67. if "|rated|" == line:
  68. self.rated_battle = True
  69. if "score could not be retrieved" in line:
  70. self.rated_battle = False
  71.  
  72. # extract rating
  73. if "|raw|" in line:
  74. line = line.replace('|raw|','')
  75. if self.p1 in line and line.lstrip(self.p1)[:11] == "'s rating: ":
  76. line = line.replace(self.p1,'')
  77. line = line.replace("'s rating: ",'')
  78. self.p1_rating = int(line[:4])
  79. if self.p2 in line and line.lstrip(self.p2)[:11] == "'s rating: ":
  80. line = line.replace(self.p2,'')
  81. line = line.replace("'s rating: ",'')
  82. self.p2_rating = int(line[:4])
  83.  
  84.  
  85. # stores the correct data in a list
  86. def store_data(self):
  87. if self.p1_rating < 1000 or self.p2_rating < 1000:
  88. self.rated_battle = False
  89. if len(self.p1_pokemon) == 6 and len(self.p2_pokemon) == 6:
  90. if self.correct_format:
  91. data = []
  92. data.append(self.filename)
  93.  
  94. for mon in self.p1_pokemon:
  95. data.append(mon)
  96. for mon in self.p2_pokemon:
  97. data.append(mon)
  98.  
  99. if self.rated_battle:
  100. data.append(self.p1_rating)
  101. data.append(self.p2_rating)
  102. else:
  103. data.append(None)
  104. data.append(None)
  105.  
  106. if self.p1_wins:
  107. data.append("win")
  108. else:
  109. data.append("loss")
  110.  
  111. return data
  112.  
  113. print("invalid replay:", self.filename)
  114. return []
  115.  
  116. ___________________________________________________________
  117.  
  118. import ReplayDataReader
  119. import os
  120. from urllib.request import Request, urlopen
  121. import io
  122. import xlsxwriter
  123.  
  124. class ReplayReader9000:
  125. """reads a list of VGC replays"""
  126.  
  127. def __init__(self, replays):
  128. self.replays = replays
  129. self.data = []
  130. self.replay_logs = []
  131.  
  132. # opens vgc replay urls and writes the log in txt files
  133. def write(self):
  134. index = 1
  135. for replay in self.replays:
  136. if '\n' in replay:
  137. replay = replay.rstrip('\n')
  138. replay = replay + ".log"
  139.  
  140. req = Request(replay, headers={'User-Agent': 'Mozilla/5.0'})
  141. replay_log = urlopen(req).read()
  142. path_name = "C:/Users/RoelH/PycharmProjects/DataMiningPokemonVGC/VGCreplays/"
  143. file_name = os.path.join(path_name, "vgc replay" + str(index) +".txt")
  144.  
  145. file = io.open(file_name, "w", encoding="utf-8")
  146. replay_log = ''.join(chr(i) for i in replay_log)
  147. file.write(replay_log)
  148. file.close()
  149.  
  150. index += 1
  151.  
  152. # reads the log txt files
  153. def read(self):
  154. for filename in os.listdir("C:/Users/RoelH/PycharmProjects/DataMiningPokemonVGC/VGCreplays/"):
  155. if filename.endswith(".txt"):
  156. replay_log = open("VGCreplays/" + filename, "rt")
  157. replayData = ReplayDataReader.ReplayData(replay_log, filename)
  158. replayData.read_data()
  159. replay_data = replayData.store_data()
  160. self.data.append(replay_data)
  161.  
  162.  
  163. # open the replay replay urls to readable text
  164. def open_replays(self):
  165. for replay in self.replays:
  166. if '\n' in replay:
  167. replay = replay.rstrip('\n')
  168. replay = replay + ".log"
  169. req = Request(replay, headers={'User-Agent': 'Mozilla/5.0'})
  170. raw_replay_log = urlopen(req).read()
  171. raw_replay_log = ''.join(chr(i) for i in raw_replay_log)
  172.  
  173. replay_log = []
  174. log = ""
  175. for char in raw_replay_log:
  176. log = log + char
  177. if char == '\n':
  178. replay_log.append(log)
  179. log = ""
  180. self.replay_logs.append(replay_log)
  181.  
  182.  
  183. # reads the replay data
  184. def read_replays(self):
  185. index = 1
  186. for replay_log in self.replay_logs:
  187. replayData = ReplayDataReader.ReplayData(replay_log, "replay" + str(index))
  188. replayData.read_data()
  189. replay_data = replayData.store_data()
  190. self.data.append(replay_data)
  191. if replay_data:
  192. index += 1
  193.  
  194.  
  195. # writes the dataset in an excel file
  196. def write_excel(self):
  197. workbook = xlsxwriter.Workbook("VGC_datamining_project.csv")
  198. file = workbook.add_worksheet()
  199.  
  200. # write headers
  201. columns = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"]
  202. file.write("A1", "replays")
  203. file.write("B1", "p1 pokemon")
  204. file.write("C1", "p1 pokemon")
  205. file.write("D1", "p1 pokemon")
  206. file.write("E1", "p1 pokemon")
  207. file.write("F1", "p1 pokemon")
  208. file.write("G1", "p1 pokemon")
  209. file.write("H1", "p2 pokemon")
  210. file.write("I1", "p2 pokemon")
  211. file.write("J1", "p2 pokemon")
  212. file.write("K1", "p2 pokemon")
  213. file.write("L1", "p2 pokemon")
  214. file.write("M1", "p2 pokemon")
  215. file.write("N1", "p1 rating")
  216. file.write("O1", "p2 rating")
  217. file.write("P1", "p1 result")
  218.  
  219. # write data to excel
  220. row = 2
  221. for replay_data in self.data:
  222. index = 0
  223. if replay_data:
  224. for column in columns:
  225. file.write(column + str(row), replay_data[index])
  226. index += 1
  227. row += 1
  228. workbook.close()
  229.  
  230.  
  231. def main():
  232. replays = open("Replays.txt", "rt")
  233. program = ReplayReader9000(replays)
  234. program.open_replays()
  235. program.read_replays()
  236. program.write_excel()
  237.  
  238.  
  239. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement