Advertisement
zcbz

R Stats scraping ut99 CTF - (Not working anymore)

Apr 12th, 2023 (edited)
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 4.95 KB | Gaming | 0 0
  1. library(rvest)
  2. library("jsonlite")
  3.  
  # Reference example this script was modelled on:
  # https://www.r-bloggers.com/2015/01/using-rvest-to-scrape-an-html-table/

  gc()

  # ---- Exploration on a single match page ----
  # Download and parse one match page (match 2585) to inspect its structure.
  match_url <- read_html("https://stats.utctfpug.com/match/2585")

  # Text content of the element with id "__NEXT_DATA__"; it is parsed as JSON
  # below and is the source of the player/weapon/headshot data.
  bb <- html_text(html_elements(match_url, xpath = '//*[@id="__NEXT_DATA__"]'))

  # Same text with every backslash removed. Not used again in this script.
  bb1 <- gsub('\\\\', "", bb)

  # Parse the payload. playerData, weaponData and headshotData are each passed
  # through fromJSON() a second time to obtain their contents.
  bb.json <- fromJSON(bb)
  bb.json.PlayerData <- fromJSON(bb.json$props$pageProps$playerData)
  bb.json.Weapondata <- fromJSON(bb.json$props$pageProps$weaponData)
  bb.json.headshotdata <- fromJSON(bb.json$props$pageProps$headshotData)
  22.  
  23.  
  # ---- Scraping loop configuration ----
  # Match ids are visited from init_val down to last_val.
  init_val <- 2859
  # Original note (as end_val = 703): limit until more matches seem to not
  # exist.
  last_val <- 703

  # Ids to iterate over.
  # To fetch specific games instead, assign their ids directly, e.g.
  #   number_seq <- c(gamenumber, gamenumber, ...)
  # The match id is the last number of the match link, e.g.
  #   https://stats.utctfpug.com/match/2878
  number_seq <- seq(init_val, last_val)
  34.  
  35.  
  # ---- Scraping loop ----
  # Visits every match id in number_seq, and for each match that passes the
  # team-size validation appends its player rows to out_playerdata and its
  # weapon rows to out_weapondata.
  #
  # The accumulators start as NULL so that the first *valid* match creates the
  # tables, whichever id that is. Keying the initialisation on the first id of
  # the sequence would leave the tables undefined if that match errored or
  # failed validation, and every later append would then fail.
  out_playerdata <- NULL
  out_weapondata <- NULL

  for (i in number_seq) {

    tmp_str <- paste0("https://stats.utctfpug.com/match/", i)
    print(paste("Fetching:", tmp_str))

    # tryCatch lets the loop report an error and move on to the next match.
    # Errors come from different game modes like DM, players logged in wrong
    # teams, missing players and missing reports.
    tryCatch({

      match_url <- read_html(tmp_str)

      # JSON payload embedded in the page; source of the player/weapon data.
      fetch_json <- html_elements(
        match_url,
        xpath = '//*[@id="__NEXT_DATA__"]'
      ) %>%
        html_text()
      dump_json <- fromJSON(fetch_json)

      # The HTML tables are no longer used to get the data; they only feed the
      # validation below. Tables 1 and 3 are read as the red team's
      # actions/general tables, tables 2 and 4 as the blue team's.
      match_tables <- html_elements(match_url, "table")
      red_team_data <- html_table(match_tables[c(1, 3)], fill = TRUE)
      blu_team_data <- html_table(match_tables[c(2, 4)], fill = TRUE)

      team_red_actions <- red_team_data[[1]]
      team_red_general <- red_team_data[[2]]
      team_blu_actions <- blu_team_data[[1]]
      team_blu_general <- blu_team_data[[2]]

      # Keep only matches with 5 players on each side: all four tables must
      # have exactly 6 rows (presumably 5 players plus one summary row --
      # TODO confirm against a live page).
      is_five_a_side <- nrow(team_red_actions) == 6 &&
        nrow(team_blu_actions) == 6 &&
        nrow(team_red_general) == 6 &&
        nrow(team_blu_general) == 6

      if (is_five_a_side) {

        # Collect the data from the embedded JSON.
        player_table_tmp <- fromJSON(dump_json$props$pageProps$playerData)
        weapon_table_tmp <- fromJSON(dump_json$props$pageProps$weaponData)

        # Remove spectators: keep only rows whose team is 0 or 1.
        player_table_tmp2 <-
          player_table_tmp[player_table_tmp$team %in% c(0, 1), ]

        # Lookup table of player ids and names.
        playerIDtable <- player_table_tmp2[, c("player_id", "name")]

        # Add the weapon names to the per-player weapon rows.
        weapon_table_tmp_comb <- merge(
          x = weapon_table_tmp$playerData,
          y = weapon_table_tmp$names,
          all.y = TRUE,
          by.x = "weapon_id",
          by.y = "id"
        )

        # Add the player names to the weapon table.
        weapon_table_tmp_comb <- merge(
          x = weapon_table_tmp_comb,
          y = playerIDtable,
          all.y = TRUE,
          by.x = "player_id",
          by.y = "player_id"
        )

        # General match information scraped from the page.
        gen_info <- html_elements(match_url, ".white") %>%
          html_text()
        gen_time <- html_elements(match_url, ".MatchSummary_small__LeJEu") %>%
          html_text()

        # Annotate the player table.
        player_table_tmp2$game_type       <- gen_info[1]
        player_table_tmp2$map_name        <- gen_info[2]
        player_table_tmp2$server_info     <- gen_time[1]
        player_table_tmp2$match_time_date <- gen_time[2]
        player_table_tmp2$match_url       <- tmp_str

        # Annotate the weapon table.
        weapon_table_tmp_comb$map_name        <- gen_info[2]
        weapon_table_tmp_comb$match_time_date <- gen_time[2]
        weapon_table_tmp_comb$match_id        <- i
        weapon_table_tmp_comb$match_url       <- tmp_str

        if (is.null(out_playerdata)) {
          print("started table")
        } else {
          print("appending to tables")
        }

        # Build both grown tables before assigning either one, so an rbind
        # failure cannot leave the player and weapon tables out of step.
        grown_playerdata <- rbind(out_playerdata, player_table_tmp2)
        grown_weapondata <- rbind(out_weapondata, weapon_table_tmp_comb)
        out_playerdata <- grown_playerdata
        out_weapondata <- grown_weapondata
      }

    }, error = function(e) {
      cat("ERROR :", conditionMessage(e), tmp_str, "\n")
    })

  }
  140.  
  # ---- Export ----
  # Write the accumulated tables with write.csv2 (semicolon-separated, comma as
  # decimal mark), without row names.
  # NOTE: both placeholder paths below are identical. Replace them with two
  # distinct file paths, otherwise the second call overwrites the first file.
  # The player table is out_playerdata, which is the object the scraping loop
  # builds; out_playerdata_sel is never defined in this script.
  write.csv2(
    out_playerdata,
    "<path2folder>/<path2folder_filename>.csv",
    row.names = FALSE
  )
  write.csv2(
    out_weapondata,
    "<path2folder>/<path2folder_filename>.csv",
    row.names = FALSE
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement