Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrapes per-match player and weapon statistics from stats.utctfpug.com.
#
# Each match page embeds its data as JSON inside the element with id
# "__NEXT_DATA__". For every match id in `number_seq` the script parses that
# JSON, keeps only matches whose four team tables all have 6 rows, tags the
# rows with match metadata, and accumulates them into `out_playerdata` and
# `out_weapondata`, which are written to CSV at the end.

library(rvest)
library("jsonlite")

# helper example
## https://www.r-bloggers.com/2015/01/using-rvest-to-scrape-an-html-table/
gc()

# Exploration ----
# Example of the main commands, run against a single match. The embedded JSON
# holds everything; playerData / weaponData / headshotData are themselves
# JSON-encoded strings and need a second fromJSON() pass.
match_url <- read_html("https://stats.utctfpug.com/match/2585")
bb <- html_elements(match_url, xpath = '//*[@id="__NEXT_DATA__"]') %>%
  html_text()
# Backslash-stripped copy; not used below, kept for interactive inspection.
bb1 <- gsub("\\\\", "", bb)
bb.json <- fromJSON(bb)
bb.json.PlayerData <- fromJSON(bb.json$props$pageProps$playerData)
bb.json.Weapondata <- fromJSON(bb.json$props$pageProps$weaponData)
bb.json.headshotdata <- fromJSON(bb.json$props$pageProps$headshotData)

# Scraping loop ----
init_val <- 2859
last_val <- 703
# end_val = 703  # limit below which matches no longer seem to exist

# Match ids to fetch. init_val > last_val, so this counts downwards.
number_seq <- seq(init_val, last_val)
# To fetch specific games instead:
# number_seq <- c(gamenumber, gamenumber, ...)
# e.g. https://stats.utctfpug.com/match/2878 (the match id is the last number
# of the link)

# Accumulators start empty, so the first *successful* match creates the
# tables. Keying on `i == init_val` would break every later append whenever
# the very first match errors or is filtered out.
out_playerdata <- NULL
out_weapondata <- NULL

for (i in number_seq) {
  tmp_str <- paste0("https://stats.utctfpug.com/match/", i)
  print(paste("Fetching:", tmp_str))

  # tryCatch lets the loop skip a failing match and carry on. Errors come from
  # different game modes like DM, players logged in wrong teams, missing
  # players and missing reports.
  tryCatch({
    match_url <- read_html(tmp_str)
    fetch_json <- html_elements(
      match_url,
      xpath = '//*[@id="__NEXT_DATA__"]'
    ) %>%
      html_text()
    dump_json <- fromJSON(fetch_json)

    # The HTML tables are no longer used for the data itself; they only feed
    # the team-size check below. Tables 1 and 3 are read as the red team,
    # tables 2 and 4 as the blue team.
    match_tables <- html_elements(match_url, "table")
    red_team_data <- html_table(match_tables[c(1, 3)], fill = TRUE)
    blu_team_data <- html_table(match_tables[c(2, 4)], fill = TRUE)

    team_red_actions <- red_team_data[[1]]
    team_red_general <- red_team_data[[2]]
    team_blu_actions <- blu_team_data[[1]]
    team_blu_general <- blu_team_data[[2]]

    # 6 rows per table is the marker for a 5-player team (presumably five
    # players plus one extra row -- TODO confirm). All four tables are
    # checked; `&&` is used because each comparison is a scalar.
    has_full_teams <- nrow(team_red_actions) == 6 &&
      nrow(team_blu_actions) == 6 &&
      nrow(team_red_general) == 6 &&
      nrow(team_blu_general) == 6

    if (has_full_teams) {
      # Collect the data from the embedded JSON.
      player_table_tmp <- fromJSON(dump_json$props$pageProps$playerData)
      weapon_table_tmp <- fromJSON(dump_json$props$pageProps$weaponData)

      # Remove spectators: keep only rows whose team is 0 or 1.
      player_table_tmp2 <-
        player_table_tmp[player_table_tmp$team %in% c(0, 1), ]

      # Lookup table of player ids and names.
      playerIDtable <- player_table_tmp2[, c("player_id", "name")]

      # Add the weapon names to the per-player weapon rows.
      weapon_table_tmp_comb <- merge(
        x = weapon_table_tmp$playerData,
        y = weapon_table_tmp$names,
        all.y = TRUE,
        by.x = "weapon_id",
        by.y = "id"
      )
      # Add the player names to the weapon table.
      weapon_table_tmp_comb <- merge(
        x = weapon_table_tmp_comb,
        y = playerIDtable,
        all.y = TRUE,
        by.x = "player_id",
        by.y = "player_id"
      )

      # General match information scraped from the page.
      gen_info <- html_elements(match_url, ".white") %>% html_text()
      gen_time <- html_elements(match_url, ".MatchSummary_small__LeJEu") %>%
        html_text()

      # Tag the player table.
      player_table_tmp2$game_type <- gen_info[1]
      player_table_tmp2$map_name <- gen_info[2]
      player_table_tmp2$server_info <- gen_time[1]
      player_table_tmp2$match_time_date <- gen_time[2]
      player_table_tmp2$match_url <- tmp_str

      # Tag the weapon table.
      weapon_table_tmp_comb$map_name <- gen_info[2]
      weapon_table_tmp_comb$match_time_date <- gen_time[2]
      weapon_table_tmp_comb$match_id <- i
      weapon_table_tmp_comb$match_url <- tmp_str

      # Build both combined tables before assigning either, so a failing
      # rbind (e.g. mismatched columns) skips this match entirely instead of
      # leaving the two outputs out of sync. The rbind stays inside the
      # tryCatch on purpose: one odd match must not abort the whole run.
      if (is.null(out_playerdata)) {
        print("started table")
        new_playerdata <- player_table_tmp2
        new_weapondata <- weapon_table_tmp_comb
      } else {
        print("appending to tables")
        new_playerdata <- rbind(out_playerdata, player_table_tmp2)
        new_weapondata <- rbind(out_weapondata, weapon_table_tmp_comb)
      }
      out_playerdata <- new_playerdata
      out_weapondata <- new_weapondata
    }
  }, error = function(e) {
    cat("ERROR :", conditionMessage(e), tmp_str, "\n")
  })
}

# Output ----
if (is.null(out_playerdata)) {
  stop("No match passed the team-size check; nothing to write.", call. = FALSE)
}

# NOTE: replace the two placeholder paths below with two *different* files;
# left identical, the second write overwrites the first.
# `out_playerdata` is written here: the previously referenced
# `out_playerdata_sel` is never created anywhere in this script.
write.csv2(
  out_playerdata,
  "<path2folder>/<path2folder_filename>.csv",
  row.names = FALSE
)
write.csv2(
  out_weapondata,
  "<path2folder>/<path2folder_filename>.csv",
  row.names = FALSE
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement