Advertisement
Not a member of Pastebin yet? Sign up: it unlocks many cool features!
## Steam review data preparation
## Reads six per-game review CSVs, tags every row with the game's
## monetization model, name and genre, stacks them into one data frame,
## drops the columns the analysis does not use, and writes a combined CSV.
##
## Use https://konklone.io/json/ to convert any individual jsonlines file into a CSV.
## All raw CSVs are available @
## https://drive.google.com/a/mst.edu/file/d/0B6wt0HET6pITVHJSZFo3YTNMWWc/view?usp=sharing
##
## NOTE: this script no longer calls rm(list = ls()). It assigns every object
## it uses, so wiping the caller's workspace was never needed.

## Load packages
library(dplyr)

## Folder holding the raw per-game CSVs. Change this one line to run the
## script on another machine.
data_dir <- "G:/Users/Chase/Desktop/IST3420Project/Data"

## Read one game's review CSV from data_dir and tag every row.
##   file_name    - CSV file name inside data_dir
##   game_name    - value for the Game_Name column
##   monetization - value for the Monetization column ("Paid" or "Free")
##   genre        - value for the Genre column
## Returns the data frame with Monetization, Game_Name and Genre appended
## (in that order) so the games can be compared once they are stacked.
load_game_reviews <- function(file_name, game_name, monetization, genre) {
  reviews <- read.csv(file.path(data_dir, file_name), header = TRUE, sep = ",")
  # rep() sizes the tag to the data, so a CSV with zero rows still gets the
  # three columns instead of failing on a length-1 replacement.
  row_count <- nrow(reviews)
  reviews$Monetization <- rep(monetization, row_count)
  reviews$Game_Name <- rep(game_name, row_count)
  reviews$Genre <- rep(genre, row_count)
  reviews
}

## Read each CSV into R and add the "paid/free", "game" and "genre" columns
ARMA3_DF <- load_game_reviews("ARMA_3.csv", "ARMA 3", "Paid", "Shooter")
CS_DF <- load_game_reviews("Counter_Strike.csv", "Counterstrike", "Paid", "Shooter")
CSGO_DF <- load_game_reviews(
  "Counter_Strike_GO.csv", "Counterstrike: Global Offensive", "Paid", "Shooter"
)
DOTA2_DF <- load_game_reviews("Dota_2.csv", "DOTA 2", "Free", "MOBA")
TF2_DF <- load_game_reviews("TF2.csv", "Team Fortress 2", "Free", "Shooter")
# NOTE(review): Warframe is a free-to-play title; the earlier version of this
# script tagged it "Paid". Confirm against the project's definition of "Paid".
WarF_DF <- load_game_reviews("Warframe.csv", "Warframe", "Free", "MMORPG")

## Time to merge the dataframes
## rbind() requires all six to share the same columns and fails loudly otherwise.
Review_Data <- rbind(ARMA3_DF, CS_DF, CSGO_DF, DOTA2_DF, TF2_DF, WarF_DF)

## Remove columns that aren't necessary for our functions, rename the
## remaining ones for clarity, and replace missing (NA) friend counts with 0
## for clarity/comparison.
Review_Data <- Review_Data %>%
  select(-c(
    num_groups, orig_url, num_badges, review_url, num_found_funny,
    date_updated, num_workshop_items, date_posted,
    found_helpful_percentage, num_voted_helpfulness,
    achievement_progress.num_achievements_percentage,
    achievement_progress.num_achievements_attained,
    achievement_progress.num_achievements_possible,
    profile_url, num_found_helpful, steam_id_number,
    friend_player_level, num_found_unhelpful, username, num_guides,
    num_comments, num_reviews, num_games_owned, num_screenshots
  )) %>%
  rename(
    Recent_Played_Time = total_game_hours_last_two_weeks,
    Review = review,
    Total_Played_Time = total_game_hours,
    Rating = rating,
    Friends = num_friends
  ) %>%
  mutate(Friends = ifelse(is.na(Friends), 0, Friends))

## Remaining factors are converted to characters.
## Assigning column-wise keeps Review_Data a data.frame, and nchar() below
## needs a character vector rather than a factor.
factor_cols <- vapply(Review_Data, is.factor, logical(1))
Review_Data[factor_cols] <- lapply(Review_Data[factor_cols], as.character)

## Add new column "Review_Char_Count" to track how many characters were in a review
## allowNA = TRUE yields NA instead of an error for invalid multibyte strings.
Review_Data$Review_Char_Count <- nchar(Review_Data$Review, allowNA = TRUE)

## Export data as CSV (written to the current working directory; row names
## are included as the first column, which is the write.csv default).
write.csv(Review_Data, file = "ReviewData_v4.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement