Advertisement
Guest User

Review_Datav4

a guest
Oct 13th, 2016
171
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.57 KB | None | 0 0
  ## Review_Datav4: combine the per-game Steam review CSVs into a single
  ## data frame, tag each review with game metadata, drop unused columns,
  ## and export the result as ReviewData_v4.csv.

  ## NOTE: the original script began with rm(list = ls()). That call wipes the
  ## caller's whole workspace and is not needed here, because every object
  ## used below is (re)assigned by this script. Run it in a fresh R session
  ## if a clean environment is wanted.

  ## Load packages
  library(dplyr)

  ## Use https://konklone.io/json/ to convert any individual jsonlines file into a CSV.
  ## All raw CSVs are available @
  ## https://drive.google.com/a/mst.edu/file/d/0B6wt0HET6pITVHJSZFo3YTNMWWc/view?usp=sharing

  ## Directory that holds the raw CSVs. Change this one line to relocate the data.
  data_dir <- "G:\\Users\\Chase\\Desktop\\IST3420Project\\Data"

  ## Read one raw review CSV from data_dir.
  ## Text columns are always read as character (not factor) so the result does
  ## not depend on the R version's stringsAsFactors default.
  read_reviews <- function(file_name) {
    read.csv(
      file.path(data_dir, file_name),
      header = TRUE,
      sep = ",",
      stringsAsFactors = FALSE
    )
  }

  ## Add the "Monetization", "Game_Name" and "Genre" columns to one game's
  ## reviews. These help us compare later once the datasets are joined together.
  ## rep(..., nrow()) keeps this working even if a CSV has zero rows.
  tag_game <- function(reviews, monetization, game_name, genre) {
    n_reviews <- nrow(reviews)
    reviews$Monetization <- rep(monetization, n_reviews)
    reviews$Game_Name <- rep(game_name, n_reviews)
    reviews$Genre <- rep(genre, n_reviews)
    reviews
  }

  ## Read each CSV into R and tag it with its game metadata
  ARMA3_DF <- tag_game(read_reviews("ARMA_3.csv"), "Paid", "ARMA 3", "Shooter")
  CS_DF <- tag_game(read_reviews("Counter_Strike.csv"), "Paid", "Counterstrike", "Shooter")
  CSGO_DF <- tag_game(
    read_reviews("Counter_Strike_GO.csv"),
    "Paid", "Counterstrike: Global Offensive", "Shooter"
  )
  DOTA2_DF <- tag_game(read_reviews("Dota_2.csv"), "Free", "DOTA 2", "MOBA")
  TF2_DF <- tag_game(read_reviews("TF2.csv"), "Free", "Team Fortress 2", "Shooter")
  ## NOTE(review): Warframe was labelled "Paid" in the original script, but it is
  ## a free-to-play title; corrected to "Free" so the paid/free comparison is
  ## not skewed. Confirm against the project's definition of "Paid".
  WarF_DF <- tag_game(read_reviews("Warframe.csv"), "Free", "Warframe", "MMORPG")

  ## Time to merge the dataframes
  ## rbind() (rather than bind_rows()) is kept on purpose: it fails loudly if the
  ## CSVs do not share the same columns instead of silently filling with NA.
  Review_Data <- rbind(ARMA3_DF, CS_DF, CSGO_DF, DOTA2_DF, TF2_DF, WarF_DF)

  Review_Data <- Review_Data %>%
    ## Remove columns that aren't necessary for our functions
    select(-c(
      num_groups, orig_url, num_badges, review_url, num_found_funny, date_updated,
      num_workshop_items, date_posted, found_helpful_percentage, num_voted_helpfulness,
      achievement_progress.num_achievements_percentage,
      achievement_progress.num_achievements_attained,
      achievement_progress.num_achievements_possible,
      profile_url, num_found_helpful, steam_id_number,
      friend_player_level, num_found_unhelpful, username, num_guides, num_comments,
      num_reviews, num_games_owned, num_screenshots
    )) %>%
    ## Rename columns for clarity
    rename(
      Recent_Played_Time = total_game_hours_last_two_weeks,
      Review = review,
      Total_Played_Time = total_game_hours,
      Rating = rating,
      Friends = num_friends
    ) %>%
    ## Replace missing (NA) friend counts with 0s for clarity/comparison
    mutate(Friends = ifelse(is.na(Friends), 0, Friends))

  ## Any remaining factors are converted to characters.
  ## Assigning into Review_Data[] keeps the object a data frame.
  Review_Data[] <- lapply(
    Review_Data,
    function(column) if (is.factor(column)) as.character(column) else column
  )

  ## Add new column "Review_Char_Count" to track how many characters were in a review.
  ## allowNA = TRUE yields NA instead of an error for reviews with invalid encoding.
  Review_Data$Review_Char_Count <- nchar(Review_Data$Review, allowNA = TRUE)

  ## Export data as CSV (written to the current working directory; row names
  ## are written as the first, unnamed column, as in the original output)
  write.csv(Review_Data, file = "ReviewData_v4.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement