Advertisement
Guest User

Untitled

a guest
Nov 9th, 2016
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.54 KB | None | 0 0
  1. require("RCurl")
  2. require("jsonlite")
  3. require("dplyr")
  4.  
  5. # Read in data from DB
  6.  
  7. walmart_features <- data.frame(fromJSON(getURL(URLencode('oraclerest.cs.utexas.edu:5001/rest/native/?query="select * from WALMART_FEATURES"'),httpheader=c(DB='jdbc:oracle:thin:@aevum.cs.utexas.edu:1521/f16pdb', USER='cs329e_qmn76', PASS='orcl_qmn76', MODE='native_mode', MODEL='model', returnDimensions = 'False', returnFor = 'JSON'), verbose = TRUE)))
  8.  
  9. walmart_stores <- data.frame(fromJSON(getURL(URLencode('oraclerest.cs.utexas.edu:5001/rest/native/?query="select * from WALMART_STORES"'),httpheader=c(DB='jdbc:oracle:thin:@aevum.cs.utexas.edu:1521/f16pdb', USER='cs329e_qmn76', PASS='orcl_qmn76', MODE='native_mode', MODEL='model', returnDimensions = 'False', returnFor = 'JSON'), verbose = TRUE)))
  10.  
  11. # Here the data is retrieved in batches because else it's takes to long for the REST server to respond
  12.  
  13. walmart_train1 <- data.frame(fromJSON(getURL(URLencode('oraclerest.cs.utexas.edu:5001/rest/native/?query="select * from WALMART_TRAIN where store_id in (1,2,3,4,5,6)"'),httpheader=c(DB='jdbc:oracle:thin:@aevum.cs.utexas.edu:1521/f16pdb', USER='cs329e_qmn76', PASS='orcl_qmn76', MODE='native_mode', MODEL='model', returnDimensions = 'False', returnFor = 'JSON'), verbose = TRUE)))
  14. walmart_train2 <- data.frame(fromJSON(getURL(URLencode('oraclerest.cs.utexas.edu:5001/rest/native/?query="select * from WALMART_TRAIN where store_id in (7,8,9,10,11,12)"'),httpheader=c(DB='jdbc:oracle:thin:@aevum.cs.utexas.edu:1521/f16pdb', USER='cs329e_qmn76', PASS='orcl_qmn76', MODE='native_mode', MODEL='model', returnDimensions = 'False', returnFor = 'JSON'), verbose = TRUE)))
  15. walmart_train3 <- data.frame(fromJSON(getURL(URLencode('oraclerest.cs.utexas.edu:5001/rest/native/?query="select * from WALMART_TRAIN where store_id in (13,14,15,16,17,18)"'),httpheader=c(DB='jdbc:oracle:thin:@aevum.cs.utexas.edu:1521/f16pdb', USER='cs329e_qmn76', PASS='orcl_qmn76', MODE='native_mode', MODEL='model', returnDimensions = 'False', returnFor = 'JSON'), verbose = TRUE)))
  16. walmart_train4 <- data.frame(fromJSON(getURL(URLencode('oraclerest.cs.utexas.edu:5001/rest/native/?query="select * from WALMART_TRAIN where store_id in (19,20,21,22,23,24)"'),httpheader=c(DB='jdbc:oracle:thin:@aevum.cs.utexas.edu:1521/f16pdb', USER='cs329e_qmn76', PASS='orcl_qmn76', MODE='native_mode', MODEL='model', returnDimensions = 'False', returnFor = 'JSON'), verbose = TRUE)))
  17. walmart_train5 <- data.frame(fromJSON(getURL(URLencode('oraclerest.cs.utexas.edu:5001/rest/native/?query="select * from WALMART_TRAIN where store_id in (25,26,27,28,29,30)"'),httpheader=c(DB='jdbc:oracle:thin:@aevum.cs.utexas.edu:1521/f16pdb', USER='cs329e_qmn76', PASS='orcl_qmn76', MODE='native_mode', MODEL='model', returnDimensions = 'False', returnFor = 'JSON'), verbose = TRUE)))
  18.  
  19. # Combine data
  20. walmart_train = rbind(walmart_train1, walmart_train2, walmart_train3, walmart_train4, walmart_train5)
  21.  
  22. # Reformat the date format and get ready to join
  23. walmart_features$DATE_OF_WEEK <- as.Date(walmart_features$DATE_OF_WEEK, "%m/%d/%Y")
  24. walmart_train$DATE_OF_WEEK <- as.Date(walmart_train$DATE_OF_WEEK, "%Y-%m-%d")
  25.  
  26. # Join
  27. data = merge(merge(walmart_train, walmart_features, by=c("STORE_ID","DATE_OF_WEEK"), all=FALSE),walmart_stores, by="STORE_ID")
  28.  
  29. data$ISHOLIDAY = data$ISHOLIDAY.x
  30. data$ISHOLIDAY.x = NULL
  31. data$ISHOLIDAY.y = NULL
  32. data
  33.  
  34. #Display column names
  35. names(data)
  36.  
  37. # Display a subset and summary of the data frame
  38. summary(data)
  39. head(data)
  40.  
  41. subset <- subset(data, STORE_ID == 5)
  42. head(subset)
  43.  
  44. subset2 <- subset(df, ISHOLIDAY = "TRUE")
  45. head(subset2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement