Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Open a Spark compute context. The returned handle is kept so the session
# can be closed with rxSparkDisconnect() once the examples have run.
cc <- rxSparkConnect(reset = TRUE)

# Describe the airline demo CSV through the HDFS file system object. The
# string "M" is read as a missing value and string columns become factors.
hdfsFileSystem <- RxHdfsFileSystem()
textData <- RxTextData(
  file = "/share/AirlineDemoSmall/AirlineDemoSmall.csv",
  missingValueString = "M",
  stringsAsFactors = TRUE,
  fileSystem = hdfsFileSystem
)
# Per-group callback handed to rxExecBy(): counts the rows of one partition.
#
# keys: key values identifying the partition. Part of the callback signature
#       but not used here.
# data: data source for the partition; it is read into memory with
#       rxImport().
#
# Returns the number of rows in the imported partition.
.Summary <- function(keys, data) {
  nrow(rxImport(data))
}
# single key
# Group the data by DayOfWeek and apply .Summary to each group. The recorded
# output below shows one list entry per day, each holding the group's keys,
# the callback result (row count) and a status triple.
result <- rxExecBy(
  inData = textData,
  keys = "DayOfWeek",
  func = .Summary
)
str(result)
#List of 7
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 4
# ..$ result: int 78875
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 6
# ..$ result: int 82987
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 5
# ..$ result: int 81304
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 7
# ..$ result: int 86159
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 1
# ..$ result: int 97975
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 3
# ..$ result: int 77725
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 2
# ..$ result: int 94975
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# multi keys
# Group by the combination of DayOfWeek and ArrDelay. In the recorded run
# this produced 3233 groups; only the first entry is inspected.
result <- rxExecBy(
  inData = textData,
  keys = c("DayOfWeek", "ArrDelay"),
  func = .Summary
)
length(result)
#[1] 3233
str(result[[1]])
#List of 3
# $ keys :List of 2
# ..$ : Factor w/ 7 levels "Monday","Sunday",..: 4
# ..$ : int 388
# $ result: int 2
# $ status:List of 3
# ..$ : chr "OK"
# ..$ : NULL
# ..$ : NULL

# Close the Spark session opened at the top of the script.
rxSparkDisconnect(cc)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement