- Remove rows whose factor level fails a frequency criterion (and drop the unused levels)
- studenttable <- table(data$Anon.Student.Id)
- l5eh0S53tB Qwq8d0du28 tyU2s0MBzm dvG32rxRzQ i8f2gg51r5 XL0eQIoG72
- 9890 7989 7665 7242 6928 6651
- biginstances <- studenttable>1000
- bigdata <- subset(data, (biginstances[Anon.Student.Id]))
- # Create some fake data
- dat <- data.frame(id = rep(letters[1:5], 1:5), y = rnorm(15))
- # tabulate the id variable
- tab <- table(dat$id)
- # Get the names of the ids that we care about.
- # In this case the ids that occur >= 3 times
- idx <- names(tab)[tab >=3]
- # Only look at the data that we care about
- dat[dat$id %in% idx,]
- biginstances <- studenttable>1000
- bigdata <- subset(data, (biginstances[Anon.Student.Id]))
- > fac <- factor(rep(letters[1:3],each = 3))
- > fac
- [1] a a a b b b c c c
- Levels: a b c
- > fac[-(1:3)]
- [1] b b b c c c
- Levels: a b c
- > droplevels(fac[-(1:3)])
- [1] b b b c c c
- Levels: b c
- require(plyr)
- set.seed(123)
- Data <- data.frame(var1 = sample(LETTERS[1:5], size = 100, replace = TRUE),
- var2 = 1:100)
- R> table(Data$var1)
- A B C D E
- 19 20 21 22 18
- ## count the rows per category, so that categories with 20 or fewer rows can be dropped
- mytable <- count(Data, vars = "var1")
- ## mytable <- as.data.frame(table(Data$var1))
- R> str(mytable)
- 'data.frame': 5 obs. of 2 variables:
- $ var1: Factor w/ 5 levels "A","B","C","D",..: 1 2 3 4 5
- $ freq: int 19 20 21 22 18
- Data <- join(Data, mytable)
- ## Data <- merge(Data, mytable)
- R> str(Data)
- 'data.frame': 100 obs. of 3 variables:
- $ var1: Factor w/ 5 levels "A","B","C","D",..: 3 2 3 5 3 5 5 4 3 1 ...
- $ var2: int 1 2 3 4 5 6 7 8 9 10 ...
- $ freq: int 21 20 21 18 21 18 18 22 21 19 ...
- mysubset <- droplevels(subset(Data, freq > 20))
- R> table(mysubset$var1)
- C D
- 21 22
- studenttable <- sort(studenttable, decreasing=TRUE)
- sum(studenttable>1000)
- 230
- sum(studenttable<1000)
- 344
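- 344 + 230 = 574, i.e. the two groups together account for 574 students (assuming no student has exactly 1000 rows, since both comparisons are strict)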
- idx <- names(studenttable[1:230])
- bigdata <- data[data$Anon.Student.Id %in% idx,]
- bigstudenttable <- table(bigdata$Anon.Student.Id)