Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 24th, 2012  |  syntax: None  |  size: 1.97 KB  |  hits: 12  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. remove factors with criteria
  2. studenttable <- table(data$Anon.Student.Id)
  3.        
  4. l5eh0S53tB Qwq8d0du28 tyU2s0MBzm dvG32rxRzQ i8f2gg51r5 XL0eQIoG72
  5.   9890       7989       7665       7242       6928       6651
  6.        
  7. biginstances <- studenttable>1000
  8.        
  9. bigdata <- subset(data, (biginstances[Anon.Student.Id]))
  10.        
  11. # Create some fake data
  12. dat <- data.frame(id = rep(letters[1:5], 1:5), y = rnorm(15))
  13. # tabulate the id variable
  14. tab <- table(dat$id)
  15. # Get the names of the ids that we care about.
  16. # In this case the ids that occur >= 3 times
  17. idx <- names(tab)[tab >=3]
  18. # Only look at the data that we care about
  19. dat[dat$id %in% idx,]
  20.        
  21. biginstances <- studenttable>1000
  22.        
  23. bigdata <- subset(data, (biginstances[Anon.Student.Id]))
  24.        
  25. > fac <- factor(rep(letters[1:3],each = 3))
  26. > fac
  27. [1] a a a b b b c c c
  28. Levels: a b c
  29. > fac[-(1:3)]
  30. [1] b b b c c c
  31. Levels: a b c
  32. > droplevels(fac[-(1:3)])
  33. [1] b b b c c c
  34. Levels: b c
  35.        
  36. require(plyr)
  37.  
  38. set.seed(123)
  39. Data <- data.frame(var1 = sample(LETTERS[1:5], size = 100, replace = TRUE),
  40.                    var2 = 1:100)
  41.  
  42.  
  43. R> table(Data$var1)
  44.  
  45.  A  B  C  D  E
  46. 19 20 21 22 18
  47.  
  48.  
  49. ## drop rows whose category occurs 20 times or fewer (i.e. keep only freq > 20)
  50.  
  51. mytable <- count(Data, vars = "var1")
  52.  
  53. ## mytable <- as.data.frame(table(Data$var1))
  54.  
  55. R> str(mytable)
  56. 'data.frame':   5 obs. of  2 variables:
  57.  $ var1: Factor w/ 5 levels "A","B","C","D",..: 1 2 3 4 5
  58.  $ freq: int  19 20 21 22 18
  59.  
  60. Data <- join(Data, mytable)
  61.  
  62. ## Data <- merge(Data, mytable)
  63.  
  64. R> str(Data)
  65. 'data.frame':   100 obs. of  3 variables:
  66.  $ var1: Factor w/ 5 levels "A","B","C","D",..: 3 2 3 5 3 5 5 4 3 1 ...
  67.  $ var2: int  1 2 3 4 5 6 7 8 9 10 ...
  68.  $ freq: int  21 20 21 18 21 18 18 22 21 19 ...
  69.  
  70.  
  71.  
  72. mysubset <- droplevels(subset(Data, freq > 20))
  73.  
  74. R> table(mysubset$var1)
  75.  
  76.  C  D
  77. 21 22
  78.        
  79. studenttable <- sort(studenttable, decreasing=TRUE)
  80.        
  81. sum(studenttable>1000)
  82. 230
  83. sum(studenttable<1000)
  84. 344
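  85. # note: 344 + 230 = 574; both comparisons are strict, so a student with exactly 1000 rows would be counted in neither sum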
  86.        
  87. idx <- names(studenttable[1:230])
  88. bigdata <- data[data$Anon.Student.Id %in% idx,]
  89.        
  90. bigstudenttable <- table(bigdata$Anon.Student.Id)