Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 16th, 2012  |  syntax: None  |  size: 2.23 KB  |  hits: 14  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Remove duplicate column combinations from a data frame in R
  2. sessionid             qf        qn         city
  3. 1  9cf571c8faa67cad2aa9ff41f3a26e38     cat   biddix          fresno
  4. 2  e30f853d4e54604fd62858badb68113a   caleb     amos                
  5. 3  2ad41134cc285bcc06892fd68a471cd7  daniel  folkers                
  6. 4  2ad41134cc285bcc06892fd68a471cd7  daniel  folkers                
  7. 5  63a5e839510a647c1ff3b8aed684c2a5 charles   pierce           flint
  8. 6  691df47f2df12f14f000f9a17d1cc40e       j    franz prescott+valley
  9. 7  691df47f2df12f14f000f9a17d1cc40e       j    franz prescott+valley
  10. 8  b3a1476aa37ae4b799495256324a8d3d  carrie mascorro            brea
  11. 9  bd9f1404b313415e7e7b8769376d2705    fred  morales       las+vegas
  12. 10 b50a610292803dc302f24ae507ea853a  aurora      lee                
  13. 11 fb74940e6feb0dc61a1b4d09fcbbcb37  andrew    price       yorkville
  14.        
  #' Sort a session table and plot the distribution of first-name lengths
  #'
  #' Orders the rows by session id, then first name, then last name, and draws
  #' a histogram of the number of characters in each first name.
  #'
  #' @param name A data frame with columns `sessionid`, `qf` (first name) and
  #'   `qn` (last name).
  #' @return The value of `hist()` (an object of class `"histogram"`),
  #'   returned invisibly as `hist()` does when it plots.
  sortDATA <- function(name) {
    stopifnot(
      is.data.frame(name),
      all(c("sessionid", "qf", "qn") %in% names(name))
    )

    # order() needs the column vectors themselves. Passing the column *names*
    # as string literals orders three length-1 vectors, which always yields 1,
    # so only the first row would be kept and nothing would be sorted.
    row_order <- order(name[["sessionid"]], name[["qf"]], name[["qn"]])
    sorted <- name[row_order, , drop = FALSE]

    # nchar() rejects factors, and `qf` is a factor whenever the data frame was
    # built with stringsAsFactors = TRUE, so coerce to character first.
    # The local is deliberately called `sname`: hist() derives its default
    # title and x-axis label from the argument expression.
    sname <- nchar(as.character(sorted[["qf"]]))
    hist(sname)
  }
  23.        
  # Toy data: the (a, b) pairs in rows 5-8 repeat those in rows 1-4, while
  # column d is different on every row.
  df <- data.frame(
    a = c(1:4, 1:4),
    b = c(4:1, 4:1),
    d = LETTERS[1:8]
  )

  # duplicated() on the two-column subset flags every row whose (a, b)
  # combination has already appeared; negating it keeps the first occurrence
  # of each combination together with all of that row's columns.
  df[!duplicated(df[c("a", "b")]), ]
  #   a b d
  # 1 1 4 A
  # 2 2 3 B
  # 3 3 2 C
  # 4 4 1 D
  34.        
  # Sample session table read from inline text. The header names four columns
  # while every data row has five fields, so read.table() uses the first field
  # (1..11) as row names. Blank cities are spelled NA so each row keeps the
  # same number of whitespace-separated fields. Rows 3/4 and 6/7 are exact
  # duplicates of each other.
  dat <- read.table(text = "               sessionid             qf        qn         city
1  9cf571c8faa67cad2aa9ff41f3a26e38     cat   biddix          fresno
2  e30f853d4e54604fd62858badb68113a   caleb     amos             NA
3  2ad41134cc285bcc06892fd68a471cd7  daniel  folkers             NA
4  2ad41134cc285bcc06892fd68a471cd7  daniel  folkers             NA
5  63a5e839510a647c1ff3b8aed684c2a5 charles   pierce           flint
6  691df47f2df12f14f000f9a17d1cc40e       j    franz prescott+valley
7  691df47f2df12f14f000f9a17d1cc40e       j    franz prescott+valley
8  b3a1476aa37ae4b799495256324a8d3d  carrie mascorro            brea
9  bd9f1404b313415e7e7b8769376d2705    fred  morales       las+vegas
10 b50a610292803dc302f24ae507ea853a  aurora      lee              NA
11 fb74940e6feb0dc61a1b4d09fcbbcb37  andrew    price       yorkville", sep = "", header = TRUE)
  47.        
  # Two ways to sort `dat` by sessionid, then qf, then qn.

  # NOTE(review): arrange() is not a base R function (plyr and dplyr each
  # export one) and no library() call is visible here — confirm which package
  # is attached before running this line.
  arrange(dat, sessionid, qf, qn)

  # Base R equivalent: with() lets order() see the columns of `dat` by name.
  with(dat, dat[order(sessionid, qf, qn), ])
  51.        
  # unique() on a data frame drops rows that are exact duplicates across ALL
  # columns, keeping the first occurrence of each.
  # NOTE(review): `my.data.frame` is not defined in the visible text —
  # presumably a placeholder for the data frame to de-duplicate; confirm.
  udf <- unique(my.data.frame)