Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.90 KB | None | 0 0
  # Simulate 50 observations: two predictors (X1, X2) and a response (Y),
  # each drawn independently from a normal distribution.
  testout <- data.frame(
    X1 = rnorm(50, mean = 50, sd = 10),
    X2 = rnorm(50, mean = 5, sd = 1.5),
    Y = rnorm(50, mean = 200, sd = 25)
  )

  # Taint the data: overwrite observation 10 with planted values so that it
  # stands out from the simulated rows.
  testout[10, c("X1", "X2", "Y")] <- list(5, 5, 530)

  testout
  8. X1 X2 Y
  9. 1 44.20043 1.5259458 169.3296
  10. 2 40.46721 5.8437076 200.9038
  11. 3 48.20571 3.8243373 189.4652
  12. 4 60.09808 4.6609190 177.5159
  13. 5 50.23627 2.6193455 210.4360
  14. 6 43.50972 5.8212863 203.8361
  15. 7 44.95626 7.8368405 236.5821
  16. 8 66.14391 3.6828843 171.9624
  17. 9 45.53040 4.8311616 187.0553
  18. 10 5.00000 5.0000000 530.0000
  19. 11 64.71719 6.4007245 164.8052
  20. 12 54.43665 7.8695891 192.8824
  21. 13 45.78278 4.9921489 182.2957
  22. 14 49.59998 4.7716099 146.3090
  23. <snip>
  24. 48 26.55487 5.8082497 189.7901
  25. 49 45.28317 5.0219647 208.1318
  26. 50 44.84145 3.6252663 251.5620
  27.  
  # Use boxplots to review the data: one plot per original column, with the
  # column name as the y-axis label.
  invisible(lapply(
    c("X1", "X2", "Y"),
    function(column) boxplot(testout[[column]], ylab = column)
  ))
  32.  
  33. > #Alternative approach using Lund Test
  #' Critical value for the outlier test according to Lund
  #'
  #' See Lund, R. E. 1975, "Tables for An Approximate Test for Outliers in
  #' Linear Models", Technometrics, vol. 17, no. 4, pp. 473-476, and
  #' Prescott, P. 1975, "An Approximate Test for Outliers in Linear Models",
  #' Technometrics, vol. 17, no. 1, pp. 129-132.
  #'
  #' @param a Alpha (significance level), strictly between 0 and 1.
  #' @param n Number of data elements.
  #' @param q Number of independent variables (including intercept).
  #' @return The critical value, intended to be compared against the
  #'   standardized residuals of the fitted model.
  lundcrit <- function(a, n, q) {
    # Fail loudly on inputs that would otherwise produce a silent NaN:
    # alpha outside (0, 1) or no residual degrees of freedom left.
    stopifnot(
      is.numeric(a), is.numeric(n), is.numeric(q),
      a > 0, a < 1,
      n - q - 1 > 0
    )

    # Quantile of the F distribution at 1 - alpha / n with (1, n - q - 1)
    # degrees of freedom. Named f_quantile so the built-in `F` is not masked.
    f_quantile <- qf(1 - a / n, df1 = 1, df2 = n - q - 1, lower.tail = TRUE)

    sqrt((n - q) * f_quantile / (n - q - 1 + f_quantile))
  }
  45.  
  # Fit the linear model of Y on both predictors.
  testoutlm <- lm(Y ~ X1 + X2, data = testout)

  # Store the model diagnostics next to the data they belong to.
  testout$fitted <- fitted(testoutlm)
  testout$residual <- residuals(testoutlm)
  testout$standardresid <- rstandard(testoutlm)

  # Inputs for the Lund critical value: number of observations and number of
  # estimated coefficients (the intercept is one of them).
  n <- nrow(testout)
  q <- length(coef(testoutlm))
  crit <- lundcrit(0.1, n, q)

  # Blank out the response wherever the standardized residual exceeds the
  # critical value in either direction. Comparing the signed residual would
  # never flag an observation lying far below the fitted value.
  testout$Ynew <- ifelse(abs(testout$standardresid) > crit, NA, testout$Y)

  testout
  63. X1 X2 Y newX1 fitted residual standardresid
  64. 1 44.20043 1.5259458 169.3296 44.20043 209.8467 -40.5171222 -1.009507695
  65. 2 40.46721 5.8437076 200.9038 40.46721 231.9221 -31.0183107 -0.747624895
  66. 3 48.20571 3.8243373 189.4652 48.20571 203.4786 -14.0134646 -0.335955648
  67. 4 60.09808 4.6609190 177.5159 60.09808 169.6108 7.9050960 0.190908291
  68. 5 50.23627 2.6193455 210.4360 50.23627 194.3285 16.1075799 0.391537883
  69. 6 43.50972 5.8212863 203.8361 43.50972 222.6667 -18.8306252 -0.452070155
  70. 7 44.95626 7.8368405 236.5821 44.95626 223.3287 13.2534226 0.326339981
  71. 8 66.14391 3.6828843 171.9624 66.14391 148.8870 23.0754677 0.568829360
  72. 9 45.53040 4.8311616 187.0553 45.53040 214.0832 -27.0279262 -0.646090667
  73. 10 5.00000 5.0000000 530.0000 NA 337.0535 192.9465135 5.714275585
  74. 11 64.71719 6.4007245 164.8052 64.71719 159.9911 4.8141018 0.118618011
  75. 12 54.43665 7.8695891 192.8824 54.43665 194.7454 -1.8630426 -0.046004311
  76. 13 45.78278 4.9921489 182.2957 45.78278 213.7223 -31.4266180 -0.751115595
  77. 14 49.59998 4.7716099 146.3090 49.59998 201.6296 -55.3205552 -1.321042392
  78. 15 45.07720 4.2355525 192.9041 45.07720 213.9655 -21.0613819 -0.504406009
  79. 16 62.27717 7.1518606 186.6482 62.27717 169.2455 17.4027250 0.430262983
  80. 17 48.50446 3.0712422 228.3253 48.50446 200.6938 27.6314695 0.667366651
  81. 18 65.49983 5.4609713 184.8983 65.49983 155.2768 29.6214506 0.726319931
  82. 19 44.38387 4.9305222 213.9378 44.38387 217.7981 -3.8603382 -0.092354925
  83. 20 43.52883 8.3777627 203.5657 43.52883 228.9961 -25.4303732 -0.634725264
  84. <snip>
  85. 49 45.28317 5.0219647 208.1318 45.28317 215.3075 -7.1756966 -0.171560291
  86. 50 44.84145 3.6252663 251.5620 44.84145 213.1535 38.4084869 0.923804784
  87. Ynew
  88. 1 169.3296
  89. 2 200.9038
  90. 3 189.4652
  91. 4 177.5159
  92. 5 210.4360
  93. 6 203.8361
  94. 7 236.5821
  95. 8 171.9624
  96. 9 187.0553
  97. 10 NA
  98. 11 164.8052
  99. 12 192.8824
  100. 13 182.2957
  101. 14 146.3090
  102. 15 192.9041
  103. 16 186.6482
  104. 17 228.3253
  105. 18 184.8983
  106. 19 213.9378
  107. 20 203.5657
  108. <snip>
  109. 49 208.1318
  110. 50 251.5620
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement