Guest User

Untitled

a guest
Apr 29th, 2016
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.96 KB | None | 0 0
  # Start from the built-in airquality dataset and inject additional missing
  # values so that more than one variable has gaps to impute.
  data <- airquality
  data[4:10, 3] <- NA  # rows 4-10 of column 3 set to missing
  data[1:5, 4] <- NA   # rows 1-5 of column 4 set to missing

  # Removing categorical variables (columns 5 and 6).
  # Subset `data`, not `airquality`: re-reading the pristine dataset here
  # would silently discard the missing values injected above.
  data <- data[-c(5, 6)]
  summary(data)
  9.  
  10. #-------------------------------------------------------------------------------
  # Percentage of missing entries in `x`; used below to look for variables
  # (or observations) with more than 5% of their values missing.
  pMiss <- function(x) {
    n_missing <- sum(is.na(x))
    n_missing / length(x) * 100
  }
  13.  
  # Check each column: percentage of missing values per variable.
  # vapply() iterates over the columns directly and guarantees exactly one
  # numeric value per column, instead of coercing the data frame to a matrix
  # the way apply() does.
  vapply(data, pMiss, numeric(1))

  # Check each row: percentage of missing values per observation.
  # is.na() on a data frame yields a logical matrix, so the row-wise share of
  # missing cells is a single vectorized rowMeans() call.
  rowMeans(is.na(data)) * 100
  19.  
  20. #-------------------------------------------------------------------------------
  # Inspect the missing data pattern
  library(mice)

  # Tabulate the missing data pattern of `data`
  md.pattern(data)

  library(VIM)

  # Plot the missing data pattern; the returned object is kept in aggr_plot
  aggr_plot <- aggr(
    data,
    col = c("navyblue", "red"),
    numbers = TRUE,
    sortVars = TRUE,
    labels = names(data),
    cex.axis = 0.7,
    gap = 3,
    ylab = c("Histogram of missing data", "Pattern")
  )

  # Margin plot (with box plots) of the first two columns of `data`
  marginplot(data[1:2])
  33.  
  34. #-------------------------------------------------------------------------------
  # Impute missing data using mice

  # Build m = 5 imputed datasets with up to 50 iterations, using the "pmm"
  # method; the fixed seed makes the run reproducible.
  # The argument is spelled out as `method` rather than relying on partial
  # matching of `meth`.
  tempData <- mice(data, m = 5, maxit = 50, method = "pmm", seed = 500)
  summary(tempData)

  # Get imputed data (for the Ozone variable)
  tempData$imp$Ozone

  # Possible imputation models provided by mice() are
  methods(mice)

  # What imputation method did we use?
  # Full element name: `$meth` only worked through partial matching of `$`.
  tempData$method

  # Get a completed dataset (observed and imputed values); the second
  # argument selects the first of the imputed datasets.
  completedData <- complete(tempData, 1)
  summary(completedData)
  52.  
  53. #-------------------------------------------------------------------------------
  # Diagnostic plots of the imputation result

  # Scatterplots: Ozone against Wind, Temp and Solar.R
  xyplot(
    tempData,
    Ozone ~ Wind + Temp + Solar.R,
    pch = 18,
    cex = 1
  )

  # Density plot: original vs imputed dataset
  densityplot(tempData)

  # Alternative view of the same comparison: stripplot()
  stripplot(
    tempData,
    pch = 20,
    cex = 1.2
  )
  64.  
  65. #-------------------------------------------------------------------------------
  # Fit a linear model on each imputed dataset and pool the results

  modelFit1 <- with(tempData, lm(Temp ~ Ozone + Solar.R + Wind))
  # Pool once and reuse the result for both the printout and the summary,
  # instead of running pool() twice on the same fits.
  pooledFit1 <- pool(modelFit1)
  pooledFit1
  summary(pooledFit1)

  # Using more imputed datasets (m = 50) with a different seed
  tempData2 <- mice(data, m = 50, seed = 245435)
  modelFit2 <- with(tempData2, lm(Temp ~ Ozone + Solar.R + Wind))
  summary(pool(modelFit2))
Add Comment
Please, Sign In to add comment