Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.24 KB | None | 0 0
  1. library(tidyr)
  2. library(reshape2)
  3. library(ggplot2)
  4. library(corrplot)
  5.  
  6. # split factor and non-factor columns (drop = FALSE: a lone column stays a data frame)
  7. is.fact <- vapply(df1, is.factor, logical(1))
  8. df1_cat <- df1[, is.fact, drop = FALSE] %>%
  9.   mutate(count = 1) %>%  # NOTE(review): mutate() presumably from dplyr, not loaded here - confirm
  10.   gather()  # long key/value form; the count column is gathered too
  11. df1_num <- df1[, !is.fact, drop = FALSE]
  12. df1_num_long <- melt(df1_num)  # long form used by the density plot
  13.  
  14. # density curve for every numeric variable, one free-scaled panel each
  15. num_var <- ggplot(df1_num_long, mapping = aes(x = value)) +
  16.   facet_wrap(~variable, scales = "free") +
  17.   stat_density() +
  18.   theme_bw()
  19. num_var
  20.  
  21. # bar chart of value counts for every gathered column, one free-scaled panel per key
  22. cat_var <- ggplot(data = df1_cat, mapping = aes(x = value)) +
  23.   geom_bar() + facet_wrap(~key, scales = 'free') +
  24.   theme_bw() +
  25.   theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))  # rotate x labels
  26. cat_var
  27.  
  28. # correlation matrix of the numeric variables, upper triangle, shown as numbers
  29. corrplot(cor(df1_num),
  30.          type = 'upper',
  31.          tl.col = 'black',
  32.          tl.srt = 45,
  33.          tl.cex = 0.6,
  34.          method = 'number',  # was misspelled 'mehtod', so method was never set
  35.          addCoef.col = 'black',
  36.          addCoefasPercent = TRUE,
  37.          number.cex = 0.5)
  38.  
  39.  
  40. # one hot encode the factor columns; NOTE(review): onehot() presumably from the 'onehot' package, not loaded here - confirm
  41. encoder <- onehot(df1[, is.fact, drop = FALSE])  # drop = FALSE: a lone factor column stays a data frame
  42. cat_encoded <- predict(encoder, df1[, is.fact, drop = FALSE])
  43. df_encoded <- cbind(df1_num, cat_encoded)  # numeric columns followed by the encoded columns
  44.  
  45. # correlation matrix of numeric plus one hot encoded variables, upper triangle, shown as numbers
  46. corrplot(cor(df_encoded),
  47.          type = 'upper',
  48.          tl.col = 'black',
  49.          tl.srt = 45,
  50.          tl.cex = 0.6,
  51.          method = 'number',  # was misspelled 'mehtod', so method was never set
  52.          addCoef.col = 'black',
  53.          addCoefasPercent = TRUE,
  54.          number.cex = 0.5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement