Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(tidyr)
- library(reshape2)
- library(ggplot2)
- library(corrplot)
# Split the columns of df1 into categorical (factor) and numeric groups.
# NOTE(review): every non-factor column is treated as numeric from here on;
# confirm df1 has no character/date columns, otherwise cor() below will fail.
is.fact <- vapply(df1, is.factor, logical(1))

# Long format of the categorical columns for the faceted bar chart:
#   key   - name of the source column
#   value - the level observed in that row, stored as character
# drop = FALSE keeps a data frame even when only one factor column exists.
# No helper "count" column is added: geom_bar() counts rows by itself, and an
# extra column would be reshaped too and show up as a bogus "count" facet.
df1_cat <- df1[, is.fact, drop = FALSE] %>%
  pivot_longer(
    cols = everything(),
    names_to = "key",
    values_to = "value",
    # Factors with different level sets cannot share one factor column.
    values_transform = list(value = as.character)
  )

# Numeric columns, in wide form (for cor()) and long form (for density plots).
df1_num <- df1[, !is.fact, drop = FALSE]
df1_num_long <- melt(df1_num)
# Density curve for each numeric variable, one panel per variable.
# Scales are free so that every panel uses its own x and y range.
num_var <- ggplot(data = df1_num_long, mapping = aes(x = value))
num_var <- num_var + stat_density()
num_var <- num_var + facet_wrap(vars(variable), scales = "free")
num_var <- num_var + theme_bw()
num_var
# Bar chart of level frequencies for each categorical variable, one panel per
# variable (column `key`), each with its own axis ranges.
cat_var <- ggplot(data = df1_cat, mapping = aes(x = value))
cat_var <- cat_var + geom_bar()
cat_var <- cat_var + facet_wrap(vars(key), scales = "free")
# The axis-text override has to follow theme_bw(), which would otherwise
# reset it; labels are turned vertical so long level names stay readable.
cat_var <- cat_var +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
cat_var
# Correlation matrix of the numeric variables, upper triangle only.
# `method` used to be misspelled, so the requested "number" style was never
# applied; coefficients are now printed as numbers, expressed as percentages.
corrplot(
  cor(df1_num),
  method = "number",
  type = "upper",
  tl.col = "black",           # colour of the variable labels
  tl.srt = 45,                # rotation of the variable labels (degrees)
  tl.cex = 0.6,               # size of the variable labels
  addCoef.col = "black",
  addCoefasPercent = TRUE,
  number.cex = 0.5            # size of the printed coefficients
)
# One-hot encode the categorical variables and append the indicator columns
# to the numeric variables.
# onehot() is namespace-qualified because the onehot package is not attached
# by the library() calls at the top of the script.
# drop = FALSE keeps a data frame even when df1 has a single factor column.
encoder <- onehot::onehot(df1[, is.fact, drop = FALSE])
cat_encoded <- predict(encoder, df1[, is.fact, drop = FALSE])
df_encoded <- cbind(df1_num, cat_encoded)
# Correlation matrix of all variables (numeric columns plus the one-hot
# encoded categorical columns), upper triangle only.
# `method` used to be misspelled, so the requested "number" style was never
# applied; coefficients are now printed as numbers, expressed as percentages.
corrplot(
  cor(df_encoded),
  method = "number",
  type = "upper",
  tl.col = "black",           # colour of the variable labels
  tl.srt = 45,                # rotation of the variable labels (degrees)
  tl.cex = 0.6,               # size of the variable labels
  addCoef.col = "black",
  addCoefasPercent = TRUE,
  number.cex = 0.5            # size of the printed coefficients
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement