Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(modeest)
- library(mice)
# Load the insurance data set ----
# The CSV location can be overridden with the INSURANCE_CSV environment
# variable; by default it points at the original thesis directory. Reading
# through an explicit path means the script does not have to change the
# session's working directory with setwd().
insurance_csv <- Sys.getenv(
  "INSURANCE_CSV",
  unset = file.path("/home/vantu/Thesis", "insurance.csv")
)
if (!file.exists(insurance_csv)) {
  stop("Cannot find the insurance data file: ", insurance_csv, call. = FALSE)
}

data <- read.csv(insurance_csv)
# Keep a copy of the data exactly as it was read from disk.
original <- data
# Explicit print() so the preview also shows up when the script is source()d.
print(head(data))
# Basic exploration of the data set ----

# Number of rows and columns.
print(c("num of row ", nrow(data)))
print(c("num of column ", ncol(data)))

# Count male and female records.
# NOTE(review): `labels` are applied to the sorted unique values of `sex`, so
# this assumes the column holds exactly two distinct values whose sort order
# corresponds to female, then male -- confirm against the CSV.
data$sex <- factor(data$sex, labels = c("female", "male"))
print(table(data$sex))

# Mode (most frequent value) of the target attribute.
ux <- unique(data$sex)
ux <- ux[which.max(tabulate(match(data$sex, ux)))]
print(ux)

# Percentage of missing data.
# Blank out `sex` in randomly chosen rows to simulate missing values. The seed
# makes the chosen rows reproducible between runs, and the count is capped at
# the number of rows so that small data sets do not make sample() fail.
set.seed(42)
n_missing <- min(1000, nrow(data))
data[sample(seq_len(nrow(data)), n_missing), "sex"] <- NA

# Number of missing values in every column.
print(vapply(data, function(x) sum(is.na(x)), integer(1)))

# Row positions where `sex` is missing, and their share of all rows in percent.
na_index <- which(is.na(data$sex))
miss_data <- (length(na_index) / nrow(data)) * 100
print(miss_data)
# Normal distribution fitted to age ----
# Every curve below uses the sample mean and standard deviation of the ages
# taken from `original`.
x <- original$age
age_mean <- mean(x)
age_sd <- sd(x)

# dnorm(): probability density evaluated at each observed age.
y <- dnorm(x, mean = age_mean, sd = age_sd)
plot(
  x, y,
  main = "Normal distribution of age",
  xlab = "x",
  ylab = "probability Density",
  las = 1
)
# Vertical reference line through the mean age.
abline(v = age_mean)

# pnorm(): cumulative probability evaluated at each observed age.
y <- pnorm(x, mean = age_mean, sd = age_sd)
plot(x, y)

# rnorm(): one random draw per observation. The first argument of rnorm() is
# the number of draws, so the count is passed explicitly.
y <- rnorm(length(x), mean = age_mean, sd = age_sd)
plot(x, y)

# qnorm(): the quantile function takes probabilities in [0, 1], not ages.
# Feeding it the raw ages yields only NaN and makes plot() fail, so it is
# evaluated on an evenly spread grid of probabilities instead.
p <- ppoints(length(x))
y <- qnorm(p, mean = age_mean, sd = age_sd)
plot(p, y, xlab = "probability", ylab = "age quantile")
Advertisement
Add Comment
Please, Sign In to add comment