Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 'data.frame': 4521 obs. of 17 variables:
- $ age : int 30 33 35 30 59 35 36 39 41 43 ...
- $ job : chr "unemployed" "services" "management" "management" ...
- $ marital : chr "married" "married" "single" "married" ...
- $ education: chr "primary" "secondary" "tertiary" "tertiary" ...
- $ default : chr "no" "no" "no" "no" ...
- $ balance : int 1787 4789 1350 1476 0 747 307 147 221 -88 ...
- $ housing : chr "no" "yes" "yes" "yes" ...
- $ loan : chr "no" "yes" "no" "yes" ...
- $ contact : chr "cellular" "cellular" "cellular" "unknown" ...
- $ day : int 19 11 16 3 5 23 14 6 14 17 ...
- $ month : chr "oct" "may" "apr" "jun" ...
- $ duration : int 79 220 185 199 226 141 341 151 57 313 ...
- $ campaign : int 1 1 1 4 1 2 1 2 2 1 ...
- $ pdays : int -1 339 330 -1 -1 176 330 -1 -1 147 ...
- $ previous : int 0 4 1 0 0 3 2 0 0 2 ...
- $ poutcome : chr "unknown" "failure" "failure" "unknown" ...
- $ y : chr "no" "no" "no" "no" ...
# Import data to R ----
# bank.csv is read as a semicolon-separated file with a header row. Text
# columns are kept as character vectors and encoded numerically below.
bank <- read.table(
  file = "bank.csv", sep = ";", header = TRUE, stringsAsFactors = FALSE
)

# Character data into numeric format ----

# Nominal columns become integer codes (levels in alphabetical order).
# NOTE(review): these codes impose an arbitrary ordering on unordered
# categories; consider one-hot encoding before distance-based clustering.
nominal_cols <- c("job", "marital", "education", "contact", "poutcome")
bank[nominal_cols] <- lapply(
  bank[nominal_cols],
  function(values) as.numeric(as.factor(values))
)

# Months are encoded in calendar order (jan = 1, ..., dec = 12). Going through
# as.factor() would number them alphabetically (apr = 1, aug = 2, ...), which
# makes numerically adjacent codes unrelated in time.
month_index <- match(bank$month, tolower(month.abb))
if (anyNA(month_index)) {
  stop(
    "bank$month contains values that are not lower-case month abbreviations",
    call. = FALSE
  )
}
bank$month <- as.numeric(month_index)

# Yes/no columns become 1/0 indicators ("yes" -> 1, anything else -> 0).
binary_cols <- c("default", "housing", "loan", "y")
bank[binary_cols] <- lapply(
  bank[binary_cols],
  function(values) ifelse(values == "yes", 1, 0)
)
# Min-max normalization.
#
# Linearly rescales `x` so that its smallest value maps to 0 and its largest
# value maps to 1.
#
# Arguments:
#   x      Numeric (or logical) vector.
#   na.rm  If TRUE (default), NA entries are ignored when finding the range
#          and stay NA in the result. If FALSE, any NA in `x` makes the whole
#          result NA.
#
# Returns a double vector of the same length as `x`. A vector whose non-NA
# values are all equal is mapped to 0 instead of NaN (0 / 0), so constant
# columns do not poison downstream distance computations.
normalize <- function(x, na.rm = TRUE) {
  stopifnot(is.numeric(x) || is.logical(x))

  # Empty or all-NA input has no range to scale by.
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }

  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]

  # Only reachable with na.rm = FALSE and NA present in x.
  if (is.na(span)) {
    return(rep(NA_real_, length(x)))
  }

  # Constant vector: avoid dividing by zero; NA positions stay NA.
  if (span == 0) {
    return(as.numeric(x - rng[1]))
  }

  (x - rng[1]) / span
}
# Rescale every column to [0, 1] so that variables on large scales (e.g.
# balance, duration) do not dominate the Euclidean distances used by the
# clustering below. Min-max scaling does NOT remove outliers; an extreme
# value still compresses the rest of its column towards one end of the range.
bank <- as.data.frame(lapply(bank, normalize))

# k-means ----
# Fixed seed so the 20 random starts are reproducible.
# NOTE(review): k = 17 appears to mirror the number of variables, which does
# not determine the number of clusters; confirm with an elbow or silhouette
# analysis.
set.seed(20)
bank_cluster <- kmeans(bank, 17, nstart = 20)

# plotting ----
library(ggplot2)

# The data has more than two dimensions, so the points are projected onto the
# first two principal components for display. Cluster labels are kept in a
# separate plotting frame (as a factor, for a discrete colour scale) rather
# than being written into `bank` or the kmeans object, so later steps still
# see only the feature columns.
bank_pca <- prcomp(bank)
plot_data <- data.frame(
  PC1 = bank_pca$x[, 1],
  PC2 = bank_pca$x[, 2],
  cluster = as.factor(bank_cluster$cluster)
)

# print() so the plot is also drawn when the script is run via source().
print(
  ggplot(plot_data, aes(x = PC1, y = PC2, color = cluster)) +
    geom_point()
)
# install.packages("dbscan")
library("dbscan")

# DBSCAN ----
dbscan_input <- as.matrix(bank)

# minPts: common rule of thumb is data dimensionality plus one (or higher).
min_pts <- ncol(dbscan_input) + 1

# eps is normally read off the "knee" of the sorted k-NN distance curve
# (k = minPts - 1). The curve is drawn for visual inspection.
kNNdistplot(dbscan_input, k = min_pts - 1)

# NOTE(review): the 90th percentile of the k-NN distances is only a starting
# guess so the script runs end to end; replace it with the knee value seen in
# the plot above.
knn_dist <- kNNdist(dbscan_input, k = min_pts - 1)
db_eps <- unname(quantile(knn_dist, probs = 0.9))
abline(h = db_eps, lty = 2)

db <- dbscan(dbscan_input, eps = db_eps, minPts = min_pts)
print(db)

# Plot DBSCAN results
# hullplot() draws the points with a convex hull per cluster; with more than
# two columns it displays the first two principal components.
hullplot(dbscan_input, db, main = "DBSCAN")
# Hierarchical clustering ----
# Agglomerative clustering on the pairwise Euclidean distances between rows,
# using hclust()'s default linkage.
clusters <- hclust(dist(bank))

# Leaf labels are suppressed: with thousands of observations (4521 in the
# source data) they overlap into an unreadable band under the dendrogram.
plot(clusters, labels = FALSE)

# Cut the tree into 17 groups.
# NOTE(review): 17 is the author's guess for the number of clusters; confirm
# against the dendrogram heights before relying on it.
n_groups <- 17
clusterCut <- cutree(clusters, k = n_groups)

# Outline the chosen groups on the dendrogram and report their sizes.
rect.hclust(clusters, k = n_groups)
print(table(clusterCut))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement