Advertisement
Guest User

Untitled

a guest
Jun 26th, 2019
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.29 KB | None | 0 0
  1. 'data.frame': 4521 obs. of 17 variables:
  2. $ age : int 30 33 35 30 59 35 36 39 41 43 ...
  3. $ job : chr "unemployed" "services" "management" "management" ...
  4. $ marital : chr "married" "married" "single" "married" ...
  5. $ education: chr "primary" "secondary" "tertiary" "tertiary" ...
  6. $ default : chr "no" "no" "no" "no" ...
  7. $ balance : int 1787 4789 1350 1476 0 747 307 147 221 -88 ...
  8. $ housing : chr "no" "yes" "yes" "yes" ...
  9. $ loan : chr "no" "yes" "no" "yes" ...
  10. $ contact : chr "cellular" "cellular" "cellular" "unknown" ...
  11. $ day : int 19 11 16 3 5 23 14 6 14 17 ...
  12. $ month : chr "oct" "may" "apr" "jun" ...
  13. $ duration : int 79 220 185 199 226 141 341 151 57 313 ...
  14. $ campaign : int 1 1 1 4 1 2 1 2 2 1 ...
  15. $ pdays : int -1 339 330 -1 -1 176 330 -1 -1 147 ...
  16. $ previous : int 0 4 1 0 0 3 2 0 0 2 ...
  17. $ poutcome : chr "unknown" "failure" "failure" "unknown" ...
  18. $ y : chr "no" "no" "no" "no" ...
  19.  
  # Import data to R
  # The file is semicolon-separated with a header row; text columns are kept as
  # character so they can be converted explicitly below.
  bank <- read.table(
    file = "bank.csv", sep = ";", header = TRUE, stringsAsFactors = FALSE
  )

  # Character data into numeric format.

  # Categorical columns become integer codes, numbered by alphabetical order of
  # their levels (the as.factor() default).
  # NOTE(review): for unordered categories these codes impose an arbitrary
  # ordering that distance-based clustering will treat as meaningful; consider
  # one-hot encoding instead.
  category_cols <- c("job", "marital", "education", "contact", "poutcome")
  bank[category_cols] <- lapply(
    bank[category_cols],
    function(col) as.numeric(as.factor(col))
  )

  # Yes/no columns become 1/0 indicators.
  yes_no_cols <- c("default", "housing", "loan", "y")
  bank[yes_no_cols] <- lapply(
    bank[yes_no_cols],
    function(col) ifelse(col == "yes", 1, 0)
  )

  # Months are coded by calendar position (jan = 1, ..., dec = 12). Going through
  # as.factor() would number them alphabetically (apr = 1, aug = 2, ...), which
  # scrambles their natural order.
  month_number <- match(tolower(bank$month), tolower(month.abb))
  unknown_month <- is.na(month_number) & !is.na(bank$month)
  if (any(unknown_month)) {
    stop(
      "Unrecognised month value(s): ",
      paste(unique(bank$month[unknown_month]), collapse = ", "),
      call. = FALSE
    )
  }
  bank$month <- as.numeric(month_number)
  35.  
  # Create normalization function.
  #
  # Min-max rescales a numeric (or logical) vector to the interval [0, 1].
  #
  # x      Numeric or logical vector.
  # na.rm  If TRUE (default), missing values are ignored when computing the
  #        range and stay NA in the result. If FALSE, any NA in `x` makes the
  #        whole result NA.
  #
  # Returns a double vector of the same length as `x`. A vector whose non-missing
  # values are all equal maps to 0 rather than NaN (0 / 0).
  normalize <- function(x, na.rm = TRUE) {
    stopifnot(is.numeric(x) || is.logical(x))

    # Empty or all-missing input has no range to scale by.
    if (all(is.na(x))) {
      return(as.numeric(x))
    }

    rng <- range(x, na.rm = na.rm)
    if (anyNA(rng)) {
      # Only reachable with na.rm = FALSE and at least one NA in x.
      return(rep(NA_real_, length(x)))
    }

    span <- rng[2] - rng[1]
    if (span == 0) {
      # Constant vector: avoid dividing by zero; NAs in x stay NA.
      return(as.numeric(x - rng[1]))
    }

    (x - rng[1]) / span
  }
  # Min-max scale every column to [0, 1]. This does not remove outliers; it puts
  # all columns on a common scale so that the Euclidean distances used by the
  # clustering steps are not dominated by the columns with the largest ranges.
  bank <- as.data.frame(lapply(bank, normalize))

  # Fixed seed so the random k-means starts (and therefore the clusters) are
  # reproducible.
  set.seed(20)
  bank_cluster <- kmeans(bank, centers = 17, nstart = 20)

  # Plotting
  bank_cluster$cluster <- as.factor(bank_cluster$cluster)

  # The data has more than two columns, so it cannot be passed to aes() as a
  # whole. Project the rows onto the first two principal components and colour
  # each point by its k-means cluster. The cluster labels live in a separate
  # plotting data frame so that `bank` itself is left unchanged.
  library("ggplot2")
  pca_scores <- prcomp(bank)$x
  plot_data <- data.frame(
    PC1 = pca_scores[, 1],
    PC2 = pca_scores[, 2],
    cluster = bank_cluster$cluster
  )
  ggplot(plot_data, aes(x = PC1, y = PC2, color = cluster)) +
    geom_point()
  49.  
  #install.packages("dbscan")
  library("dbscan")

  # DBSCAN needs a neighbourhood radius (eps) and a minimum neighbourhood size.
  # A usual way to choose eps is to look for the "knee" in the sorted k-nearest-
  # neighbour distance plot.
  # NOTE(review): the 90% quantile below is only a heuristic starting value;
  # inspect the plot and adjust eps to the knee for this data.
  min_pts <- 5
  knn_dist <- kNNdist(bank, k = min_pts - 1)
  kNNdistplot(bank, k = min_pts - 1)
  eps <- unname(quantile(knn_dist, probs = 0.9))
  abline(h = eps, lty = 2)

  db <- dbscan(bank, eps = eps, minPts = min_pts)

  # Plot DBSCAN results on the first two principal components of the data.
  # Cluster 0 is DBSCAN's noise label, so shift by one to get a valid colour
  # index for every point.
  db_scores <- prcomp(bank)$x[, 1:2]
  plot(db_scores, col = db$cluster + 1L, main = "DBSCAN", frame.plot = FALSE)

  # Hierarchical clustering on Euclidean distances (hclust's default linkage),
  # then cut the dendrogram into a fixed number of groups.
  clusters <- hclust(dist(bank))
  plot(clusters)
  clusterCut <- cutree(clusters, k = 17) # I think number of clusters should be 17
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement