Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Exploratory analysis of demographics.csv: runs PCA on age-group proportions
# and on income columns, then k-means clusters the rows in the space of the
# first two population principal components.
library(data.table)
library(ggplot2)

Demographics <- fread("demographics.csv")

# Cluster states on demographic
# Every column except the first two.
StatePredictors <- Demographics[, -(1:2), with = FALSE]
length(names(Demographics))

# Columns are selected by hard-coded position; 18:26 is the value the
# original expression `(28-10):26` evaluates to.
# NOTE(review): confirm these positions against the CSV header.
CityPredictorsIncome <- Demographics[, 18:26, with = FALSE]
CityPredictorsPopulation <- Demographics[, 4:16, with = FALSE]

# Normalise to proportions in each age group: every row is divided by that
# row's `population` value.
CityPredictorsPopulation <- CityPredictorsPopulation / Demographics$population

# Population proportions share a common scale, so they are only centred;
# the income columns are additionally scaled to unit variance.
PCAPopulation <- prcomp(CityPredictorsPopulation, center = TRUE)
PCAIncome <- prcomp(CityPredictorsIncome, center = TRUE, scale. = TRUE)

# Proportion of variance explained by components.
# `sdev` holds standard deviations, so it must be squared to get variances.
plot(PCAPopulation$sdev^2 / sum(PCAPopulation$sdev^2))

PCAPopulation$rotation[, 1] # First component has >45 year olds
PCAPopulation$rotation[, 2] # Second component has <45

# Scores on the first two principal components.
plot(PCAPopulation$x[, 1], PCAPopulation$x[, 2])

# k-means with 4 clusters on the first two component scores.
# kmeans() picks random starting centres, so cluster assignments can differ
# between runs; call set.seed() beforehand if reproducibility is required.
PCAKMeansClust <- kmeans(PCAPopulation$x[, 1:2], 4)

# Redraw the scatter and overlay the cluster centres in red.
plot(PCAPopulation$x[, 1], PCAPopulation$x[, 2])
points(PCAKMeansClust$centers, col = "red")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement