Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Trim high-end outliers: keep only sales priced below the mean plus 2.5
# standard deviations. Only the upper tail is removed. na.rm = TRUE keeps a
# missing price from turning the cutoff into NA (which would drop every row);
# which() still discards rows whose SalePrice is itself NA.
sf <- sf[
  which(
    sf$SalePrice <
      mean(sf$SalePrice, na.rm = TRUE) + 2.5 * sd(sf$SalePrice, na.rm = TRUE)
  ),
]

# Violin plot of sale price by sale year, one violin per year, with the
# per-year mean price overlaid as a white point.
home_value_violin <- ggplot(
  sf,
  aes(x = SaleYr, y = SalePrice, fill = SaleYr)
) +
  geom_violin(color = "grey50") +
  scale_fill_manual(values = pallete_7_colors) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", size = 2, colour = "white") +
  plotTheme() +
  # The fill legend would only repeat the x axis, so hide it.
  theme(legend.position = "none") +
  scale_y_continuous(labels = comma) +
  # Axis titles are set here only; earlier xlab()/ylab() calls were overridden
  # by this labs() call and have been removed.
  labs(
    x = "Year",
    y = "Sale Price($)",
    title = "Distribution of San Francisco home prices",
    subtitle = "Nominal prices (2009 - 2015); Sale price means visualized as points",
    caption = "Source: San Francisco Office of the Assessor-Recorder\n@KenSteif & @SimonKassel"
  )

# Display the plot, then write it to disk as an 8 x 4 inch PNG.
home_value_violin
ggsave(
  "plot2_violin.png",
  home_value_violin,
  width = 8,
  height = 4,
  device = "png"
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement