Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the normality-test package and the member export, then normalise
# the text columns used by the rest of the script.
library(nortest)
members <- read.csv("~/f1data/members.txt")

# Force plain character vectors (the columns may arrive as factors).
for (text_col in c("city", "country", "bdate")) {
  members[[text_col]] <- as.character(members[[text_col]])
}

# Replace empty city / country values with an explicit "no data" label.
for (place_col in c("city", "country")) {
  is_blank <- members[[place_col]] == ""
  members[[place_col]][is_blank] <- "Н/д"
}
# Frequency tables of cities and countries, most frequent first, and
# bar charts of the top entries (absolute counts, then percentages).
city_table <- sort(table(members$city), decreasing = TRUE)
country_table <- sort(table(members$country), decreasing = TRUE)

# Absolute counts. A wider bottom margin leaves room for vertical labels.
# The index vectors deliberately skip one position in each table
# (position 1 for cities, position 2 for countries).
par(mar = c(7, 4, 4, 2) + 0.1)
barplot(
  city_table[2:25],
  las = 2, cex.names = 0.8, cex.axis = 0.8,
  main = "Самые популярные города", ylab = "Кол-во человек"
)
barplot(
  country_table[(1:9)[-2]],
  las = 2, cex.names = 0.8, cex.axis = 0.8,
  main = "Самые популярные страны", ylab = "Кол-во человек"
)

# Percentage shares as horizontal bars. rev() puts the largest share on
# top; the wider left margin leaves room for the labels.
city_share <- city_table / sum(city_table) * 100
country_share <- country_table / sum(country_table) * 100
par(mar = c(5, 7, 4, 2) + 0.1)
barplot(
  rev(city_share[1:25]),
  las = 2, cex.names = 0.8, cex.axis = 0.8,
  main = "Самые популярные города", xlab = "%", horiz = TRUE
)
barplot(
  rev(country_share[1:20]),
  las = 2, cex.names = 0.8, cex.axis = 0.8,
  main = "Самые популярные страны", xlab = "%", horiz = TRUE
)

# Back to the default margins.
par(mar = c(5, 4, 4, 2) + 0.1)
# Human-readable sex label: code 1 -> "Жен.", code 2 -> "Муж.",
# anything else (including missing) -> "Н/д".
sex_labels <- rep("Н/д", length(members$sex))
sex_labels[which(members$sex == 1)] <- "Жен."
sex_labels[which(members$sex == 2)] <- "Муж."
members$sex_text <- sex_labels

# Bar charts of the label counts, first absolute, then as percentages.
sex_table <- table(members$sex_text)
barplot(sex_table, ylab = "Кол-во человек", main = "Пол участника")
barplot(prop.table(sex_table) * 100, ylab = "%", main = "Пол участника")
# Top-25 first names and surnames, separately for sex code 2 (male)
# and sex code 1 (female). Tables are sorted most frequent first.
par(mar = c(6.5, 4, 4, 2) + 0.1)

is_male <- members$sex == 2
is_female <- members$sex == 1

male_name_table <- sort(table(members$first_name[is_male]), decreasing = TRUE)
barplot(
  male_name_table[1:25],
  ylab = "Кол-во человек", main = "Самые популярные мужские имена", las = 2
)

male_surname_table <- sort(table(members$last_name[is_male]), decreasing = TRUE)
barplot(
  male_surname_table[1:25],
  ylab = "Кол-во человек", main = "Самые популярные мужские фамилии", las = 2
)

female_name_table <- sort(
  table(members$first_name[is_female]),
  decreasing = TRUE
)
barplot(
  female_name_table[1:25],
  ylab = "Кол-во человек", main = "Самые популярные женские имена", las = 2
)

female_surname_table <- sort(
  table(members$last_name[is_female]),
  decreasing = TRUE
)
barplot(
  female_surname_table[1:25],
  ylab = "Кол-во человек", main = "Самые популярные женские фамилии", las = 2
)
# Extract the year from a single dot-separated date string.
#
# x: one character string; the year is taken as the third dot-separated
#    field.
# Returns the third field as a number, or NA when the string does not
# split into exactly three fields.
get_year <- function(x) {
  fields <- strsplit(x, ".", fixed = TRUE)[[1]]
  if (length(fields) == 3) {
    as.numeric(fields[3])
  } else {
    NA
  }
}

# Element-wise version of get_year() for character vectors.
V_get_year <- Vectorize(get_year)
# Extract the month from a single dot-separated date string.
#
# x: one character string; the month is taken as the second dot-separated
#    field.
# Returns the second field as a number, or NA when the string has fewer
# than two fields.
get_month <- function(x) {
  fields <- strsplit(x, ".", fixed = TRUE)[[1]]
  if (length(fields) >= 2) {
    as.numeric(fields[2])
  } else {
    NA
  }
}

# Element-wise version of get_month() for character vectors.
V_get_month <- Vectorize(get_month)
# Birth year and month per member, parsed from the bdate strings.
# V_get_year / V_get_month are already vectorised, so they are called
# once on the whole column instead of being wrapped in sapply() again.
# as.numeric() drops the names that Vectorize attaches and keeps the
# columns numeric even when every value is NA.
members$year <- as.numeric(V_get_year(members$bdate))
members$month <- as.numeric(V_get_month(members$bdate))
# Birth-year distribution for members born in 1960 or later: Pearson
# chi-square normality test, histogram, and a fitted normal curve, first
# for everyone, then for sex code 2 and sex code 1 separately.

# Test `years` for normality and draw a labelled histogram with a normal
# curve (sample mean / sd) scaled to the count axis.
#
# years:  numeric vector of birth years; NA values are dropped.
# main:   plot title.
# breaks: passed to hist() (default "Sturges", which is hist()'s default).
# Prints the test result and c(mean, sd); returns the histogram object
# invisibly.
plot_year_distribution <- function(years, main, breaks = "Sturges") {
  years <- years[!is.na(years)]
  print(pearson.test(years))

  # plot = FALSE: only the labelled plot below is drawn, not an extra
  # unlabelled one.
  histobj <- hist(years, breaks = breaks, plot = FALSE)
  plot(histobj, xlab = "Год", ylab = "Кол-во участников", main = main)

  m <- mean(years)
  std <- sd(years)
  print(c(m, std))

  # Density -> counts scale factor is n * bin width. Computing it this
  # way (rather than counts / density) stays finite for empty bins.
  multiplier <- sum(histobj$counts) * diff(histobj$breaks)[1]
  curve(dnorm(x, mean = m, sd = std) * multiplier,
        col = "darkblue", add = TRUE, yaxt = "n")
  invisible(histobj)
}

# which() drops rows where the comparison is NA instead of turning them
# into NA indices.
valid_year <- which(members$year >= 1960)
plot_year_distribution(
  members$year[valid_year],
  main = "Распределение года рождения участников"
)

valid_year <- which(members$year >= 1960 & members$sex == 2)
plot_year_distribution(
  members$year[valid_year],
  main = "Распределение года рождения мальчиков"
)

valid_year <- which(members$year >= 1960 & members$sex == 1)
plot_year_distribution(
  members$year[valid_year],
  main = "Распределение года рождения девочек",
  breaks = "Scott"
)
# One-way ANOVA of birth year by sex for members born in 1960 or later,
# excluding sex code 0.
# members$sex is converted to a factor here and stays a factor afterwards.
members$sex <- as.factor(members$sex)

# which() keeps only rows where the condition is TRUE; a plain logical
# subset would add all-NA rows wherever year or sex is missing.
anova_rows <- which(members$year >= 1960 & members$sex != 0)
fit <- aov(year ~ sex, data = members[anova_rows, ])

# Explicit print() so the table also appears when the script is source()d,
# like the other printed results in this script.
print(summary(fit))
plot(fit)
# Birth-month distribution: bar chart for all members, then a chi-squared
# test of the month counts for members with sex code > 1.
month_names <- c("Янв", "Фев", "Мар", "Апр", "Май", "Июн", "Июл", "Авг",
                 "Сен", "Окт", "Ноя", "Дек")

# Tabulate over the fixed levels 1..12 so there is always exactly one
# cell per label in month_names, even when a month never occurs.
monthcounts <- table(factor(members$month, levels = 1:12))
barplot(monthcounts, main = "Распределение месяца рождения участника",
        xlab = "Месяц", ylab = "Кол-во участников", names.arg = month_names)

# members$sex may be a factor at this point. as.numeric() on a factor
# returns internal level codes, so go through as.character() to recover
# the real sex codes.
members$sex <- as.numeric(as.character(members$sex))

# NOTE(review): with the real codes restored, `sex > 1` selects sex code 2
# only. The previous level-code comparison selected a different subset
# whenever code 0 was present -- confirm which subset is intended.
msextbl <- table(factor(members$month[which(members$sex > 1)], levels = 1:12))
print(chisq.test(msextbl))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement