Advertisement
Guest User

Untitled

a guest
Feb 14th, 2016
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.70 KB | None | 0 0
  # Load the member export; nortest is attached for pearson.test() used later.
  members <- read.csv("~/f1data/members.txt")
  library(nortest)

  # Make sure the free-text columns are plain character vectors.
  text_columns <- c("city", "country", "bdate")
  members[text_columns] <- lapply(members[text_columns], as.character)

  # Empty city / country entries are relabelled with the "Н/д" placeholder.
  for (place_column in c("city", "country")) {
    is_blank <- members[[place_column]] == ""
    members[[place_column]][is_blank] <- "Н/д"
  }
  9.  
  # Frequency tables of cities and countries, most frequent first.
  city_table <- sort(table(members$city), decreasing = TRUE)
  country_table <- sort(table(members$country), decreasing = TRUE)

  # First `n` cells of a sorted table, capped at the table length so that a
  # short table does not get padded with NA bars.
  take_top <- function(tbl, n) tbl[seq_len(min(n, length(tbl)))]

  # Removes the "Н/д" (not specified) bucket by name instead of relying on the
  # rank it happens to have after sorting.
  drop_unknown <- function(tbl) tbl[names(tbl) != "Н/д"]

  # Absolute counts, vertical bars; wide bottom margin for rotated labels.
  par(mar = c(7, 4, 4, 2) + 0.1)
  barplot(take_top(drop_unknown(city_table), 24), las = 2, cex.names = 0.8,
          cex.axis = 0.8, main = "Самые популярные города",
          ylab = "Кол-во человек")
  barplot(take_top(drop_unknown(country_table), 8), las = 2, cex.names = 0.8,
          cex.axis = 0.8, main = "Самые популярные страны",
          ylab = "Кол-во человек")

  # Shares in percent of all members, horizontal bars; wide left margin for
  # the labels. These charts keep the "Н/д" bucket. rev() puts the most
  # frequent entry at the top of the chart.
  par(mar = c(5, 7, 4, 2) + 0.1)

  barplot(rev(take_top(city_table, 25) / sum(city_table) * 100), las = 2,
          cex.names = 0.8, cex.axis = 0.8, main = "Самые популярные города",
          xlab = "%", horiz = TRUE)

  barplot(rev(take_top(country_table, 20) / sum(country_table) * 100), las = 2,
          cex.names = 0.8, cex.axis = 0.8, main = "Самые популярные страны",
          xlab = "%", horiz = TRUE)

  # Back to the default margins.
  par(mar = c(5, 4, 4, 2) + 0.1)
  27.  
  # Text label for the sex code: 1 -> "Жен.", 2 -> "Муж.", anything else
  # (including missing) -> "Н/д".
  sex_labels <- c("Жен.", "Муж.")[match(members$sex, c(1, 2))]
  sex_labels[is.na(sex_labels)] <- "Н/д"
  members$sex_text <- sex_labels

  # The same breakdown twice: absolute counts, then percentage of all members.
  sex_table <- table(members$sex_text)
  barplot(sex_table, ylab = "Кол-во человек", main = "Пол участника")
  barplot(prop.table(sex_table) * 100, ylab = "%", main = "Пол участника")
  34.  
  # Taller bottom margin so the rotated name labels fit.
  par(mar = c(6.5, 4, 4, 2) + 0.1)

  # Tabulates `values`, sorts the counts in decreasing order and draws the `n`
  # most frequent ones as a bar chart titled `title`.
  # `n` is capped at the number of distinct values so that a short table is
  # not padded with NA bars. Returns the full sorted table invisibly.
  plot_top_counts <- function(values, title, n = 25) {
    counts <- sort(table(values, deparse.level = 0), decreasing = TRUE)
    shown <- counts[seq_len(min(n, length(counts)))]
    barplot(shown, ylab = "Кол-во человек", main = title, las = 2)
    invisible(counts)
  }

  # Sex code 2 is labelled "Муж." and 1 is labelled "Жен." in this script.
  is_male <- members$sex == 2
  is_female <- members$sex == 1

  male_name_table <- plot_top_counts(
    members$first_name[is_male], "Самые популярные мужские имена"
  )

  male_surname_table <- plot_top_counts(
    members$last_name[is_male], "Самые популярные мужские фамилии"
  )

  female_name_table <- plot_top_counts(
    members$first_name[is_female], "Самые популярные женские имена"
  )

  female_surname_table <- plot_top_counts(
    members$last_name[is_female], "Самые популярные женские фамилии"
  )
  56.  
  # Extracts the year from a single dot-separated date string.
  #
  # x: one string; only its first element is inspected.
  # Returns the third dot-separated field as a number, or NA when the string
  # does not consist of exactly three fields.
  get_year <- function(x) {
    pieces <- strsplit(x, "[.]")[[1]]
    if (length(pieces) == 3) {
      as.numeric(pieces[3])
    } else {
      NA
    }
  }

  # Element-wise version of get_year() for whole vectors of date strings.
  V_get_year <- Vectorize(FUN = get_year)
  64.  
  # Extracts the month from a single dot-separated date string.
  #
  # x: one string; only its first element is inspected.
  # Returns the second dot-separated field as a number, or NA when the string
  # has fewer than two fields.
  get_month <- function(x) {
    pieces <- strsplit(x, "[.]")[[1]]
    if (length(pieces) >= 2) {
      as.numeric(pieces[2])
    } else {
      NA
    }
  }

  # Element-wise version of get_month() for whole vectors of date strings.
  V_get_month <- Vectorize(FUN = get_month)
  72.  
  # Birth year and month for every member; NA where the helper cannot find the
  # corresponding field in the date string.
  # V_get_year / V_get_month are already vectorised, so they are applied to the
  # whole column in one call instead of once per row through sapply().
  # unname() drops the date strings that Vectorize() attaches as names, and
  # as.numeric() keeps the columns numeric for empty or all-NA input.
  members$year <- as.numeric(unname(V_get_year(members$bdate)))
  members$month <- as.numeric(unname(V_get_month(members$bdate)))
  75.  
  # Analyses one sample of birth years:
  #   1. prints the Pearson chi-square normality test (nortest::pearson.test),
  #   2. draws the histogram with the given title,
  #   3. prints the sample mean and standard deviation,
  #   4. overlays the fitted normal density, rescaled to the count axis.
  #
  # years:  numeric vector of birth years; NA entries are dropped.
  # title:  main title of the histogram.
  # breaks: passed to hist(); "Sturges" is hist()'s own default.
  # Returns the histogram object invisibly.
  plot_year_distribution <- function(years, title, breaks = "Sturges") {
    years <- years[!is.na(years)]
    print(pearson.test(years))

    # plot = FALSE: the histogram is drawn once, by plot() below, with labels.
    histobj <- hist(years, breaks = breaks, plot = FALSE)
    plot(histobj, xlab = "Год", ylab = "Кол-во участников", main = title)

    m <- mean(years)
    std <- sd(years)
    print(c(m, std))

    # counts = density * n * bin width. Computing the factor from n and the
    # bin width (instead of counts[1] / density[1]) avoids 0 / 0 = NaN when
    # the first bin is empty.
    count_scale <- length(years) * diff(histobj$breaks)[1]
    curve(dnorm(x, mean = m, sd = std) * count_scale,
          col = "darkblue", add = TRUE, yaxt = "n")
    invisible(histobj)
  }

  # Only birth years from 1960 onwards are analysed.
  recent <- members$year >= 1960

  # All members.
  plot_year_distribution(
    members$year[recent],
    "Распределение года рождения участников"
  )

  # Members with sex code 2.
  plot_year_distribution(
    members$year[recent & members$sex == 2],
    "Распределение года рождения мальчиков"
  )

  # Members with sex code 1; this chart uses Scott's rule for the bin width.
  plot_year_distribution(
    members$year[recent & members$sex == 1],
    "Распределение года рождения девочек",
    breaks = "Scott"
  )
  111.  
  # One-way ANOVA of birth year by sex for members born in 1960 or later whose
  # sex code is not 0.
  members$sex <- as.factor(members$sex)
  # which() drops rows where the condition is NA (unknown year) instead of
  # turning them into all-NA rows of the model data.
  anova_rows <- which(members$year >= 1960 & members$sex != 0)
  fit <- aov(year ~ sex, data = members[anova_rows, ])
  # Explicit print() so the table also appears when the script is source()d,
  # like the other test results in this script.
  print(summary(fit))
  # Diagnostic plots of the fitted model.
  plot(fit)
  116.  
  # Bar labels for months 1..12.
  month_names <- c("Янв", "Фев", "Мар", "Апр", "Май", "Июн", "Июл", "Авг",
                   "Сен", "Окт", "Ноя", "Дек")

  # Tabulate against the fixed levels 1..12 so the table always has exactly
  # twelve cells in calendar order. A month that never occurs gets a zero bar
  # and values outside 1..12 are dropped, which keeps the cells aligned with
  # the twelve labels passed as names.arg.
  monthcounts <- table(factor(members$month, levels = 1:12))
  barplot(monthcounts, main = "Распределение месяца рождения участника",
          xlab = "Месяц", ylab = "Кол-во участников", names.arg = month_names)
  123.  
  # members$sex was turned into a factor for the ANOVA above. as.numeric() on a
  # factor returns the internal level codes (1, 2, 3, ...), not the original
  # 0/1/2 values, so go through character to recover the real codes.
  members$sex <- as.numeric(as.character(members$sex))

  # Chi-squared goodness-of-fit test (equal probability for every month) on
  # the birth months of members with sex code > 1, i.e. code 2.
  # NOTE(review): assumes "> 1" was meant to select code 2 only - confirm.
  # Fixed levels 1..12 keep months with zero births in the test.
  msextbl <- table(factor(members$month[members$sex > 1], levels = 1:12))
  print(chisq.test(msextbl))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement