Advertisement
Guest User

Untitled

a guest
Jul 25th, 2016
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.56 KB | None | 0 0
  1. library(grDevices) # boxplot.stats()
  2. library(operator.tools) # %!in% logical operator
  3. library(tmap) # 'metro' data set
  4. library(magrittr) # piping
  5. library(dplyr) # exploratory data analysis verbs
  6. library(purrr) # recursive mapping of functions
  7. library(tibble) # improved version of a data.frame
  8. library(ggplot2) # dot plot
  9. library(ggrepel) # avoid label overlap
  10.  
  11. options(scipen=999)
  12. set.seed(888)
  13.  
  14. data("metro")
  15.  
  16. m_spdf <- metro
  17. # Take a sample
  18. m <-
  19. metro@data %>%
  20. as_tibble %>%
  21. select(-name_long,-iso_a3) %>%
  22. sample_n(50)
  23.  
  24. > m
  25. # A tibble: 50 x 10
  26. name pop1950 pop1960 pop1970 pop1980 pop1990
  27. <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
  28. 1 Sydney 1689935 2134673 2892477 3252111 3631940
  29. 2 Havana 1141959 1435511 1779491 1913377 2108381
  30. 3 Campinas 151977 293174 540430 1108903 1693359
  31. 4 Kano 123073 229203 541992 1349646 2095384
  32. 5 Omsk 444326 608363 829860 1032150 1143813
  33. 6 Ouagadougou 33035 59126 115374 265200 537441
  34. 7 Marseille 755805 928768 1182048 1372495 1418279
  35. 8 Taiyuan 196510 349535 621625 1105695 1636599
  36. 9 La Paz 319247 437687 600016 809218 1061850
  37. 10 Baltimore 1167656 1422067 1554538 1748983 1848834
  38. # ... with 40 more rows, and 4 more variables:
  39. # pop2000 <dbl>, pop2010 <dbl>, pop2020 <dbl>,
  40. # pop2030 <dbl>
  41.  
  42. # Calculate the quintile groups for one variable (e.g., `pop1990`)
  43. m_all <-
  44. m %>%
  45. mutate(qnt_1990_all = dplyr::ntile(pop1990,5))
  46.  
  47. # Find the outliers for a different variable (e.g., 'pop1950')
  48. # and subset the df to exclude these outlier records
  49. m_out <- boxplot.stats(m$pop1950) %>% .[["out"]]
  50.  
  51. m_trim <-
  52. m %>%
  53. filter(pop1950 %!in% m_out) %>%
  54. mutate(qnt_1990_trim = dplyr::ntile(pop1990,5))
  55.  
  56. # Assess whether the outlier trimming changed any record's quintile group (trimmed records have NA for `qnt_1990_trim` and are reported as "No change")
  57. m_comp <-
  58. m_trim %>%
  59. select(name,dplyr::contains("qnt")) %>%
  60. left_join(m_all,.,"name") %>%
  61. select(name,dplyr::contains("qnt"),everything()) %>%
  62. mutate(qnt_1990_chng_lgl = !is.na(qnt_1990_trim) & qnt_1990_trim != qnt_1990_all,
  63. qnt_1990_chng_dir = if_else(qnt_1990_chng_lgl,
  64. paste0(qnt_1990_all," to ",qnt_1990_trim),
  65. "No change"))
  66.  
  67. > m_comp %>% select(name,qnt_1990_chng_lgl,qnt_1990_chng_dir,everything())
  68. # A tibble: 50 x 14
  69. name qnt_1990_chng_lgl qnt_1990_chng_dir qnt_1990_all qnt_1990_trim
  70. <chr> <lgl> <chr> <dbl> <dbl>
  71. 1 Sydney FALSE No change 5 NA
  72. 2 Havana TRUE 4 to 5 4 5
  73. 3 Campinas TRUE 3 to 4 3 4
  74. 4 Kano FALSE No change 4 4
  75. 5 Omsk FALSE No change 3 3
  76. 6 Ouagadougou FALSE No change 1 1
  77. 7 Marseille FALSE No change 3 3
  78. 8 Taiyuan TRUE 3 to 4 3 4
  79. 9 La Paz FALSE No change 2 2
  80. 10 Baltimore FALSE No change 4 4
  81. # ... with 40 more rows, and 9 more variables: pop1950 <dbl>, pop1960 <dbl>,
  82. # pop1970 <dbl>, pop1980 <dbl>, pop1990 <dbl>, pop2000 <dbl>, pop2010 <dbl>,
  83. # pop2020 <dbl>, pop2030 <dbl>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement