Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(grDevices) # boxplot.stats()
- library(operator.tools) # %!in% logical operator
- library(tmap) # 'metro' data set
- library(magrittr) # piping
- library(dplyr) # exploratory data analysis verbs
- library(purrr) # recursive mapping of functions
- library(tibble) # improved version of a data.frame
- library(ggplot2) # dot plot
- library(ggrepel) # avoid label overlap
- # Session setup: a large `scipen` penalty makes R favour fixed over scientific
- # notation when printing, and a fixed seed makes the random sample drawn
- # below reproducible.
- options(scipen = 999)
- set.seed(888)
- # Load the 'metro' data set and keep a copy of the full object as `m_spdf`.
- data("metro")
- m_spdf <- metro
- # Draw 50 random rows from the attribute table (`@data` slot), as a tibble,
- # after dropping the `name_long` and `iso_a3` columns.
- m <- sample_n(
-   select(as_tibble(metro@data), -name_long, -iso_a3),
-   50
- )
- > m
- # A tibble: 50 x 10
- name pop1950 pop1960 pop1970 pop1980 pop1990
- <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
- 1 Sydney 1689935 2134673 2892477 3252111 3631940
- 2 Havana 1141959 1435511 1779491 1913377 2108381
- 3 Campinas 151977 293174 540430 1108903 1693359
- 4 Kano 123073 229203 541992 1349646 2095384
- 5 Omsk 444326 608363 829860 1032150 1143813
- 6 Ouagadougou 33035 59126 115374 265200 537441
- 7 Marseille 755805 928768 1182048 1372495 1418279
- 8 Taiyuan 196510 349535 621625 1105695 1636599
- 9 La Paz 319247 437687 600016 809218 1061850
- 10 Baltimore 1167656 1422067 1554538 1748983 1848834
- # ... with 40 more rows, and 4 more variables:
- # pop2000 <dbl>, pop2010 <dbl>, pop2020 <dbl>,
- # pop2030 <dbl>
- # Assign every sampled city to one of five rank-based groups (quintiles)
- # according to its `pop1990` value, using the full sample.
- m_all <- mutate(m, qnt_1990_all = dplyr::ntile(pop1990, 5))
- # Find the outlying values of a different variable (`pop1950`) as reported by
- # boxplot.stats(), exclude the rows holding those values, and recompute the
- # `pop1990` quintile groups on the rows that remain.
- m_out <- boxplot.stats(m$pop1950)[["out"]]
- m_trim <- mutate(
-   filter(m, pop1950 %!in% m_out),
-   qnt_1990_trim = dplyr::ntile(pop1990, 5)
- )
- # Compare the full-sample and trimmed-sample quintile assignments per city to
- # see whether removing the outliers moved any city into a different group.
- # Rows dropped as outliers have no trimmed quintile (NA) and are therefore
- # reported as "No change".
- # NOTE(review): the join key is `name` alone; this presumes names are unique
- # within the sample -- confirm.
- m_comp <-
-   left_join(m_all, select(m_trim, name, dplyr::contains("qnt")), "name") %>%
-   select(name, dplyr::contains("qnt"), everything()) %>%
-   mutate(
-     # TRUE only when a trimmed quintile exists and differs from the original.
-     qnt_1990_chng_lgl =
-       !(is.na(qnt_1990_trim) | qnt_1990_trim == qnt_1990_all),
-     qnt_1990_chng_dir = if_else(
-       qnt_1990_chng_lgl,
-       paste0(qnt_1990_all, " to ", qnt_1990_trim),
-       "No change"
-     )
-   )
- > m_comp %>% select(name,qnt_1990_chng_lgl,qnt_1990_chng_dir,everything())
- # A tibble: 50 x 14
- name qnt_1990_chng_lgl qnt_1990_chng_dir qnt_1990_all qnt_1990_trim
- <chr> <lgl> <chr> <dbl> <dbl>
- 1 Sydney FALSE No change 5 NA
- 2 Havana TRUE 4 to 5 4 5
- 3 Campinas TRUE 3 to 4 3 4
- 4 Kano FALSE No change 4 4
- 5 Omsk FALSE No change 3 3
- 6 Ouagadougou FALSE No change 1 1
- 7 Marseille FALSE No change 3 3
- 8 Taiyuan TRUE 3 to 4 3 4
- 9 La Paz FALSE No change 2 2
- 10 Baltimore FALSE No change 4 4
- # ... with 40 more rows, and 9 more variables: pop1950 <dbl>, pop1960 <dbl>,
- # pop1970 <dbl>, pop1980 <dbl>, pop1990 <dbl>, pop2000 <dbl>, pop2010 <dbl>,
- # pop2020 <dbl>, pop2030 <dbl>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement