brianhaas19

Break Free from Plastic (Tidy Tuesday 2021-01-26)

Feb 1st, 2021
1,242
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ###################################################################################################
  2. # Break Free from Plastic (BFFP) Brand Audit 2020 - Top 10 Global Polluters.
  3. # Created by: reddit.com/user/brianhaas19
  4.  
  5. # Link to data
  6. # https://github.com/rfordatascience/tidytuesday/tree/master/data/2021/2021-01-26
  7.  
  8. # Setup
  9. ### Libraries
  10. library(tidyverse)
  11. library(patchwork)
  12. library(glue)
  13. library(ggtext)
  # Make the black-and-white theme the default for every plot built below.
  theme_set(theme_bw())

  ### Palette (hex codes) used by the plot:
  dark_blue <- "#1C5163"
  blue <- "#2E8598"
  orange <- "#FE7C5C"
  cream <- "#FBF8EF"

  # Load data
  ### Read the Tidy Tuesday plastics CSV straight from GitHub:
  plastics <- readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-26/plastics.csv"
  )
  25.  
  26. # Clean data
  27. # Most of this cleaning is not required for the plot that this notebook produces.
  28. # However it is useful for other analysis.
  29.  
  ### Build the cleaned working copy `df1` from `plastics` in one pipeline.
  ### Values that match none of the listed spellings (including NA) are kept
  ### as they are by the `TRUE ~ <column>` fallbacks.
  df1 <- plastics %>%
    mutate(
      country = case_when(
        # Duplicate spellings of the same country:
        country == "NIGERIA" ~ "Nigeria",
        country == "ECUADOR" ~ "Ecuador",
        country == "United Kingdom of Great Britain & Northern Ireland" ~ "United Kingdom",
        # Shorter display names:
        country == "Taiwan_ Republic of China (ROC)" ~ "Taiwan",
        country == "Cote D_ivoire" ~ "Ivory Coast",
        country == "United States of America" ~ "USA",
        # The "EMPTY" category is shown as "N/A":
        country == "EMPTY" ~ "N/A",
        TRUE ~ country
      )
    ) %>%
    # Drop the "Grand Total" rows (rows with a missing `parent_company` are
    # dropped by this comparison as well):
    filter(parent_company != "Grand Total") %>%
    # Harmonise names of top polluters that are spelled differently in 2019 and 2020:
    mutate(
      parent_company = case_when(
        parent_company == "Nestle" ~ "Nestlé",
        parent_company == "Philip Morris" ~ "Philip Morris International",
        parent_company == "Pepsico" ~ "PepsiCo",
        parent_company == "Perfetti van Melle" ~ "Perfetti Van Melle",
        parent_company == "P&G" ~ "Procter & Gamble",
        TRUE ~ parent_company
      )
    )
  56.  
  ### Long-format copy: the columns `empty` through `pvc` are stacked into a
  ### `plastic_type` / `total` pair, and the key columns are moved to the front.
  df1_long <- pivot_longer(
    df1,
    cols = empty:pvc,
    names_to = "plastic_type",
    values_to = "total"
  ) %>%
    relocate(country, year, parent_company, plastic_type, total)
  63.  
  # Process data
  # To plot the Top 10 Polluters for 2020, we need to identify them.
  # Companies are ranked by `count` (the number of 2020 rows per company); ties
  # are broken by `total` (the summed `grand_total`). The result has one row per
  # company with its name and an ordered factor label "<company> (<rank>)".
  # NOTE(review): `count` is presented as "number of countries" in the plot, which
  # presumes one row per country/company/year -- confirm against the data.
  top_10_company <- df1 %>%
    filter(year == 2020) %>%
    filter(!(parent_company %in% c("Unbranded", "null", "NULL", "Assorted"))) %>% # Exclude generic categories
    group_by(year, parent_company) %>%
    summarise(total = sum(grand_total, na.rm = TRUE),
              count = n(),
              .groups = "drop") %>% # explicit: return an ungrouped result, no message
    # Sort on both keys explicitly rather than relying on slice_max() keeping
    # the previous row order among tied counts:
    arrange(desc(count), desc(total)) %>%
    head(10) %>% # Leave out Ferrero Group, which is tied for 10th but lower on total plastics
    # Add ranks and make an ordered factor:
    mutate(rank = row_number()) %>%
    mutate(parent_company_rank = glue("{ parent_company } ({ rank })")) %>%
    # Reorder by -rank so that rank 1 is the last factor level:
    mutate(parent_company_rank = fct_reorder(parent_company_rank, -rank)) %>%
    dplyr::select(parent_company, parent_company_rank)
  81.  
  82. # Visualize
  83. # Suggested parameters for R notebook: fig.height=6, fig.width=12
  84. # Now that we have identified the Top 10 Polluters for 2020, we can plot their data for 2019 and 2020:
  ### Plotmath labels for the two facets, keyed by the facet variable's values:
  facet_names <- list(
    count = expression(paste(bold("Number of countries"), " in which plastic waste was recorded.")),
    total = expression(paste(bold("Total plastic waste items"), " recorded across all audits."))
  )

  ### Labeller passed to facet_wrap(): looks up the label(s) for `value` in
  ### `facet_names` and returns them as a list; `variable` is not used.
  ### NOTE(review): the ggplot2 labeller documentation describes the
  ### (variable, value) signature as deprecated -- confirm against the installed version.
  facet_labeller <- function(variable, value) {
    facet_names[value]
  }
  94.  
  ### Plot:
  # For the companies in `top_10_company`, summarise each year's row count and
  # summed `grand_total`, then draw them as dodged bars (one bar per year) in two
  # facets, flipped so the companies run down the vertical axis.
  g1 <- df1 %>%
    filter(parent_company %in% top_10_company$parent_company) %>%
    group_by(year, parent_company) %>%
    summarise(total = sum(grand_total, na.rm = TRUE), # total plastics
              count = n(), # number of countries
              .groups = "drop") %>% # ungroup so `year` can be converted below
    left_join(top_10_company, by = c("parent_company")) %>% # get the ranks
    mutate(year = factor(year)) %>%
    pivot_longer(cols = total:count) %>% # one row per company/year/measure
    ggplot() +
    # Faint dashed guide line behind each company's bars:
    geom_vline(aes(xintercept = parent_company_rank),
               color = "grey", linetype = "dashed", size = 0.25) +
    geom_col(aes(parent_company_rank, value, fill = year),
             position = "dodge") +
    # Value labels at the bar ends. geom_text() has no fill aesthetic, so only
    # `color` is mapped; it also provides the grouping needed for dodging.
    # format() is applied to the whole column, so labels share a common width.
    geom_text(aes(parent_company_rank, value,
                  label = format(value, nsmall = 0, big.mark = ",", scientific = FALSE),
                  color = year),
              position = position_dodge(width = 1),
              hjust = 0,
              show.legend = FALSE) +
    # Legend labels are HTML spans (rendered by element_markdown() in the theme):
    scale_fill_manual("Year", breaks = c("2020", "2019"), labels = paste("<span style='color:",
                                                                         c(orange, blue),
                                                                         "'>",
                                                                         c("2020", "2019"),
                                                                         "</span>"),
                      values = c(orange, blue)) +
    scale_color_manual(values = c(blue, orange)) +
    # Extra room at the upper end of the value axis so the labels are not clipped:
    scale_y_continuous(expand = expansion(mult = c(0, 0.11), add = c(0, 0))) +
    facet_wrap(~name, scales = "free_x", labeller = facet_labeller) +
    coord_flip() +
    labs(x = "Parent Company (Rank)", y = NULL,
         title = expression(paste(bold("Break Free from Plastic (BFFP) Brand Audit 2020"), " - Top 10 Global Polluters.")),
         subtitle = str_wrap("Each year thousands of volunteers around the world conduct brand audits on plastic waste. The results are submitted to the BFFP project for analysis. This plot highlights the top 10 Global Polluters from 2020, and includes their 2019 numbers for comparison. \n The BFFP project primarily ranks companies according to how many countries were found to be polluted by plastic waste from that company (this is shown in the plot on the left below). They also report the total number of plastic waste items recorded in brand audits for each company (this is shown in the plot on the right below).", 130),
         caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19"), "\tData source: ", italic("https://github.com/rfordatascience/tidytuesday/tree/master/data/2021/2021-01-26")))) +
    theme(plot.background = element_rect(fill = cream),
          plot.title = element_text(color = dark_blue, size = 16),
          plot.subtitle = element_text(color = dark_blue),
          plot.caption = element_text(color = dark_blue, hjust = 0),
          panel.grid = element_blank(),
          panel.background = element_rect(fill = cream),
          axis.title = element_text(color = dark_blue),
          axis.text = element_text(color = dark_blue),
          axis.ticks = element_line(color = dark_blue),
          strip.background = element_rect(fill = cream, color = NA),
          strip.text = element_text(color = dark_blue, size = 12, face = "bold"),
          legend.position = "top",
          legend.background = element_rect(fill = cream),
          legend.title = element_text(color = dark_blue),
          legend.text = element_markdown(size = 12, face = "bold", vjust = 0.2))
  # Print the plot:
  g1
  145.  
  146. ### Uncomment to save:
  147. # ggsave(filename = "break_free_from_plastic.png",
  148. #        plot = g1,
  149. #        width = 12, height = 6)
  150. ###################################################################################################
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×