Guest User

Untitled

a guest
Oct 20th, 2017
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.05 KB | None | 0 0
  ## OFFICIAL NAMES -----------------------------------------------------------

  #' Replace mechanism and partner names with the official FACTS Info names
  #'
  #' Author  : A.Chafetz, USAID
  #' Purpose : replace mechanism/partner names with what is current/official
  #'           in FACTSInfo
  #' Date    : Oct 13, 2017
  #' Updated : Oct 20, 2017
  #'
  #' Reads the "Standard COP Matrix Report" workbook found in
  #' `report_folder_path`, works out the most recent non-missing prime partner
  #' name and mechanism name for every mechanism id, and overwrites
  #' `primepartner` / `implementingmechanismname` in `df_to_clean` with those
  #' values wherever a match on `mechanismid` exists. Rows without a match keep
  #' their existing names.
  #'
  #' The report is expected to contain the columns `Operating Unit`,
  #' `Mechanism Identifier`, `Prime Partner` and `Mechanism Name`, with the
  #' repeated name columns for later years carrying a `__<n>` suffix
  #' (e.g. `Prime Partner__1`).
  #' NOTE(review): that suffix is presumably produced by readxl's handling of
  #' duplicated headers; newer readxl versions may name duplicates differently
  #' -- confirm against the installed version.
  #'
  #' @param df_to_clean Data frame with `mechanismid`, `primepartner` and
  #'   `implementingmechanismname` columns.
  #' @param report_folder_path Folder that holds exactly one file matching
  #'   `*Standard COP Matrix Report*.xls`.
  #' @param report_start_year Year represented by the un-suffixed name
  #'   columns; column suffix `n` is treated as `report_start_year + n`.
  #' @return `df_to_clean` with `mechanismid` converted to character and the
  #'   two name columns replaced by the official names where available.
  cleanup_mechs <- function(df_to_clean, report_folder_path, report_start_year = 2014) {

    # dependencies (loaded through pacman, as the original did)
    pacman::p_load(readxl, tidyverse)

    # locate the report; fail with a clear message unless exactly one matches
    report_path <- Sys.glob(
      file.path(report_folder_path, "*Standard COP Matrix Report*.xls")
    )
    if (length(report_path) != 1) {
      stop(
        "Expected exactly one 'Standard COP Matrix Report' .xls file in '",
        report_folder_path, "' but found ", length(report_path), ".",
        call. = FALSE
      )
    }

    # import official mech and partner names; source: FACTS Info
    # (skip = 1 skips the first row of the sheet before the header row)
    df_names <- read_excel(report_path, skip = 1)

    # rename variable stubs
    names(df_names) <- gsub("Prime Partner", "primepartner", names(df_names))
    names(df_names) <- gsub("Mechanism Name", "implementingmechanismname", names(df_names))

    # figure out latest name for IM and partner
    df_names <- df_names %>%

      # rename variables that don't fit the <stub>__<n> pattern
      rename(
        operatingunit = `Operating Unit`,
        mechanismid = `Mechanism Identifier`,
        primepartner__0 = primepartner,
        implementingmechanismname__0 = implementingmechanismname
      ) %>%

      # reshape long
      gather(type, name, -operatingunit, -mechanismid) %>%

      # split out type and year
      # (eg type = primepartner__1 --> type = primepartner, year = 1)
      separate(type, c("type", "year"), sep = "__") %>%

      # convert the suffix into a calendar year
      mutate(year = as.numeric(year) + report_start_year) %>%

      # drop lines/years with missing names
      filter(!is.na(name)) %>%

      # keep only the latest year's name per mechanism and name type
      group_by(operatingunit, mechanismid, type) %>%
      filter(year == max(year)) %>%
      ungroup() %>%

      # year must not take part in the reshape: if the latest partner name and
      # the latest mechanism name come from different years, keeping it would
      # produce two half-empty rows per mechanism and duplicate rows on join
      select(-year) %>%

      # reshape wide so primepartner and implementingmechanismname are two
      # separate columns to match the fact view dataset
      spread(type, name) %>%

      # convert mechanism id to string for merging back onto main df
      # (a scalar-test ifelse() here would collapse the column to one value)
      mutate(mechanismid = as.character(mechanismid)) %>%

      # keep only names with mechid, renamed with _F to identify as from FACTS
      select(mechanismid, implementingmechanismname, primepartner) %>%
      rename(
        implementingmechanismname_F = implementingmechanismname,
        primepartner_F = primepartner
      )

    # match mechanism id type for compatible merge
    df_to_clean <- mutate(df_to_clean, mechanismid = as.character(mechanismid))

    # merge in official names
    df_to_clean <- left_join(df_to_clean, df_names, by = "mechanismid")

    # replace prime partner and mech names with official names, falling back
    # to the existing name where FACTS has none
    df_to_clean <- df_to_clean %>%
      mutate(
        implementingmechanismname = coalesce(implementingmechanismname_F, implementingmechanismname),
        primepartner = coalesce(primepartner_F, primepartner)
      ) %>%
      select(-ends_with("_F"))

    # return visibly (ending on the assignment returned the value invisibly)
    df_to_clean
  }
Add Comment
Please, Sign In to add comment