Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## OFFICIAL NAMES -------------------------------------------------------------------------------------------------
- cleanup_mechs <- function(df_to_clean, report_folder_path, report_start_year = 2014) {
- # Author : A.Chafetz, USAID
- # Purpose : replace mechanism/partner names with what is current/official in FACTSInfo
- # Date : Oct 13, 2017
- # Updated : Oct 20, 2017
- #dependencies
- pacman::p_load(readxl, tidyverse)
- #import official mech and partner names; source: FACTS Info
- df_names <- read_excel(Sys.glob(file.path(report_folder_path,"*Standard COP Matrix Report*.xls")), skip = 1)
- #rename variable stubs
- names(df_names) <- gsub("Prime Partner", "primepartner", names(df_names))
- names(df_names) <- gsub("Mechanism Name", "implementingmechanismname", names(df_names))
- #figure out latest name for IM and partner (should both be from the same year)
- df_names <- df_names %>%
- #rename variables that don't fit pattern
- rename(operatingunit = `Operating Unit`, mechanismid = `Mechanism Identifier`,
- primepartner__0 = primepartner, implementingmechanismname__0 = implementingmechanismname) %>%
- #reshape long
- gather(type, name, -operatingunit, -mechanismid) %>%
- #split out type and year (eg type = primeparnter__1 --> type = primepartner, year = 1)
- separate(type, c("type", "year"), sep="__") %>%
- #add year (assumes first year if report is 2014)
- mutate(year = as.numeric(year) + report_start_year) %>%
- #drop lines/years with missing names
- filter(!is.na(name)) %>%
- #group to figure out latest year with names and keep only latest year's names (one obs per mech)
- group_by(operatingunit, mechanismid, type) %>%
- filter(year==max(year)) %>%
- ungroup() %>%
- #reshape wide so primepartner and implementingmechanismname are two seperate columsn to match fact view dataset
- spread(type, name) %>%
- #convert mechanism id to string for merging back onto main df
- mutate(mechanismid = ifelse(!is.character(mechanismid), as.character(mechanismid), mechanismid)) %>%
- #keep only names with mechid and renaming with _F to identify as from FACTS
- select(mechanismid, implementingmechanismname, primepartner) %>%
- rename(implementingmechanismname_F = implementingmechanismname, primepartner_F = primepartner)
- #match mechanism id type for compatible merge
- df_to_clean <- mutate(df_to_clean, mechanismid = ifelse(!is.character(mechanismid), as.character(mechanismid), mechanismid))
- #merge in official names
- df_to_clean <- left_join(df_to_clean, df_names, by="mechanismid")
- #replace prime partner and mech names with official names
- df_to_clean <- df_to_clean %>%
- mutate(implementingmechanismname = ifelse(is.na(implementingmechanismname_F), implementingmechanismname, implementingmechanismname_F),
- primepartner = ifelse(is.na(primepartner_F), primepartner, primepartner_F)) %>%
- select(-ends_with("_F"))
- }
Add Comment
Please, Sign In to add comment