# Untitled

library(tidyverse)
library(ggplot2)

# First graph
# Format proportions as percentage strings.
#
# x      : numeric vector of proportions (0.25 means 25%).
# digits : number of digits handed to formatC().
# format : formatC() format code; the default "f" gives fixed notation.
# ...    : further arguments forwarded unchanged to formatC().
#
# Returns a character vector holding each value multiplied by 100,
# formatted by formatC() and followed by "%".
percent <- function(x, digits = 0, format = "f", ...) {
  scaled <- x * 100
  as_text <- formatC(scaled, format = format, digits = digits, ...)
  paste0(as_text, "%")
}
# Load the first data set; the `Women` and `Men` columns are read as doubles.
job_data <- read_csv(
  "data_01.csv",
  col_types = cols(Women = col_double(), Men = col_double())
)

# Drop the 28th row of the file.
# NOTE(review): presumably a summary/total row -- confirm against data_01.csv.
job_data <- job_data[-28, ]

# Derive the plotting helpers in one step. Each conditional needs a single
# test only: whenever `Men >= Women` is FALSE, `Women >= Men` is necessarily
# TRUE, so a third fallback value could never be selected (rows with a
# missing share yield NA either way).
job_data <- job_data %>%
  mutate(
    # Occupations as a factor whose levels follow ascending `Men`.
    sorted_descr = factor(descr, levels = descr[order(Men)]),
    # Colour of the connecting segment: the colour of whichever share is
    # larger (ties take the men's colour).
    custom_color = ifelse(
      Men >= Women,
      rgb(76, 101, 112, maxColorValue = 255),
      rgb(177, 89, 83, maxColorValue = 255)
    ),
    # Text labels for both shares.
    men_perc = percent(Men),
    women_perc = percent(Women),
    # Horizontal justification of the two labels; the two columns always
    # take opposite values so the labels end up on opposite sides.
    custom_women_loc = ifelse(Men >= Women, 1.3, -0.3),
    custom_men_loc = ifelse(Men >= Women, -0.3, 1.3)
  )


# Dumbbell chart: one row per occupation, a point for the women's share and
# one for the men's share, joined by a segment.
women_colour <- rgb(177, 89, 83, maxColorValue = 255)
men_colour <- rgb(76, 101, 112, maxColorValue = 255)

ggplot(job_data, aes(x = sorted_descr)) +
  # Per-row segment colour is mapped as an aesthetic (used verbatim through
  # the identity scale below) instead of being passed as a bare vector, so
  # it stays attached to its row.
  geom_segment(
    aes(xend = sorted_descr, y = Women, yend = Men, colour = custom_color),
    size = 2,
    alpha = 0.7
  ) +
  geom_point(aes(y = Women), colour = women_colour, size = 2) +
  geom_text(
    aes(y = Women, label = women_perc, hjust = custom_women_loc),
    vjust = 0.3,
    size = 3,
    colour = women_colour
  ) +
  geom_point(aes(y = Men), colour = men_colour, size = 2) +
  geom_text(
    aes(y = Men, label = men_perc, hjust = custom_men_loc),
    vjust = 0.3,
    size = 3,
    colour = men_colour
  ) +
  # `custom_color` already holds colour strings; use them as they are.
  scale_colour_identity() +
  coord_flip() +
  theme_light() +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  # No trailing comma here: an empty trailing argument is not accepted by
  # every ggplot2 version.
  theme(
    legend.position = "none",
    panel.border = element_blank()
  ) +
  xlab("") +
  ylab("")


# Second graph
library(tidyverse)

# Read the second data set and keep its first six columns.
women_rate <- read_csv("data_02.csv")
women_rate <- women_rate[, 1:6]

# Reshape to long form (one row per year and industry column), keep only the
# part of the column name before the first ".", drop one industry, and add a
# numeric copy of the value plus a factor copy of the industry name.
gender_dataset_clean <- women_rate %>%
  gather(key = key, value = Value, -year) %>%
  separate(col = key, into = "IndustryName", sep = "\\.") %>%
  filter(
    IndustryName !=
      "Management, administration, banking & insurance, legal professions"
  ) %>%
  mutate(
    NumValue = as.numeric(Value),
    FactorIndustryName = as.factor(IndustryName)
  )

# Print through the data.frame method rather than the tibble one.
print.data.frame(gender_dataset_clean)

# One smoothed line per industry, with markers on the first and last year
# and the industry name printed at the last year.
ggplot(
  gender_dataset_clean,
  aes(
    x = year,
    y = NumValue,
    group = FactorIndustryName,
    col = FactorIndustryName
  )
) +
  geom_smooth(se = FALSE) +
  geom_point(
    data = subset(gender_dataset_clean, year == max(year) | year == min(year)),
    shape = 18,
    size = 3
  ) +
  # Single label layer, anchored on the last-year data point.
  # NOTE(review): labels anchored at x = Inf with a negative hjust fall
  # outside the panel and are clipped unless clipping is switched off (e.g.
  # coord_cartesian(clip = "off")), so no such layer is drawn here. The wider
  # right margin below is kept so the layout stays as it was.
  geom_text(
    data = subset(gender_dataset_clean, year == max(year)),
    aes(label = FactorIndustryName)
  ) +
  scale_y_continuous(breaks = seq(0, 600000, 50000)) +
  scale_x_continuous(breaks = seq(1970, 2016, 5)) +
  scale_color_brewer(palette = "Reds") +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.border = element_blank(),
    legend.position = "none",
    plot.margin = unit(c(1, 3, 1, 1), "lines")
  ) +
  xlab("") +
  ylab("") +
  ggtitle(
    "The rise in the employment rate of women has mostly happened in higher-qualified sectors",
    subtitle = "Female labour participation in Switzerland by job cluster, 1970-2016"
  )


# Third graph
library(readxl)
library(ggplot2)
library(tidyr)

# Years covered by the workbook, in sheet order: sheet 4 is paired with
# yea[1], sheet 5 with yea[2], and so on up to sheet 14.
yea <- c(2016, 2015, 2014, 2013, 2012, 2011, 2010, 2000, 1990, 1980, 1970)

# Read one sheet of data_03.xlsx and return rows 440-456 / columns 5-8 as
# Profession, Total, Men, Women, Year, with Men and Women expressed as
# percentages of Total (Total itself is set to 100).
#
# sheet : position of the sheet inside the workbook.
# year  : value stored in the `Year` column for every returned row.
read_year_sheet <- function(sheet, year) {
  dat <- read_xlsx("data_03.xlsx", sheet = sheet, skip = 4)
  dat <- dat[440:456, 5:8]
  names(dat) <- c("Profession", "Total", "Men", "Women")

  men <- as.numeric(dat$Men)
  total <- as.numeric(dat$Total)

  # Men's share in percent; the women's share is defined as the remainder,
  # so the raw `Women` counts are not needed.
  dat$Men <- round(men / total, digits = 4) * 100
  dat$Women <- 100 - dat$Men
  dat$Total <- 100
  dat$Year <- year
  dat
}

# Read every sheet with its matching year, then bind all pieces in a single
# step instead of growing the result inside a loop.
my_data3 <- do.call(
  rbind,
  Map(read_year_sheet, 3L + seq_along(yea), yea)
)

head(my_data3)

# Linearly interpolate the men's and women's shares over the years at 80
# points, separately for each profession.
# Each piece is built from `.x`, the rows of the current profession only;
# the profession name is a single value that data.frame() recycles over the
# 80 interpolated rows. approx() returns a list with elements `x` and `y`,
# so the resulting columns are men_approx.x, men_approx.y, women_approx.x,
# women_approx.y and Profession.
# NOTE(review): approx() needs at least two non-NA points per profession.
my_data_4 <- my_data3 %>%
  split(my_data3$Profession) %>%
  map_df(~ data.frame(
    men_approx = approx(.x$Year, .x$Men, n = 80),
    women_approx = approx(.x$Year, .x$Women, n = 80),
    Profession = .x$Profession[1]
  ))
head(my_data_4)

# One panel per profession: men's share (black line) and women's share (red
# line) over the years, with the band between them filled according to
# which of the two is larger.
ggplot(my_data3, aes(x = Year)) +
  geom_ribbon(aes(ymin = Men, ymax = Women, fill = Men < Women)) +
  geom_line(aes(y = Men), color = "black") +
  geom_line(aes(y = Women), color = "red") +
  # Fill values follow the order FALSE, TRUE of `Men < Women`.
  scale_fill_manual(
    name = "fill",
    values = alpha(c("black", "red"), 0.4)
  ) +
  facet_wrap(Profession ~ .) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.ticks.length = unit(.25, "cm")
  )