Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Attach the tidyverse (read_csv, tibble, the dplyr verbs and ggplot2 are all
# used below) and make the black-and-white theme the default for every plot
# created in this session.
library(tidyverse)
theme_set(theme_bw())
# Reads an ICE export and returns the matrix consumed by plot_ice_aux().
#
# File layout, as implied by the indexing below (the file is read without a
# header row and column 1 is never used):
#   * row 1, columns 2..n: covariate value of each observation;
#   * the first row whose column 2 is NA marks where the ICE values start;
#   * every row after it: one grid point, with one column per observation.
# Grid points are looked up by position in unique() of the row-1 covariate
# values, so the rows are expected in that order -- TODO confirm against the
# tool that writes these files.
#
# Args:
#   csv_file: path to the CSV file.
#
# Returns:
#   A numeric matrix with one row per observation. The leading columns are the
#   observation's curve (one column per grid point); the second-to-last column
#   is the curve value at the observation's own covariate value; the last
#   column is that covariate value.
#
# Stops with an explicit message when the separator row, the ICE rows or the
# covariate values are missing or inconsistent.
generate_ice_table <- function(csv_file) {
  dat <- read_csv(csv_file, col_names = FALSE)
  n_col <- ncol(dat)
  if (n_col < 2L) {
    stop("`csv_file` must have at least two columns.", call. = FALSE)
  }

  # Only the FIRST NA in column 2 is the separator; later NAs (if any) are
  # treated as data, which is what the original `ix:nrow(dat)` ended up doing.
  ix_na <- which(is.na(dat[[2]]))
  if (length(ix_na) == 0L) {
    stop("No row with NA in column 2 found; cannot locate the ICE rows.",
         call. = FALSE)
  }
  ix_start_ice <- ix_na[1] + 1L
  if (ix_start_ice > nrow(dat)) {
    stop("No ICE rows found after the NA separator row.", call. = FALSE)
  }

  x_values <- as.numeric(dat[1, 2:n_col])
  if (!all(is.finite(x_values))) {
    stop("Row 1 must hold a finite numeric covariate value in every column ",
         "from column 2 onwards.", call. = FALSE)
  }
  unique_x_values <- unique(x_values)

  # Convert in place so the matrix shape survives even when there is a single
  # ICE row (apply() would collapse that case to a plain vector).
  ice_values <- as.matrix(dat[ix_start_ice:nrow(dat), 2:n_col])
  storage.mode(ice_values) <- "double"
  ice_matrix <- t(ice_values)  # rows: observations, columns: grid points

  # For each observation, the first grid position within 1e-8 of its own
  # covariate value.
  ix_grid <- vapply(
    x_values,
    function(x) which(abs(unique_x_values - x) < 1e-8)[1L],
    integer(1)
  )
  if (any(ix_grid > ncol(ice_matrix))) {
    stop("Fewer ICE rows than distinct covariate values: found ",
         ncol(ice_matrix), " row(s) for ", length(unique_x_values),
         " distinct value(s).", call. = FALSE)
  }

  # One (row, column) pair per observation picks its value on its own curve.
  y_hat <- ice_matrix[cbind(seq_along(x_values), ix_grid)]

  # deparse.level = 0 keeps the appended columns unnamed.
  cbind(ice_matrix, y_hat, x_values, deparse.level = 0)
}
# Reads an ICE export and plots it.
#
# Thin wrapper: builds the table with generate_ice_table(csv_file) and passes
# it, together with every styling argument unchanged, to plot_ice_aux().
#
# Args:
#   csv_file:       path to the CSV file read by generate_ice_table().
#   centered:       forwarded to plot_ice_aux(); when TRUE each curve is
#                   shifted by its value in the first grid column.
#   point_size:     size of the per-observation points.
#   ice_line_size:  width of the individual curves.
#   pdp_line_size:  width of the averaged curve.
#   ice_line_alpha: alpha of the individual curves.
#   point_alpha:    alpha of the per-observation points.
#
# Returns:
#   The ggplot object built by plot_ice_aux().
plot_ice <- function(
    csv_file,
    centered = FALSE,
    point_size = 0.7,
    ice_line_size = 0.4,
    pdp_line_size = 1.3,
    ice_line_alpha = 0.5,
    point_alpha = 0.5) {
  ice_table <- generate_ice_table(csv_file)
  plot_ice_aux(
    ice_table,
    centered = centered,
    point_size = point_size,
    ice_line_size = ice_line_size,
    pdp_line_size = pdp_line_size,
    ice_line_alpha = ice_line_alpha,
    point_alpha = point_alpha
  )
}
# Draws an ICE plot from a table shaped like the output of
# generate_ice_table().
#
# Args:
#   ice_table:      numeric matrix (or something as.matrix() turns into one)
#                   with one row per curve. All columns except the last two
#                   are curve values, one column per grid point; the
#                   second-to-last column is the value plotted as a point for
#                   that row; the last column is the x position of that point.
#                   The distinct values of the last column, in order of first
#                   appearance, are used as the x positions of the grid
#                   columns.
#   centered:       when TRUE, subtract each row's first grid value from the
#                   whole curve and from its point.
#   point_size:     size of the points.
#   ice_line_size:  width of the individual (grey) curves.
#   pdp_line_size:  width of the averaged curve.
#   ice_line_alpha: alpha of the individual curves.
#   point_alpha:    alpha of the points.
#
# Returns:
#   A ggplot object with three layers, in order: individual curves, points,
#   and the per-x mean of the curves.
plot_ice_aux <- function(
    ice_table,
    centered = FALSE,
    point_size = 0.7,
    ice_line_size = 0.4,
    pdp_line_size = 1.3,
    ice_line_alpha = 0.5,
    point_alpha = 0.5) {
  ice_table <- as.matrix(ice_table)
  if (!is.numeric(ice_table)) {
    stop("`ice_table` must be numeric.", call. = FALSE)
  }

  n_curves <- nrow(ice_table)
  n_col <- ncol(ice_table)
  n_grid <- n_col - 2L
  if (n_grid < 1L) {
    stop("`ice_table` needs at least one grid column plus the two trailing ",
         "columns.", call. = FALSE)
  }

  grid_x <- unique(ice_table[, n_col])
  if (length(grid_x) != n_grid) {
    stop("`ice_table` has ", n_grid, " grid column(s) but ", length(grid_x),
         " distinct x value(s) in its last column.", call. = FALSE)
  }

  if (centered) {
    # y_ref has one entry per row, so ordinary column-major recycling
    # subtracts it from every grid column and from the point column at once.
    y_ref <- ice_table[, 1]
    shifted_cols <- seq_len(n_grid + 1L)
    ice_table[, shifted_cols] <-
      ice_table[, shifted_cols, drop = FALSE] - y_ref
  }

  # Long format, curve after curve: t() makes each curve a column, and
  # as.vector() then reads the columns in order.
  df_ice <- tibble(
    x = rep(grid_x, times = n_curves),
    group = rep(seq_len(n_curves), each = n_grid),
    y_hat = as.vector(t(ice_table[, seq_len(n_grid), drop = FALSE]))
  )

  pdp_df <- df_ice %>%
    group_by(x) %>%
    summarize(y_hat = mean(y_hat))

  actual_predicted <- tibble(
    x = ice_table[, n_grid + 2L],
    y_hat = ice_table[, n_grid + 1L]
  )

  # ggplot2 3.4.0 renamed the line-width argument from `size` to `linewidth`;
  # pick whichever the installed version expects so no warning is raised.
  width_arg <- if (utils::packageVersion("ggplot2") >= "3.4.0") {
    "linewidth"
  } else {
    "size"
  }
  line_layer <- function(data, mapping, width, ...) {
    layer_args <- list(data = data, mapping = mapping, ...)
    layer_args[[width_arg]] <- width
    do.call(geom_line, layer_args)
  }

  ggplot() +
    line_layer(
      df_ice,
      aes(x, y_hat, group = group),
      ice_line_size,
      alpha = ice_line_alpha,
      color = "grey"
    ) +
    geom_point(
      data = actual_predicted,
      aes(x, y_hat),
      size = point_size,
      alpha = point_alpha
    ) +
    line_layer(pdp_df, aes(x, y_hat), pdp_line_size)
}
# Example runs: each file is plotted twice, raw and centered.
# These top-level calls rely on auto-printing (interactive session or
# Rscript); wrap them in print() if the file is run through source().
csv_file <- "~/Desktop/ICE_C5_Zone_06_STEP2_Sum_of_Bay_3_Total_Arcs.csv"
plot_ice(csv_file)
plot_ice(csv_file, centered = TRUE)

csv_file <- "~/Desktop/prueba_ice.csv"
plot_ice(csv_file)
plot_ice(csv_file, centered = TRUE)
Add Comment
Please, Sign In to add comment