# load the required packages
library(dplyr)
library(caret)
library(keras)
# split data in train and test
set.seed(7)  # assumed seed (7 matches the seeds used below), for a reproducible split
train <- data3 %>% sample_frac(.85)
test <- data3 %>% anti_join(train, by = 'id')
# count the number of non-churn and churn cases
n_0 <- train %>% filter(churn == 0) %>% count() %>% pull(n)
n_1 <- train %>% filter(churn == 1) %>% count() %>% pull(n)
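# optional sanity check (a minimal sketch, not part of the original paste):
# the churn rate shows how imbalanced the training split is and
# motivates the class_weight passed to fit() further down
n_1 / (n_0 + n_1)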
# remove the id and the target variable
x_train <- train %>% select(-id, -churn) %>% as.matrix()
x_test <- test %>% select(-id, -churn) %>% as.matrix()
# fit the scaler on the training data only and reuse it for the test
# data, so no test-set information leaks into the preprocessing
scaler <- preProcess(x_train, method = c('center', 'scale'))
x_train <- predict(scaler, x_train)
x_test <- predict(scaler, x_test)
# separate the target variable
y_train <- train$churn
y_test <- test$churn
# create the neural network
model <- keras_model_sequential()
model %>%
  layer_dense(units = 512, activation = 'relu',
              kernel_initializer = keras::initializer_glorot_uniform(7),
              input_shape = ncol(x_train)) %>%
  layer_activity_regularization(l1 = 0.01, l2 = 0.01) %>%
  layer_dropout(rate = 0.6, seed = 7) %>%
  layer_dense(units = 128, activation = 'relu',
              kernel_initializer = keras::initializer_glorot_uniform(7)) %>%
  layer_activity_regularization(l1 = 0.01, l2 = 0.01) %>%
  layer_dropout(rate = 0.5, seed = 7) %>%
  layer_dense(units = 1, activation = 'sigmoid')
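# optional: inspect the architecture and parameter counts before training
# (not in the original paste; summary() is the standard keras R call for this)
summary(model)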
# define the loss function and other parameters
model %>%
  compile(loss = 'binary_crossentropy',
          optimizer = optimizer_adam(lr = 0.0001, beta_1 = 0.9,
                                     beta_2 = 0.999, epsilon = 1e-08,
                                     decay = 0.0001),
          metrics = list('accuracy'))
# define the execution parameters; class_weight up-weights the rare
# churn class by the imbalance ratio n_0/n_1 computed above
model %>%
  fit(x_train, y_train,
      class_weight = list('0' = 1, '1' = n_0 / n_1),
      epochs = 50, batch_size = 32,
      validation_data = list(x_test, y_test))
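# evaluate on the held-out data (a minimal sketch; evaluate() and
# predict() are standard keras R calls, the 0.5 cutoff is an assumption)
model %>% evaluate(x_test, y_test)
pred <- model %>% predict(x_test)
# confusion table of hard predictions against the true labels
table(predicted = as.integer(pred > 0.5), actual = y_test)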
# save the model
model %>% save_model_hdf5("[path]/[model_name].h5")
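# to reload the saved model later (sketch; load_model_hdf5() is the
# keras R counterpart of save_model_hdf5(), path placeholder kept as-is)
model <- load_model_hdf5("[path]/[model_name].h5")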