# Untitled

#' Fit a cross-validated glmnet model and print hold-out error metrics
#'
#' Splits `modelling_data_f` by row into a training part and a test part,
#' fits `cv.glmnet()` on the training rows (gaussian family, `alpha = 1`,
#' 10 folds), predicts the test rows at `s = "lambda.1se"`, and prints the
#' RMSE and MAE of those predictions.
#'
#' @param modelling_data_f Two-dimensional object with column names. It must
#'   contain a `score_to_predict` column (the response). Every column other
#'   than `reviewid` and `score_to_predict` is used as a feature.
#'   NOTE(review): presumably a numeric matrix, since it is handed straight to
#'   `cv.glmnet()` -- confirm against the caller.
#' @param train_idx_f Rows used for training; all remaining rows form the
#'   test set. May be an integer index vector or a logical mask. Defaults to
#'   the `train_idx` object visible from where this function is defined, which
#'   is what the function relied on implicitly before this argument existed.
#' @return The fitted model object returned by `cv.glmnet()`.
make_model <- function(modelling_data_f, train_idx_f = train_idx) {
  target_col <- "score_to_predict"
  if (!target_col %in% colnames(modelling_data_f)) {
    stop("modelling_data_f must contain a 'score_to_predict' column",
         call. = FALSE)
  }

  # Negating a logical mask yields -1/0 rather than its complement, so turn
  # masks into positions before building the train/test split.
  if (is.logical(train_idx_f)) {
    train_idx_f <- which(train_idx_f)
  }

  # drop = FALSE keeps both parts two-dimensional even when one of them
  # holds a single row.
  train_f <- modelling_data_f[train_idx_f, , drop = FALSE]
  test_f <- modelling_data_f[-train_idx_f, , drop = FALSE]

  # Everything except the id and the response is a feature.
  all_cols <- colnames(train_f)
  feature_names_f <- all_cols[!all_cols %in% c("reviewid", target_col)]

  mymodel_f <- cv.glmnet(
    y = train_f[, target_col],
    x = train_f[, feature_names_f, drop = FALSE],
    family = "gaussian", nfolds = 10, alpha = 1
  )

  # Predict on the held-out rows.
  pred_f <- predict(
    mymodel_f,
    s = "lambda.1se",
    newx = test_f[, feature_names_f, drop = FALSE],
    type = "response"
  )

  # Report model performance on the held-out rows.
  error_f <- test_f[, target_col] - pred_f
  rmse_f <- sqrt(mean(error_f^2))
  print("RMSE:")
  print(rmse_f)
  mae_f <- mean(abs(error_f))
  print("MAE:")
  print(mae_f)

  mymodel_f
}