Guest User

Untitled

a guest
Dec 3rd, 2025
42
0
179 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.52 KB | Software | 0 0
  # KNN baseline (k = 1) for predicting GradeClass.
  #
  # Expects `train_data` and `test_data` to exist already, each with a
  # `GradeClass` column; they are created outside this section. `%>%`,
  # `dummyVars()`, `knn()` and `confusionMatrix()` must already be attached --
  # presumably via dplyr/magrittr, caret and class; confirm against the
  # script's library() calls.

  # Drop Age and Gender from both splits to simplify the model and reduce
  # noise.
  train_data <- train_data %>% dplyr::select(-Age, -Gender)
  test_data  <- test_data %>% dplyr::select(-Age, -Gender)

  # Separate the predictors from the GradeClass target so that the target
  # cannot leak into the feature matrix built below.
  train_scaled <- train_data %>% dplyr::select(-GradeClass)
  test_scaled  <- test_data %>% dplyr::select(-GradeClass)

  num_features <- c("StudyTimeWeekly", "Absences")

  # KNN is distance based, so standardise the numeric features; otherwise the
  # feature with the larger values dominates the distance.
  sc <- scale(train_scaled[, num_features])
  train_scaled[, num_features] <- sc

  # Reuse the training centre and scale on the test split so that no test-set
  # statistics feed into the preprocessing.
  test_scaled[, num_features] <- scale(
    test_scaled[, num_features],
    center = attr(sc, "scaled:center"),
    scale  = attr(sc, "scaled:scale")
  )

  # One-hot encode the categorical features. The encoder is built from the
  # training predictors only and then applied to both splits.
  dummies   <- dummyVars(~ ., data = train_scaled)
  train_knn <- predict(dummies, train_scaled)
  test_knn  <- predict(dummies, test_scaled)

  # Quick look at the unscaled training values of StudyTimeWeekly.
  # NOTE(review): this previously read from `df`, which is not defined anywhere
  # in this section; `train_data` is the object actually being modelled here.
  summary(train_data$StudyTimeWeekly)

  # Base KNN model with k = 1. The seed is fixed so that any random
  # tie-breaking inside knn() is reproducible.
  set.seed(100)
  knn_pred <- knn(train_knn, test_knn, cl = train_data$GradeClass, k = 1)

  # Confusion matrix on the test split.
  # Recorded result: accuracy of 59.45% and an average F1 score of 0.444. Both
  # are worse than the logistic regression models.
  CM1 <- confusionMatrix(data = knn_pred, reference = test_data$GradeClass)
  CM1

  # Per-class F1. Classes whose F1 is NA are counted as 0 so that they lower
  # the average instead of turning it into NA.
  F1_per_class <- CM1$byClass[, "F1"]
  F1_per_class[is.na(F1_per_class)] <- 0
  F1_per_class
  mean(F1_per_class)
  # 0.4437909
Advertisement
Add Comment
Please, Sign In to add comment