Advertisement
Guest User

Project 1

a guest
Sep 19th, 2018
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.04 KB | None | 0 0
  # Load the dataset. The file is semicolon-separated; check.names = FALSE
  # keeps the column headers exactly as they appear in the file.
  data <- read.csv(
    "C:/Users/lenag/Desktop/COURSES/Machine Learning/Project 1/Absenteeism_at_work.csv",
    sep = ";",
    check.names = FALSE
  )

  # 740 observations & 21 attributes
  attributes <- colnames(data)

  # Attributes 11 (Hit target) and 12 (Discipline failure) are considered
  # irrelevant, so both are dropped in a single step.
  datanew <- data[, -c(11, 12)]

  # Weight and height are dropped because they are related to body mass index.
  # After the removal above, the two columns dropped here are at positions
  # 16 and 17.
  datanew <- datanew[, -c(16, 17)]

  # Drop the row treated as an outlier.
  # NOTE(review): the original comment says the outlier is row 324 (325 in the
  # xls), yet the index removed here is 325 -- confirm which row is intended.
  datanew <- datanew[-325, ]

  # Remove every row whose "Absenteeism at work" value exceeds 100. That
  # attribute is number 21 in the raw data and column 17 after the removals
  # above. The filter is vectorized, so it does not depend on a hard-coded row
  # count; rows with a missing value are kept rather than raising an error.
  absenteeism_value <- datanew[, 17]
  drop_rows <- !is.na(absenteeism_value) & absenteeism_value > 100
  datafinal <- datanew[!drop_rows, ]
  k <- sum(drop_rows)  # number of rows removed by the filter

  # Column names of the final data set (4 attributes removed). This has to be
  # computed after datafinal exists.
  attributesfinal <- colnames(datafinal)

  # datafinal: columns 11 & 12 and weight/height removed, outlier row removed,
  # extreme values of attribute "Absenteeism at work" removed.

  # VISUALIZATION DATA FINAL

  x <- datafinal[, 1]
  y <- datafinal[, 2]

  #....
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement