Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Preprocessing of the "Absenteeism at work" dataset ----
# Loads the raw CSV, drops four attributes, removes one outlier row, filters
# out extreme absenteeism values, and extracts the first two columns for
# plotting. Results are left in the top-level variables `data`, `attributes`,
# `datanew`, `datafinal`, `attributesfinal`, `x` and `y`.

# Load the dataset (semicolon-separated; check.names = FALSE keeps the
# original column headers untouched).
data <- read.csv(
  "C:/Users/lenag/Desktop/COURSES/Machine Learning/Project 1/Absenteeism_at_work.csv",
  sep = ";",
  check.names = FALSE
)

# Expected: 740 observations & 21 attributes. The column removal below is
# purely positional, so fail early if the layout is not the expected one.
stopifnot(ncol(data) == 21L)
attributes <- colnames(data)

# Drop, by position in the ORIGINAL data:
#   11, 12 - Hit target and Discipline failure, considered irrelevant
#   18, 19 - Weight and Height, redundant with Body mass index
# (equivalent to removing column 11 twice, then column 16 twice).
datanew <- data[, -c(11, 12, 18, 19)]

# Remove the observation considered an outlier.
# NOTE(review): the original comment describes the outlier as "row 324
# (325 on xls)", yet the code drops data row 325. Kept as written; confirm
# which row is actually intended.
datanew <- datanew[-325, ]

# Remove every row whose absenteeism value (column 17 after the removals
# above; attribute 21 of the raw data) is greater than 100. A logical mask
# replaces the former index-shifting loop and its hard-coded row count; rows
# with a missing value are kept rather than raising an error.
absenteeism_value <- datanew[, 17]
is_extreme <- !is.na(absenteeism_value) & absenteeism_value > 100
datafinal <- datanew[!is_extreme, ]

# Names of the remaining attributes (4 removed). This must come after
# `datafinal` is created; it used to be evaluated before that assignment.
attributesfinal <- colnames(datafinal)

# datafinal: 4 columns removed + outlier row and extreme values of
# "Absenteeism at work" removed.

# Visualization of the final data ----
x <- datafinal[, 1]
y <- datafinal[, 2]
#....
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement