# Project 1

# Read the absenteeism dataset: semicolon-separated, column names kept exactly
# as written in the file header (check.names = FALSE).
data <- read.csv(
  "C:/Users/lenag/Desktop/COURSES/Machine Learning/Project 1/Absenteeism_at_work.csv",
  sep = ";",
  check.names = FALSE
)

# Expected size of the raw table: 740 observations and 21 attributes.

attributes <- colnames(data)

# Drop four columns in one step, addressed by their position in the raw data:
#   11, 12 - "Hit target" and "Discipline failure", considered irrelevant
#   18, 19 - weight and height, considered redundant with body mass index
datanew <- data[, -c(11, 12, 18, 19)]

# Drop the row considered to be an outlier.
# NOTE(review): the original note refers to data row 324 (row 325 in the
# spreadsheet), yet the index used here is 325 - confirm which row is meant.
# The index is kept exactly as it was.
datanew <- datanew[-325, ]

# Remove every row whose absenteeism value exceeds 100. That value is
# attribute 21 of the raw data, which is column 17 once 4 columns are removed.
# A logical mask is used instead of a loop over a fixed row count, so the
# filter works for any number of rows; rows with a missing value are kept.
absenteeism_value <- datanew[, 17]
exceeds_limit <- !is.na(absenteeism_value) & absenteeism_value > 100
datafinal <- datanew[!exceeds_limit, ]
# datafinal: reduced column set, outlier row dropped, extreme absenteeism
# values removed.

# Column names of the cleaned data (4 attributes removed). This must come
# after datafinal has been created.
attributesfinal <- colnames(datafinal)

# VISUALIZATION OF DATAFINAL

x <- datafinal[, 1]
y <- datafinal[, 2]

#....