Advertisement
backlight0815

Untitled

Aug 8th, 2022
495
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.69 KB | None | 0 0
# Packages ----
# plyr must be attached BEFORE dplyr; otherwise plyr masks dplyr verbs such as
# summarise(), mutate(), rename() and count().
library(plyr)
library(tidyr)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(caret) # featurePlot()

  8. #import the data
  9. employee <- read.csv(file="c:\\Users\\Asus\\OneDrive\\Desktop\\employee_attrition.csv",header=TRUE,sep=",")
  10.  
  11. #different viewing methods
  12. #for first 6 lines
  13. head(employee)
  14. head(employee,10)
  15.  
  16. #for last 6 lines
  17. tail(employee)
  18. tail(employee,10)
  19.  
  20. str(employee)
  21. summary(employee)
  22. head(employee)
  23. View(employee)#View the dataset in table form
  24.  
  25. names(employee)=c("Employee_ID(PK)","Record_Date","Birthday_Date","Orighire_Date","Termination_Date","Age","Length_of_Service","City","Department","Job_Title","Store_Number","Gender","Gender_Full","Term_desc","Type_term","Status_Year","Status","Business_unit")
  26. names(employee)
  27.  
  28. #Data Cleaning
  29. employee$Gender_Full <- NULL
  30. employee$Store_Number <-NULL
  31. employee$Birthday_Date <-NULL
  32. employee$Type_Term <-NULL
  33. employee$Orighire_Date <- NULL
  34. View(employee)
  35. #how data stores
  36. class(employee)
  37.  
  38. length(employee)#number of column
  39. ncol(employee)
  40. nrow(employee)#number of row
  41. summary(employee)
  42.  
  43. #What is the largest in this company
  44. max(employee$Age) #65
  45. #what is the smallest in this company
  46. min(employee$Age) #19
  47. #Who is the longest length of services in this company?
  48. max(employee$Length_of_Service) #26
  49. #Who is the smallest length of services in this company?
  50. min(employee$Length_of_Service) #0
  51.  
  52. employee[employee$Gender=="Male",] #writing condition with categorical data
  53. male=employee[(employee$Age>60)&(employee$Gender=="M"),]
  54. nrow(male)
  55. View(male)
  56.  
  57. #How many employee is terminated
  58. Number_Terminated=employee[employee$Status=="TERMINATED",]
  59. #How many employee is worked more than 15 years and terminated
  60. Number_Terminated_1=employee[(employee$`Length of Service`>14)&(employee$Status=="TERMINATED"),]
  61.  
  62. nrow(Terminated)
  63. View(Terminated)
  64.  
  65. nrow(Number_Terminated_1)
  66. View(Number_Terminated_1)
  67.  
  68.  
  69. Female=employee[(employee$Age>60)&(employee$Gender=="F"),]
  70. nrow(Female)
  71. View(Female)
  72.  
  73. #Categorize the Length of services
  74. factor(employee$Length_of_Service)#list out the categories
  75. nlevels(factor(employee$Length_of_Service)) #how many level
  76.  
  77.  
  78. View(employee)
  79.  
  80. statusCount<-as.data.frame.matrix(employee %>%
  81.                                     group_by(Status_Year) %>%
  82.                                     select(Status) %>%
  83.                                     table())
  84.  
  85. statusCount$Total<-statusCount$ACTIVE+statusCount$TERMINATED
  86.  
  87. statusCount$PercentTerminate<-statusCount$TERMINATED/(statusCount$Total)*100
  88.  
  89.  
  90. statusCount
  91.  
  92. mean(statusCount$PercentTerminate)
  93. #Analysis part
  94. #just terminates
  95.  
  96. #statusCount<-as.data.frame.matrix(empset %>%
  97.   #                                  group_by(Status) %>%
  98.   #                                 select(STATUS) %>%
  99. #table())
  100. #statusCount$Total<-statusCount$ACTIVE+statusCount$TERMINATED
  101. #statusCount$PercentTerminate<-statusCount$TERMINATED/(statusCount$Total)*100
  102. #statusCount
  103. TerminateData<- employee %>% filter(Status=="TERMINATED")
  104. ggplot(TerminateData)+geom_bar(aes(x=Status_Year, fill=Term_desc))
  105. ggplot(TerminateData)+geom_bar(aes(x=Length_of_Service, fill=Status))
  106. ggplot(TerminateData)+geom_bar(aes(x=as.factor(Department), fill=as.factor(Term_desc)))+
  107.   theme(axis.text.x = element_text(angle=90, hjust=1,vjust=0.5))
  108.  
  109. AgeData<- employee %>% filter(Age <40 & Status=="TERMINATED")
  110. ggplot(AgeData)+geom_bar(aes(x=Age, fill=Term_desc))
  111. ggplot(data=employee,aes(x=Age,y=Length_of_Service)) + geom_line()
  112. #Scatter plot
  113. #To show older will work how long for the services
  114. ggplot(employee, aes(x=Age,y=Length_of_Service, color=Age)) + geom_point() + xlim(10,60)+ ylim(0,25)
  115. library(caret)
  116. featurePlot(x=MYdataset[,6:7],y=MYdataset$STATUS,plot="density",auto.key = list(columns = 2))
  117. View(employee)
  118.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement