Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# `%matplotlib inline` is IPython magic and a syntax error in a plain Python
# file. Request the inline backend only when an IPython shell is present, so
# the same code runs both in a notebook and as a script.
try:
    get_ipython().run_line_magic("matplotlib", "inline")  # noqa: F821
except NameError:
    # Not running under IPython/Jupyter: keep matplotlib's default backend.
    pass

# NOTE(review): "humanresources" does not appear to be one of seaborn's stock
# example datasets -- confirm it is available to `load_dataset` (e.g. from a
# local cache) or load the CSV with `pd.read_csv` instead.
human_resources = sns.load_dataset("humanresources")

# Preview the first four rows to sanity-check the columns used below.
print(human_resources.head(n=4))
# Point plot: satisfaction level by years spent at the company, per salary band.
sns.set(style="whitegrid")

# `factorplot` was renamed `catplot` (and its `size` argument `height`) in
# seaborn 0.9; the old names have since been removed from the library.
g = sns.catplot(
    x="time_spend_company",
    y="satisfaction_level",
    hue="salary",
    data=human_resources,
    height=5,
    kind="point",
    palette="pastel",
    ci=95,
    dodge=True,
    join=False,
)
g.despine(left=True)
g.set_ylabels("Satisfaction Level")
g.set_xlabels("")
plt.title('Pointplot: Years in the Company against Satisfaction Level')
plt.show()

# Observations:
# - Employees who have been with the company for 3-6 years tend to have the
#   lowest satisfaction levels, compared with those who have been there for
#   fewer than 3 years or more than 6 years.
# - In general, employees in the high salary bracket do not tend to have
#   higher satisfaction levels, contrary to the popular belief that money is
#   a primary factor affecting employee job satisfaction.
# Bar plot: job satisfaction by promotion status (last 5 years), per salary band.
sns.set(style="darkgrid")

# `factorplot` was renamed `catplot` (and its `size` argument `height`) in
# seaborn 0.9; the old names have since been removed from the library.
cp = sns.catplot(
    x="promotion_last_5years",
    y="satisfaction_level",
    hue="salary",
    data=human_resources,
    height=5,
    kind="bar",
    palette="pastel",
    ci=95,
)
cp.despine(left=True)
cp.set_ylabels("Job Satisfaction")
cp.set_xlabels("Promotion in the last 5 years")
plt.title('Barplot: Career progression against Job Satisfaction')
plt.show()

# Observations:
# - Employees who were promoted in the last 5 years generally report higher
#   job satisfaction.
# - The difference is minimal, however, for those in the high salary bracket,
#   further indicating that money is not the primary factor affecting job
#   satisfaction.
# Bar plot: average monthly hours by salary band, split by whether the
# employee left.
sns.set(style="darkgrid")

# `factorplot` was renamed `catplot` (and its `size` argument `height`) in
# seaborn 0.9; the old names have since been removed from the library.
cp = sns.catplot(
    x="salary",
    y="average_monthly_hours",
    hue="left",
    data=human_resources,
    height=5,
    kind="bar",
    palette="pastel",
    ci=95,
)
cp.despine(left=True)
cp.set_ylabels("Average Monthly Hours at Work Place")
cp.set_xlabels("Salary Brackets")
plt.title('Barplot: Time spent at work place against Salary')
plt.show()

# Observations:
# - In the low and medium salary brackets, employees who left tend to have
#   worked longer hours.
# - The high salary bracket is (once again) the exception: in this group the
#   employees who left actually worked shorter hours.
# Facet grid: one satisfaction-level histogram per (department, salary) pair.
sns.set(style="ticks")  # overall aesthetic for this figure

# Lay out the grid: one row per department, one column per salary band.
g = sns.FacetGrid(
    data=human_resources,
    row="department",
    col="salary",
)

# Draw a histogram of satisfaction_level in every facet.
g.map(
    plt.hist,
    "satisfaction_level",
    color="steelblue",
    lw=0,
)

# Give the x axis a more informative name.
g.set_axis_labels(x_var='Satisfaction Levels')

# Leave head-room above the facets, then place the figure-level title there.
plt.subplots_adjust(top=0.9)
plt.suptitle('Satisfaction Levels by Department and Salary')

# Strip the excess spines around each facet.
sns.despine(trim=True)
plt.show()

# Observations:
# - The grid shows the satisfaction breakdown by department and salary.
# - Sales has the largest number of employees, followed by technical and
#   support staff.
# - Overall, employees were moderately to adequately satisfied.
# KDE plot: distribution of average monthly hours for employees who stayed
# versus those who left.
fig = plt.figure(figsize=(6, 4))

# One density curve per value of `left` (0 = stayed, 1 = left).
for left_flag, curve_color, curve_label in ((0, 'b', 'Stayed'), (1, 'r', 'Left')):
    monthly_hours = human_resources.loc[
        human_resources['left'] == left_flag, 'average_monthly_hours'
    ]
    # `fill` replaces the deprecated `shade` argument (seaborn >= 0.11).
    ax = sns.kdeplot(monthly_hours, color=curve_color, fill=True, label=curve_label)

plt.title('Average monthly hours worked')
# Recent seaborn versions no longer draw the legend automatically for
# `label=`, so request it explicitly.
plt.legend()
# Every other figure in this script is shown explicitly; without this call
# the plot never appears when run outside a notebook.
plt.show()

# Observations:
# - People who left either worked few hours or worked more than 250 hours.
# - Employees who stayed worked an average of 150-250 hours.
Add Comment
Please, Sign In to add comment