Advertisement
Guest User

Untitled

a guest
Nov 21st, 2019
122
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.36 KB | None | 0 0
  1. import pandas
  2.  
  3. df = pandas.read_csv("census_income_dataset.csv")
  4. #print(df.isnull().sum())
  5. df['occupation'].replace({'?' : 'unknown'}, inplace = True)
  6. df['workclass'].replace({'?' : 'unknown'}, inplace = True)
  7. df['native_country'].replace({'?' : 'unknown'}, inplace = True)
  8. #df['occupation'].replace({'?' : 'unknown'}, inplace = True)
  9. import matplotlib.pyplot as plot
  10. import numpy as np
  11.  
  12. df.groupby('sex').size().plot(kind = 'pie', autopct = '%1.1f%%',explode = (0,0.1))
  13.  
  14. plot.title('Gender distribution')
  15. plot.show()
  16.  
  17. figure, ax = plot.subplots()
  18. # alpha makes the bars abit transparent so that you can see the other bars
  19. ax.hist(df['age'], color='red', label='Age frequency of respondents',alpha = 0.50)
  20. ax.set_title('Histogram showing the frequency of ages')
  21. ax.set_xlabel('Ages')
  22. ax.set_ylabel('Frequency')
  23. plot.show()
  24.  
  25.  
  26. df.groupby('occupation').size().plot(kind = 'pie', autopct = '%1.1f%%')
  27.  
  28. plot.title('Occupations')
  29. plot.show()
  30.  
  31. df.groupby('race').size().plot(kind = 'pie', autopct = '%1.1f%%')
  32. plot.show('Race')
  33.  
  34.  
  35. z = df.groupby(['occupation'])['hours_per_week'].mean()
  36. z.plot(kind = 'bar', color = 'purple')
  37. plot.title('Hours worked per week by different occupations')
  38. plot.xlabel('Occupation')
  39. plot.ylabel('Hours per week')
  40. plot.show()
  41.  
  42. df.groupby(['occupation','sex'])['hours_per_week'].mean().unstack().plot(kind = 'bar', stacked = True) #if we put false, it would be unstacked
  43. plot.title('Occupation and sex of respondents by hours worked per week')
  44. plot.xlabel('Occupation')
  45. plot.ylabel('Hours per week')
  46.  
  47. plot.show()
  48.  
  49. #df.groupby('native_country').size().plot(kind = 'pie', autopct = '%1.1f%%')
  50.  
  51. #plot.title('Countries')
  52. #plot.show()
  53.  
  54.  
  55. #df.groupby(['native_country','sex'])['hours_per_week'].mean().unstack().plot(kind = 'bar', stacked = True) #if we put false, it would be unstacked
  56.  
  57. df.groupby(['education','sex'])['hours_per_week'].mean().unstack().plot(kind = 'bar', stacked = True) #if we put false, it would be unstacked
  58. plot.title('Education level of people of different genders by hours per week')
  59. plot.xlabel('Education')
  60. plot.ylabel('hours per week')
  61.  
  62. plot.show()
  63.  
  64. import seaborn
  65. df_x = df[['age','education','marital_status','occupation','sex','hours_per_week','income_level']]
  66. figure,ax = plot.subplots()
  67. seaborn.heatmap(df_x.corr(), cmap = 'Reds', annot = True)
  68. # annot means annotation, c
  69. plot.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement