Advertisement
Guest User

Untitled

a guest
Nov 21st, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.24 KB | None | 0 0
  1. import pandas
  2. df=pandas.read_csv('studentsperformance.csv')
  3. print(df)
  4.  
  5. import sklearn
  6.  
  7. empties=df.isnull().sum()
  8. print(empties)
  9.  
  10. df['race_ethnicity'].replace({'group A':0,'group B':1, 'group C':2,'group D':3,'group E':4}, inplace=True)
  11. df['gender'].replace({'male':0, 'female':1},inplace=True)
  12. df['lunch'].replace({'standard':0, 'free/reduced':1},inplace=True)
  13. df['test_preparation_course'].replace({'none':0, 'completed':1},inplace=True)
  14. df['parental_level_of_education'].replace({'bachelors_degree':0,'some_college':1,'masters_degree':2,'associates_degree':3,
  15. 'high_school':4,'some_high_school':5},inplace=True)
  16. print(df)
  17.  
  18. subset=df[['math_score','reading_score','writing_score']]
  19. array= subset.values
  20. X= array[:,0:3]
  21.  
  22. from sklearn.cluster import KMeans
  23. model = KMeans(n_clusters=3, random_state=42)
  24. model.fit_predict(X)
  25. print('Learning// Done')
  26.  
  27. centronoids = model.cluster_centers_
  28. dataframe = pandas.DataFrame(centronoids,columns=['math_score','reading_score','writing_score'])
  29. print(dataframe)
  30.  
  31. subset['label']=model.labels_
  32. subset = subset[subset['label']== 1]
  33. print(subset)
  34.  
  35. import xlwt
  36. subset.to_excel('cluster1.xls', columns=['math_score','reading_score','writing_score'])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement