Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Cluster students by their exam scores with k-means and export one cluster.
#
# Steps:
#   1. Load 'studentsperformance.csv' and report missing values per column.
#   2. Integer-encode the categorical columns in place on the DataFrame.
#   3. Fit a 3-cluster KMeans model on the three score columns.
#   4. Print the cluster centres and the rows assigned to cluster 1.
#   5. Write those rows (score columns only) to 'cluster1.xls'.
import pandas
import sklearn
from sklearn.cluster import KMeans

SCORE_COLUMNS = ['math_score', 'reading_score', 'writing_score']

# Per-column mapping from category text to integer code.  Values that are not
# listed in a mapping are left untouched by ``Series.replace``.
CATEGORY_CODES = {
    'race_ethnicity': {
        'group A': 0, 'group B': 1, 'group C': 2, 'group D': 3, 'group E': 4,
    },
    'gender': {'male': 0, 'female': 1},
    'lunch': {'standard': 0, 'free/reduced': 1},
    'test_preparation_course': {'none': 0, 'completed': 1},
    'parental_level_of_education': {
        'bachelors_degree': 0,
        'some_college': 1,
        'masters_degree': 2,
        'associates_degree': 3,
        'high_school': 4,
        'some_high_school': 5,
    },
}

df = pandas.read_csv('studentsperformance.csv')
print(df)

# Number of missing values in each column.
empties = df.isnull().sum()
print(empties)

# Assign the result back instead of calling ``replace(..., inplace=True)`` on
# ``df[column]``: the chained in-place form operates on a temporary Series and
# does not update ``df`` when pandas Copy-on-Write is active.
for column, codes in CATEGORY_CODES.items():
    df[column] = df[column].replace(codes)
print(df)

# Work on an explicit copy so that adding the 'label' column below writes to
# an independent frame rather than to a selection taken from ``df``.
subset = df[SCORE_COLUMNS].copy()
X = subset.to_numpy()

# random_state is fixed so repeated runs produce the same clustering.
model = KMeans(n_clusters=3, random_state=42)
model.fit(X)
print('Learning// Done')

centroids = model.cluster_centers_
dataframe = pandas.DataFrame(centroids, columns=SCORE_COLUMNS)
print(dataframe)

# Attach each row's cluster index, then keep only the rows in cluster 1.
subset['label'] = model.labels_
subset = subset[subset['label'] == 1]
print(subset)

# Imported here, as in the original script, so that a missing xlwt package
# only affects the export step and not the analysis above.
import xlwt

# NOTE(review): writing the legacy '.xls' format relies on the xlwt engine,
# which recent pandas releases no longer provide -- confirm the installed
# pandas version still supports this before relying on the export.
subset.to_excel('cluster1.xls', columns=SCORE_COLUMNS)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement