Advertisement
krutmaster

Pycaret: shuffle in data

Oct 5th, 2022
975
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.19 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import matplotlib as mpl
  5. import seaborn as sns
  6. from pycaret.clustering import *
  7. from sklearn.datasets import make_blobs
  8. from sklearn.model_selection import StratifiedKFold
  9. mpl.rcParams['figure.dpi'] = 300
  10.  
  11.  
  12. cols = ['column1', 'column2', 'column3',
  13.         'column4', 'column5']
  14. arr = make_blobs(n_samples=1000, n_features=5, random_state=20,
  15.                  centers=3, cluster_std=1)
  16. skfolds=StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
  17. data = pd.DataFrame(data=arr[0], columns = cols)
  18. print(data.head())
  19.  
  20. #data.hist(bins=30, figsize=(12, 10), grid=False)
  21. #plt.show()
  22.  
  23. #plt.figure(figsize=(10, 8))
  24. #sns.heatmap(data.corr().round(decimals=2), annot=True)
  25. #plt.show()
  26.  
  27. #plot_kws = {'scatter_kws': {'s': 2}, 'line_kws': {'color': 'red'}}
  28. #sns.pairplot(data, kind='reg', vars=data.columns[:-1], plot_kws=plot_kws)
  29. #plt.show()
  30.  
  31. cluster = setup(data, session_id=7652)
  32. model = create_model('kmeans')
  33.  
  34. plot_model(model, 'elbow')
  35.  
  36. model = create_model('kmeans', num_clusters = 3)
  37. plot_model(model, 'cluster')
  38.  
  39. #save_model(model, 'clustering_model')
  40. results = assign_model(model)
  41. print(results.head(10))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement