Advertisement
Guest User

Untitled

a guest
Jun 17th, 2019
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.40 KB | None | 0 0
  # Code:
# Cluster the three numeric columns of 'Filename.txt' with k-means and show
# the resulting clusters on a 2-D PCA projection of the same data.

import csv
import datetime
import itertools as it
from random import randint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader.data as web
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

df = pd.read_csv('Filename.txt')
df.columns = ['Date', 'col1', 'col2', 'col3']
# Date is %Y-%m-%d format, and columns are integers.

# Parse the dates BEFORE building the reversed frame. Assigning to df["Date"]
# rebinds the column on `df` only, so a reversed frame taken earlier would
# keep the raw, unparsed date strings.
df["Date"] = df["Date"].astype("datetime64[ns]")
reversed_df = df.iloc[::-1]
# print(df.dtypes)

# Lift pandas' display limits so frames print in full while debugging.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)

Date = reversed_df.Date
ColumnOne = reversed_df.col1
ColumnTwo = reversed_df.col2
ColumnThree = reversed_df.col3

# From here on `df` holds only the three numeric columns, in reversed row
# order; the Date column is deliberately left out of the model input.
df = reversed_df[['col1', 'col2', 'col3']]
# print(df)
data = df
target = ColumnOne
# Target would be one of the features (columns), and for each feature the
# code below would be repeated, fitting and plotting the data.
# NOTE(review): `data` still contains the target column itself. The split
# below is not consumed by the clustering, but any supervised model trained
# on data_train would see its own target - drop that column from `data`
# before using the split for prediction.

data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.30, random_state=1
)

# Author's choice: number of clusters = number of features (columns).
# random_state makes the cluster labels (and therefore the plot colours)
# reproducible between runs, matching the seeded split above; n_init is
# pinned to 10 so the result does not depend on the library's default.
kmeans_model = KMeans(n_clusters=3, n_init=10, random_state=1)
Columns = df
kmeans_model.fit(Columns)
labels = kmeans_model.labels_

# Project the three columns onto their first two principal components so the
# clusters can be drawn in 2-D, coloured by k-means label.
pca_2 = PCA(n_components=2)
plot_columns = pca_2.fit_transform(Columns)
plt.scatter(x=plot_columns[:, 0], y=plot_columns[:, 1], c=labels)
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement