Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Code:
- from random import randint
- import pandas_datareader.data as web
- import pandas as pd
- import datetime
- import itertools as it
- import numpy as np
- import csv
- import matplotlib.pyplot as plt
- df = pd.read_csv('Filename.txt')
- df.columns = ['Date','col1','col2','col3']
- #Date is %Y-%m-%d format, and columns are integers.
- reversed_df = df.iloc[::-1]
- df["Date"] = df["Date"].astype("datetime64[ns]")
- #print(df.dtypes)
- pd.set_option('display.max_columns', None)
- pd.set_option('display.max_rows', None)
- pd.set_option('display.width', None)
- Date = reversed_df.Date
- ColumnOne = reversed_df.col1
- ColumnTwo = reversed_df.col2
- ColumnThree = reversed_df.col3
- df = pd.concat([ColumnOne, ColumnTwo, ColumnThree], axis=1)
- #print(df)
- data = df
- target = ColumnOne
- #Target would be one of the features(Columns), and for each feature I would repeat the code below fitting and plotting the data.
- from sklearn.model_selection import train_test_split
- data_train, data_test, target_train, target_test = train_test_split(data,target, test_size = 0.30, random_state = 1)
- from sklearn.cluster import KMeans
- kmeans_model = KMeans(n_clusters=3)
- #Number of clusters = Number of features(Columns).
- Columns = df
- kmeans_model.fit(Columns)
- labels = kmeans_model.labels_
- from sklearn.decomposition import PCA
- pca_2 = PCA(2)
- plot_columns = pca_2.fit_transform(Columns)
- plt.scatter(x=plot_columns[:,0], y=plot_columns[:,1], c=labels)
- plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement