Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from enum import unique
- import pandas as pd
- import matplotlib.pyplot as plt
- import numpy as np
- import operator
- import seaborn as sns
- from scipy import stats
- from mpl_toolkits.mplot3d import Axes3D
- from os import path
- from matplotlib.colors import LogNorm
- from collections import Counter
- import sys
- import math
- from sklearn.preprocessing import minmax_scale
- # from matplotlib.colors import LogNorm
- from sklearn import preprocessing
- import argparse
- import joblib
- import os
# Values exactly as they were listed in the original literal, three per row.
# NOTE(review): the layout suggests (chassis ID, date, failure type) records -
# confirm. `headers` itself is a set, so neither the row grouping, the order,
# nor the repeated "FOD Compressor" entries survive in it.
_HEADER_ROWS = (
    ("C-100003", "2018-07-27", "FOD Turbine"),
    ("C-100005", "2018-06-25", "FOD Compressor"),
    ("C-100004", "2018-06-07", "FOD Compressor"),
    ("C-100002", "2018-06-08", "FOD Compressor"),
    ("C-100001", "2018-07-18", "FOD Compressor"),
)
headers = {value for header_row in _HEADER_ROWS for value in header_row}

# Load the whole CSV; the first line of the file supplies the column names.
# NOTE(review): this read uses a fixed path and the default separator, while
# the later `df_train` read uses `args.filename` with sep=";" - confirm which
# separator the file really uses.
_DATA_CSV = "Multi_P1FWM_turbo_failure_deviation_ano.csv"
data = pd.read_csv(_DATA_CSV, header=0)
- ##############################################################
# Command-line options for the script. `store_true` flags already default to
# False, so no explicit default is given for them.
parser = argparse.ArgumentParser()
parser.add_argument('-a', "--anonymous", action='store_true', help='Hide axes')
parser.add_argument('-t', "--truck", type=str, default=None,
                    help='Plot one truck ID')
parser.add_argument('-f', "--filename", type=str,
                    default="Multi_P1FWM_turbo_failure_deviation_ano.csv",
                    help='CSV filename')
parser.add_argument('-n', "--normalise", action='store_true', help='Normalise')
parser.add_argument("--max_pics", type=int, default=16,
                    help='Max histograms on a plot')
parser.add_argument("--cols", type=int, default=4,
                    help='Number of histogram columns in plot')
parser.add_argument("--rows", type=int, default=None,
                    help='Number of rows to read from CSV file')
parser.add_argument("--top", type=int, default=None, help='Top n')
parser.add_argument("--start", type=int, default=0, help='Start histogram')
parser.add_argument("--trucks", type=str, default=None,
                    help='Multiple chassis IDs, comma separated')
# Parses sys.argv; exits with a usage message on unknown or malformed options.
args = parser.parse_args()
# Load the training frame, using a pickle in the working directory as a cache
# so the CSV is only parsed when no cache file exists yet.
# NOTE(review): the cache is not keyed on --filename or --rows, so changing
# either option has no effect until "df_train.pickle" is deleted.
_TRAIN_CACHE = "df_train.pickle"

print("Reading data...", end="", flush=True)
if os.path.exists(_TRAIN_CACHE):
    print("from pickle...", end="", flush=True)
    # NOTE: unpickling can execute arbitrary code; acceptable here only
    # because the file is a cache written by this script itself.
    df_train = pd.read_pickle(_TRAIN_CACHE)
else:
    # NOTE(review): the dtype key is 'VEHICL_ID' while the rest of the script
    # indexes 'VEHICLE_ID' - confirm the intended column name.
    df_train = pd.read_csv(
        args.filename,
        sep=";",
        dtype={'VEHICL_ID': str},
        nrows=args.rows,
    )
    print("pickling...", end="", flush=True)
    # Written once: the previous joblib.dump to the same path was
    # immediately overwritten by this call.
    df_train.to_pickle(_TRAIN_CACHE)
print("Ready")
print(df_train.shape)
print(df_train.head(2))
print(data.shape)

# One sub-frame per vehicle: c, d, e, f, g hold the rows whose VEHICLE_ID is
# 1, 2, 3, 4 and 5 respectively.
vehicle_ids = data['VEHICLE_ID']
c, d, e, f, g = (data[vehicle_ids == vid] for vid in range(1, 6))

# SEND_DATETIME of each sub-frame, parsed to datetimes (m1 <-> c ... m5 <-> g).
m1, m2, m3, m4, m5 = (
    pd.to_datetime(subset.SEND_DATETIME) for subset in (c, d, e, f, g)
)
n = m1.dt.year  # year component of every timestamp in m1
print(m1)

# Five randomly chosen rows of g (sample raises if g has fewer than five rows).
A = g.sample(n=5)

# Rows of g without the listed columns; B is what the plotting code iterates.
B = g.drop(
    columns=[
        'VEHICLE_ID',
        'SEND_DATETIME',
        'PARAMETER_CODE',
        'T_CHASSIS',
        'Y_INDEX_2_X_INDEX_13_VALUE',
    ]
)
print(B.shape)
# ---------------------------------------------------------------------------
# Draw one heat map per row of B on a single figure, save it as PNG, show it.
# ---------------------------------------------------------------------------

GRID_SIDE = 20  # every row of B is reshaped to GRID_SIDE x GRID_SIDE


def _row_to_image(row, normalise):
    """Convert one row of B into a masked 2-D image for imshow.

    The row's values are reshaped to GRID_SIDE x GRID_SIDE, truncated to
    integers and flipped vertically. With ``normalise`` the image is shifted
    so its minimum is 0 and scaled so its maximum is 1. Cells equal to 0 are
    masked out.

    Raises ValueError (from reshape) if the row does not hold exactly
    GRID_SIDE * GRID_SIDE values.
    """
    im = np.flipud(row.values.reshape(GRID_SIDE, GRID_SIDE).astype(int))
    if normalise:
        # Switch to float first: in-place true division on an integer array
        # raises, because the float result cannot be cast back to int.
        im = im.astype(float)
        im -= im.min()
        peak = im.max()
        if peak > 0:  # constant image: nothing to scale, and avoids 0/0
            im /= peak
    return np.ma.masked_where(im == 0, im)


def _decorate_axes(fig, ax, image, anonymous):
    """Apply ticks, cell grid, labels and colour bar to one heat-map subplot.

    With ``anonymous`` the axis labels are omitted and the colour bar has no
    tick values.
    """
    ax.set_aspect('equal')
    ax.set_xticks([0, GRID_SIDE - 1])
    ax.set_yticks([])
    # Minor ticks sit on the cell boundaries so the minor grid draws white
    # separator lines between cells.
    cell_edges = np.arange(-.5, GRID_SIDE - 1, 1)
    ax.set_xticks(cell_edges, minor=True)
    ax.set_yticks(cell_edges, minor=True)
    ax.grid(which='minor', color='w', linestyle='-', linewidth=1)
    if anonymous:
        fig.colorbar(image, ax=ax, orientation='vertical', ticks=[])
    else:
        ax.set_xlabel("engine speed")
        ax.set_ylabel("engine torque")
        fig.colorbar(image, ax=ax, orientation='vertical', format='%.1f',
                     fraction=0.0408, pad=0.04)


# Select the rows that are actually plotted. The subplot grid is sized from
# this selection, so add_subplot can never be asked for a cell beyond the
# grid (previously the grid was sized from `data` but the loop ran over all
# of B). --start picks the first row; its default of 0 keeps the old start.
cols = max(1, args.cols)
first_row = max(0, args.start)
rows_to_plot = B.iloc[first_row:first_row + args.max_pics]
num_pics = rows_to_plot.shape[0]
print(num_pics)

if num_pics == 0:
    print("No rows to plot")
else:
    rows = -(-num_pics // cols)  # ceiling division
    pc = 0
    fig = plt.figure(figsize=(cols * 3, rows * 3))
    fig.subplots_adjust(hspace=0.7, wspace=0.5)

    for sp, (index, row) in enumerate(rows_to_plot.iterrows(), start=1):
        ax = fig.add_subplot(rows, cols, sp)
        image = ax.imshow(_row_to_image(row, args.normalise),
                          interpolation='none')
        _decorate_axes(fig, ax, image, args.anonymous)

    # NOTE(review): title and file name contain the literal words
    # 'VEHICLE_ID' / 'SEND_DATETIME', not values from the data. Kept
    # unchanged - confirm whether the actual ID / timestamp was intended.
    if not args.anonymous:
        fig.suptitle('VEHICLE_ID' + ": " + 'SEND_DATETIME' + ' P1FWM')

    # `index` is the label of the last plotted row.
    fn = "training_" + 'SEND_DATETIME' + "_" + str(index) + "+" + str(pc)
    if args.normalise:
        fn += "_N"
    fn += ".png"
    print("Saving", fn)
    if os.path.exists(fn):
        os.remove(fn)
    fig.savefig(fn, dpi=288)
    plt.show()
- #ax.set_aspect('equal')
- #ax.get_xaxis().set_ticks([0, 19])
- #ax.get_yaxis().set_ticks([])
- #
- #ax.set_xticks(np.arange(-.5, 19, 1), minor=True);
- #ax.set_yticks(np.arange(-.5, 19, 1), minor=True);
- #ax.grid(which='minor', color='w', linestyle='-', linewidth=1)
- #
- #if not args.anonymous:
- #ax.set_xlabel("engine speed")
- #ax.set_ylabel("engine torque")
- #plt.colorbar(orientation='vertical', ax=ax, format='%.1f', fraction=0.0408, pad=0.04)
- # plt.clim(0, 10);
- #else:
- #plt.colorbar(orientation='vertical', ax=ax, ticks=[])
- #print(drop_reshape1.shape)
- #x = drop_reshape1[:,19]
- #y = drop_reshape1[19,:]
- #fig2 = plt.figure()
- #plt.hist2d(x, y, bins=100)
- #plt.xlabel('x')
- #plt.ylabel('y')
- #cbar = plt.colorbar()
- #cbar.ax.set_ylabel('Counts')
- #heat plots
- #df = pd.DataFrame(drop_reshape1)
- #df.columns = df.columns + 1
- #df.index = df.index + 1
- #f, ax = plt.subplots(figsize=(10, 6))
- #corr = df.corr()
- #hm = sns.heatmap(round(corr, 2), annot=True, ax=ax, cmap="coolwarm", fmt='.2f',linewidths=.05)
- #f.subplots_adjust(top=0.93)
- #t = f.suptitle('Wine Attributes Correlation Heatmap', fontsize=14)
- #img plots
- #histogram plots
- #plt.hist(drop_reshape1)
- #plt.title(m5[index])
- #plt.show()
- #fig.savefig("dataname_"+str(index)+".png")
- #drop = m.drop(['VEHICLE_ID','SEND_DATETIME','PARAMETER_CODE','T_CHASSIS','Y_INDEX_2_X_INDEX_13_VALUE'] ,axis=1)
- #drop_array=np.array(B)
- #drop_reshape = drop_array.reshape(20,20)
- #plt.title('2015-06-04 19:42:43')
- #plt.hist(drop_reshape)
- #plt.show()
- #a = np.array(m)
- #b = np.array(l)
- #d = np.array(data)
- #sort = sorted(data,key=operator.itemgetter(1))
- #s.plot()
- #plt.savefig('vehicle id ')
- #print(l)
- #print(m)
- #v=data.ix[0:,['SEND_DATETIME','Y_INDEX_1_X_INDEX_1_VALUE']]
- #print (grouped)
- #data = data[data['VEHICLE_ID'] == 1]
- #data = data.set_index(['SEND_DATETIME'])
- #data.plot()
- #plt.show()
- # Assign colors for each airline and the names
- # Make the histogram using a list of lists
- # Normalize the flights and assign colors and names
- #plt.hist([x1, x2, x3, x4, x5], bins=int(180 / 15), normed=True,
- #color=colors, label=names)
- # Plot formatting
- #g= np.reshape(a, (np.product(a.shape),))
- #k= np.reshape(m, (np.product(m.shape),))
- #print(g.shape)
- #plt.ploy(a,)
- #plt.show()
- #BY FAR BEST PLOTS
- # use the function regplot to make a scatterplot
- #sns.regplot(x=data["VEHICLE_ID"], y=data["Y_INDEX_1_X_INDEX_1_VALUE"])
- #plt.show()
- # Without regression fit:
- #sns.regplot(x=data["VEHICLE_ID"], y=data["Y_INDEX_1_X_INDEX_1_VALUE"], fit_reg=False)
- #plt.show()
- #BY FAR BEST PLOTS
- #fig = plt.figure()
- #ax = fig.add_subplot(111, projection='3d')
- #ax.scatter(data['VEHICLE_ID'], data['Y_INDEX_1_X_INDEX_1_VALUE'], data['C-100002'], c='skyblue', s=60)
- #ax.view_init(30, 185)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement