Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- import h5py
- import matplotlib
- import matplotlib.pyplot as plt
- from keras.models import Model, Sequential
- from keras.layers import Dense, Input, concatenate,Conv1D, Conv2D, MaxPooling2D, Conv2DTranspose,MaxPooling1D, Cropping2D, Multiply, subtract, Flatten, Reshape, Permute, LSTM, TimeDistributed,Dropout,BatchNormalization,UpSampling1D
- from keras.optimizers import SGD
- from keras.callbacks import ModelCheckpoint,EarlyStopping
- import tensorflow as tf
- from keras.models import load_model
- from keras import optimizers
- from keras import regularizers
- from math import sqrt
- from sklearn.metrics import mean_squared_error
- from sklearn.metrics import mean_absolute_error
- import pickle
- from sklearn.preprocessing import StandardScaler
- import time
- from sklearn import preprocessing
- from numpy import argmax
- from keras.utils import to_categorical
- from tabulate import tabulate
- from numpy import array
# --- Load the pre-pickled test-set features for patient 540 ---
# NOTE(review): pickle.load executes arbitrary code from the file — only
# safe because this is a locally produced, trusted pickle.
# `with` guarantees the file handle is closed (the original leaked it).
with open("D:\\NCSU Research\\GEARS_Jul9\\GEARS\\540\\x_test_540.pkl", "rb") as file_540:
    data = pickle.load(file_540)
# Check for missing values:
# print(data.isnull().sum())  # there are some; a few features are >50% missing
def collect_nan_val(dataframe):
    """Return the percentage of missing (NaN) entries per column of *dataframe*."""
    missing_counts = dataframe.isna().sum()
    total_rows = dataframe.shape[0]
    return missing_counts / total_rows * 100
# --- Preprocessing pipeline ---
# Forward-fill missing sensor values, derive a 3-class glucose label,
# build 7 time-shifted copies of the feature frame (t .. t+6), attach the
# label observed 7 rows past the last lag as the prediction target,
# standardize, and write the result to CSV.

# Forward-fill missing values (several features are >50% missing).
# (.ffill() replaces the deprecated fillna(method="ffill").)
data = data.ffill()

# Glucose class label: (0, 70] -> -1 (hypo), (70, 130] -> 0 (normal),
# (130, 10000] -> 1 (hyper).  pd.cut yields a categorical column.
data["label"] = pd.cut(data["glucose level"], bins=[0, 70, 130, 10000],
                       labels=[-1, 0, 1])

# Feature frame without the raw time / glucose columns.
data_without = data.drop(labels=["time", "glucose level"], axis=1, inplace=False)

# Base feature names; lag i gets the "_<i>" suffix appended to each.
_BASE_COLS = ["fingerstick", "basis_gsr", "basis_skintem", "acceleration",
              "bolus", "basal", "meal", "basis_sleep", "label"]


def _lagged(frame, lag):
    """Return *frame* with the first *lag* rows dropped (by position) and a
    fresh 0-based index; for lag > 0, columns are renamed with an "_<lag>"
    suffix.  Positional iloc avoids the original's label-based drop, which
    would remove every row sharing the first index label if the index were
    non-unique."""
    shifted = frame.iloc[lag:].reset_index(drop=True)
    if lag > 0:
        shifted.columns = [f"{name}_{lag}" for name in _BASE_COLS]
    return shifted


# Seven time-shifted views of the features: t, t+1, ..., t+6.
lagged_frames = [_lagged(data_without, i) for i in range(7)]
data_0, data_1, data_2, data_3, data_4, data_5, data_6 = lagged_frames

# One row per 7-step window; trailing rows where the longer lags run off
# the end of the series become NaN and are dropped.
data_new = pd.concat(lagged_frames, axis=1).dropna()

# Prediction target: the label 7 rows beyond the last lag of each window
# (i.e. 13 steps ahead of the window's first row).
label_new = data_6["label_6"].tolist()[7:]
data_label = pd.DataFrame({"label_new": label_new})  # pd.DataFrame, not pandas internals

data_complete = pd.concat([data_new, data_label], axis=1).dropna()

# Standardize every windowed column to zero mean / unit variance.
# NOTE(review): this scales data_new — including the categorical label_x
# columns — exactly as the original pipeline did; confirm that is intended.
X_scaled = pd.DataFrame(preprocessing.scale(data_new))
X_scaled.columns = [c for c in data_complete.columns if c != "label_new"]

# Re-attach the target and drop rows left without a label.
data_f = pd.concat([X_scaled, data_label], axis=1).dropna()
data_f.to_csv("D:\\NCSU Research\\GEARS_Jul9\\GEARS\\540\\data_test.complete.csv",
              index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement