Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # timestamp low high open close volume
- import pandas as pd
- from sklearn import preprocessing
- from collections import deque
- import numpy as np
- import random
- import time
- import tensorflow as tf
- from tensorflow.keras.models import Sequential
- from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
- from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
- from tensorflow.python.keras.layers import CuDNNLSTM
- from tensorflow.python.keras.models import load_model
# ---- Run configuration / hyperparameters ----
SEQ_LEN = 60  # number of past rows (minutes) in each input sequence fed to the RNN
FUTURE_PERIOD_PREDICT = 3  # how many rows ahead the "future" price is taken from
RATIO_TO_PREDICT = "LTC-USD"  # which currency pair's future movement is classified
EPOCHS = 10  # passes over the training data
BATCH_SIZE = 64  # mini-batch size for model.fit
# Unique run name (sequence length, horizon, unix timestamp) used for log/model paths.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"
- #print(df.head())
def classify(current, future):
    """Label one training row from its current and future price.

    Returns 1 ("buy") when the future price is strictly greater than the
    current price, otherwise 0 ("sell"). Both arguments are coerced with
    float() so string values from a dataframe are accepted.
    """
    return 1 if float(future) > float(current) else 0
- # seqs scalling balance BUILDING SEQUENTIAL DATA
def preprocess_df(df):
    """Turn a merged price dataframe into balanced, scaled training sequences.

    Pipeline: convert every feature column to percent change, standardize it,
    build overlapping SEQ_LEN-long windows, then balance the two classes by
    downsampling the majority class.

    Parameters:
        df: dataframe with feature columns plus 'future' and 'target' columns;
            'target' must be the LAST column (the windowing below relies on it).

    Returns:
        (X, y): X is an ndarray of shape (n, SEQ_LEN, n_features); y is a list
        of 0/1 labels aligned with X (callers wrap it in np.array themselves).
    """
    # 'future' was only needed to derive 'target'; it must not leak into features.
    # FIX: df.drop('future', 1) used the positional axis argument, which was
    # deprecated in pandas 1.0 and removed in pandas 2.0 — use columns= instead.
    df = df.drop(columns="future")

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()  # relative change, not absolute price
            df.dropna(inplace=True)  # pct_change leaves a NaN in the first row
            # Standardize to zero mean / unit variance (this is NOT a 0-1 min-max scale).
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    # Build sliding windows: each sample is the previous SEQ_LEN rows of
    # features, labeled with the current row's target (i[-1], the last column).
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    for i in df.values:
        prev_days.append([n for n in i[:-1]])  # every column except the target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
    random.shuffle(sequential_data)

    # BALANCING THE DATA: keep equal counts of buys and sells so the model
    # cannot score well by always predicting the majority class.
    buys = []
    sells = []
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
    random.shuffle(buys)
    random.shuffle(sells)
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]
    sequential_data = buys + sells
    random.shuffle(sequential_data)  # reshuffle so classes are interleaved

    X = []
    y = []
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    return np.array(X), y
- # DATA PROCESSING
# ---- DATA PROCESSING: merge each pair's close/volume into one dataframe ----
main_df = pd.DataFrame()
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
for ratio in ratios:
    # NOTE(review): assumes crypto_data/<pair>.csv exists with exactly these six
    # headerless columns in this order — confirm against the actual data dump.
    dataset = f"crypto_data/{ratio}.csv"
    df = pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
    # Prefix close/volume with the pair name so columns stay unique after joining.
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
    df.set_index("time", inplace=True)
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]  # keep only close and volume
    # Merge all pairs on the shared time index.
    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)

# 'future' = the predicted pair's close FUTURE_PERIOD_PREDICT rows ahead.
main_df['future'] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)
# Map the classify function to the new column called target (1 = price rises).
# NOTE(review): the last FUTURE_PERIOD_PREDICT rows have NaN 'future', and
# classify() labels NaN as 0 — consider dropping those rows before labeling.
main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))

# Out-of-time split: hold out the most recent 5% of timestamps for validation
# so the model never trains on data newer than what it is validated on.
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05 * len(times))]
validation_main_df = main_df[(main_df.index >= last_5pct)]  # validation data set
main_df = main_df[(main_df.index < last_5pct)]  # training data set

# Build sequences, scale features, and balance classes for both splits.
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)
# preprocess_df returns y as a plain list; convert everything to ndarrays.
train_x = np.array(train_x)
train_y = np.array(train_y)
validation_x = np.array(validation_x)
validation_y = np.array(validation_y)
# print(f"train data: {len(train_x)} validation: {len(validation_x)}")
# print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
# print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")
# ---- MODEL: three stacked LSTMs -> dense head -> 2-way softmax ----
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))  # final LSTM returns only the last hidden state
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(2, activation="softmax"))  # two classes: sell (0) / buy (1)

# FIX: 'lr' is a deprecated alias — use 'learning_rate'.
# NOTE(review): 'decay' was removed from Keras 3 optimizers; switch to a
# LearningRateSchedule if targeting TF >= 2.13.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

# Targets are integer class ids (0/1), hence sparse categorical crossentropy.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['acc'])

tensorboard = TensorBoard(log_dir=f'C:\\Users\\31089\\Desktop\\DeepLearning\\CryptoPrediction\\logs\\{NAME}')

# Unique file name that will include the epoch and the validation acc for that epoch.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# FIX: previously monitor/verbose/save_best_only/mode were passed to
# str.format() — where str.format silently ignores unused keyword arguments —
# instead of to ModelCheckpoint, so the callback used its defaults and saved
# every epoch. They now go to ModelCheckpoint itself, saving only the best.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),  # forward slash works on Windows too
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Save model (portable separator instead of a literal backslash in the path)
model.save("models/{}".format(NAME))
- def _print_shape(arg):
- try: print(arg.shape)
- except: pass
def preprocess(df):
    """Debug variant of preprocess_df: identical pipeline plus shape printouts.

    NOTE(review): this duplicates preprocess_df almost line-for-line; the two
    should be merged (e.g. behind a ``debug=False`` flag) once the printouts
    are no longer needed.

    Returns (X, y) exactly like preprocess_df: X an ndarray of windows, y a
    list of 0/1 labels.
    """
    # FIX: df.drop('future', 1) used the positional axis argument, removed in
    # pandas 2.0 — use columns= instead.
    df = df.drop(columns="future")
    _print_shape(df)

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()  # relative change, not absolute price
            df.dropna(inplace=True)  # pct_change leaves a NaN in the first row
            # Standardize to zero mean / unit variance (not a 0-1 min-max scale).
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)
    _print_shape(df)

    # Build sliding windows of SEQ_LEN rows; i[-1] (last column) is the target.
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    print("//")
    for i in df.values:
        prev_days.append([n for n in i[:-1]])  # every column except the target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
    _print_shape(sequential_data[-1])  # raises IndexError if df had < SEQ_LEN rows
    print("//")
    random.shuffle(sequential_data)

    # BALANCING THE DATA: keep equal counts of buys and sells.
    buys = []
    sells = []
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
    random.shuffle(buys)
    random.shuffle(sells)
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]
    _print_shape(np.array(buys))
    _print_shape(np.array(sells))
    sequential_data = buys + sells
    random.shuffle(sequential_data)  # reshuffle so classes are interleaved

    X = []
    y = []
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    _print_shape(np.array(X))
    _print_shape(np.array(y))
    return np.array(X), y
# ---- INFERENCE (experimental): reload a saved model and rerun preprocessing ----
# NOTE(review): this rebinds `model`, discarding the freshly trained model above,
# and hard-codes the path to one specific training run — confirm the saved model
# exists before running this section.
model = load_model("C://Users//31089//Desktop//DeepLearning//CryptoPrediction//models//60-SEQ-3-PRED-1582881204")
# Called purely for its debug shape printouts; the return value is discarded
# here (the commented-out code below shows the intended prediction loop).
preprocess(main_df)
# print(ready_x.shape)
#predictions = []
# ready_x = preprocess(main_df)
# for x in ready_x:
# print(x.shape)
# l_p = model.predict_classes(x)
# predictions.append(l_p[0])
#use something like np.reshape(x_train, *x_train.shape, 1)
# plot_prediction(main_df, predictions)
Advertisement
Add Comment
Please, Sign In to add comment