Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # timestamp low high open close volume
- import pandas as pd
- from sklearn import preprocessing
- from collections import deque
- import numpy as np
- import random
- import time
- import tensorflow as tf
- from tensorflow.keras.models import Sequential
- from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
- from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
- from tensorflow.python.keras.layers import CuDNNLSTM
- from tensorflow.python.keras.models import load_model
# ---- Run configuration / hyperparameters ----
SEQ_LEN = 60  # number of past rows (minutes) in each input sequence fed to the RNN
FUTURE_PERIOD_PREDICT = 3  # how many rows ahead the "future" price is taken from
RATIO_TO_PREDICT = "LTC-USD"  # which currency pair's future movement is classified
EPOCHS = 10  # passes over the training data
BATCH_SIZE = 64  # mini-batch size for model.fit
# Unique run name (sequence length, horizon, unix timestamp) used for log/model paths.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"
- #print(df.head())
def classify(current, future):
    """Label one training row from its current and future price.

    Returns 1 ("buy") when the future price is strictly greater than the
    current price, otherwise 0 ("sell"). Both arguments are coerced with
    float() so string values from a dataframe are accepted.
    """
    return 1 if float(future) > float(current) else 0
- # seqs scalling balance BUILDING SEQUENTIAL DATA
def preprocess_df(df):
    """Turn a merged price dataframe into balanced, scaled training sequences.

    Pipeline: convert every feature column to percent change, standardize it,
    build overlapping SEQ_LEN-long windows, then balance the two classes by
    downsampling the majority class.

    Parameters:
        df: dataframe with feature columns plus 'future' and 'target' columns;
            'target' must be the LAST column (the windowing below relies on it).

    Returns:
        (X, y): X is an ndarray of shape (n, SEQ_LEN, n_features); y is a list
        of 0/1 labels aligned with X (callers wrap it in np.array themselves).
    """
    # 'future' was only needed to derive 'target'; it must not leak into features.
    # FIX: df.drop('future', 1) used the positional axis argument, which was
    # deprecated in pandas 1.0 and removed in pandas 2.0 — use columns= instead.
    df = df.drop(columns="future")

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()  # relative change, not absolute price
            df.dropna(inplace=True)  # pct_change leaves a NaN in the first row
            # Standardize to zero mean / unit variance (this is NOT a 0-1 min-max scale).
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    # Build sliding windows: each sample is the previous SEQ_LEN rows of
    # features, labeled with the current row's target (i[-1], the last column).
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    for i in df.values:
        prev_days.append([n for n in i[:-1]])  # every column except the target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
    random.shuffle(sequential_data)

    # BALANCING THE DATA: keep equal counts of buys and sells so the model
    # cannot score well by always predicting the majority class.
    buys = []
    sells = []
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
    random.shuffle(buys)
    random.shuffle(sells)
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]
    sequential_data = buys + sells
    random.shuffle(sequential_data)  # reshuffle so classes are interleaved

    X = []
    y = []
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    return np.array(X), y
- # DATA PROCESSING
# ---- DATA PROCESSING: merge each pair's close/volume into one dataframe ----
main_df = pd.DataFrame()
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
for ratio in ratios:
    # NOTE(review): assumes crypto_data/<pair>.csv exists with exactly these six
    # headerless columns in this order — confirm against the actual data dump.
    dataset = f"crypto_data/{ratio}.csv"
    df = pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
    # Prefix close/volume with the pair name so columns stay unique after joining.
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
    df.set_index("time", inplace=True)
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]  # keep only close and volume
    # Merge all pairs on the shared time index.
    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)

# 'future' = the predicted pair's close FUTURE_PERIOD_PREDICT rows ahead.
main_df['future'] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)
# Map the classify function to the new column called target (1 = price rises).
# NOTE(review): the last FUTURE_PERIOD_PREDICT rows have NaN 'future', and
# classify() labels NaN as 0 — consider dropping those rows before labeling.
main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))

# Out-of-time split: hold out the most recent 5% of timestamps for validation
# so the model never trains on data newer than what it is validated on.
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05 * len(times))]
validation_main_df = main_df[(main_df.index >= last_5pct)]  # validation data set
main_df = main_df[(main_df.index < last_5pct)]  # training data set

# Build sequences, scale features, and balance classes for both splits.
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)
# preprocess_df returns y as a plain list; convert everything to ndarrays.
train_x = np.array(train_x)
train_y = np.array(train_y)
validation_x = np.array(validation_x)
validation_y = np.array(validation_y)
# print(f"train data: {len(train_x)} validation: {len(validation_x)}")
# print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
# print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")
# ---- MODEL: three stacked LSTMs -> dense head -> 2-way softmax ----
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))  # final LSTM returns only the last hidden state
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(2, activation="softmax"))  # two classes: sell (0) / buy (1)

# FIX: 'lr' is a deprecated alias — use 'learning_rate'.
# NOTE(review): 'decay' was removed from Keras 3 optimizers; switch to a
# LearningRateSchedule if targeting TF >= 2.13.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

# Targets are integer class ids (0/1), hence sparse categorical crossentropy.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['acc'])

tensorboard = TensorBoard(log_dir=f'C:\\Users\\31089\\Desktop\\DeepLearning\\CryptoPrediction\\logs\\{NAME}')

# Unique file name that will include the epoch and the validation acc for that epoch.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# FIX: previously monitor/verbose/save_best_only/mode were passed to
# str.format() — where str.format silently ignores unused keyword arguments —
# instead of to ModelCheckpoint, so the callback used its defaults and saved
# every epoch. They now go to ModelCheckpoint itself, saving only the best.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),  # forward slash works on Windows too
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Save model (portable separator instead of a literal backslash in the path)
model.save("models/{}".format(NAME))
- def _print_shape(arg):
- try: print(arg.shape)
- except: pass
def preprocess(df):
    """Debug variant of preprocess_df: identical pipeline plus shape printouts.

    NOTE(review): this duplicates preprocess_df almost line-for-line; the two
    should be merged (e.g. behind a ``debug=False`` flag) once the printouts
    are no longer needed.

    Returns (X, y) exactly like preprocess_df: X an ndarray of windows, y a
    list of 0/1 labels.
    """
    # FIX: df.drop('future', 1) used the positional axis argument, removed in
    # pandas 2.0 — use columns= instead.
    df = df.drop(columns="future")
    _print_shape(df)

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()  # relative change, not absolute price
            df.dropna(inplace=True)  # pct_change leaves a NaN in the first row
            # Standardize to zero mean / unit variance (not a 0-1 min-max scale).
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)
    _print_shape(df)

    # Build sliding windows of SEQ_LEN rows; i[-1] (last column) is the target.
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    print("//")
    for i in df.values:
        prev_days.append([n for n in i[:-1]])  # every column except the target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
    _print_shape(sequential_data[-1])  # raises IndexError if df had < SEQ_LEN rows
    print("//")
    random.shuffle(sequential_data)

    # BALANCING THE DATA: keep equal counts of buys and sells.
    buys = []
    sells = []
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
    random.shuffle(buys)
    random.shuffle(sells)
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]
    _print_shape(np.array(buys))
    _print_shape(np.array(sells))
    sequential_data = buys + sells
    random.shuffle(sequential_data)  # reshuffle so classes are interleaved

    X = []
    y = []
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    _print_shape(np.array(X))
    _print_shape(np.array(y))
    return np.array(X), y
# ---- INFERENCE (experimental): reload a saved model and rerun preprocessing ----
# NOTE(review): this rebinds `model`, discarding the freshly trained model above,
# and hard-codes the path to one specific training run — confirm the saved model
# exists before running this section.
model = load_model("C://Users//31089//Desktop//DeepLearning//CryptoPrediction//models//60-SEQ-3-PRED-1582881204")
# Called purely for its debug shape printouts; the return value is discarded
# here (the commented-out code below shows the intended prediction loop).
preprocess(main_df)
# print(ready_x.shape)
#predictions = []
# ready_x = preprocess(main_df)
# for x in ready_x:
# print(x.shape)
# l_p = model.predict_classes(x)
# predictions.append(l_p[0])
#use something like np.reshape(x_train, *x_train.shape, 1)
# plot_prediction(main_df, predictions)
Advertisement
Add Comment
Please, Sign In to add comment