# timestamp low high open close volume
import pandas as pd
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

SEQ_LEN = 60  # length of each input sequence
FUTURE_PERIOD_PREDICT = 3  # how many periods ahead to predict
RATIO_TO_PREDICT = "LTC-USD"
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

def classify(current, future):
    # Label 1 ("buy") if the price is higher in the future than it is right
    # now in our training data, otherwise 0 ("sell").
    if float(future) > float(current):
        return 1  # buy
    else:
        return 0  # sell

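# Quick sanity check of classify (illustrative, safe to remove). Note that an
# unchanged price counts as "sell" because the comparison is strictly greater-than.
assert classify(10.0, 10.5) == 1
assert classify(10.0, 9.5) == 0
assert classify(10.0, 10.0) == 0
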
# seqs, scaling, balancing: BUILDING SEQUENTIAL DATA
def preprocess_df(df):
    # scaling
    df = df.drop(columns=['future'])  # 'future' was only needed to build 'target'

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()  # normalize prices/volumes to percent changes
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)  # standardize to zero mean, unit variance

    df.dropna(inplace=True)

    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)  # rolling window of the last SEQ_LEN rows

    for i in df.values:
        prev_days.append([n for n in i[:-1]])  # all feature columns, excluding the target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])

    random.shuffle(sequential_data)

    # BALANCING THE DATA
    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)

    return np.array(X), np.array(y)  # both as arrays so Keras can consume them directly

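# Shape sketch (assuming all four ratio CSVs load, giving 8 feature columns:
# a close and a volume per ratio): X is (num_samples, SEQ_LEN, 8) and
# y is (num_samples,).
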
# DATA PROCESSING
main_df = pd.DataFrame()

ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
for ratio in ratios:
    dataset = f"crypto_data/{ratio}.csv"
    df = pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
    #print(df.head())
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
    df.set_index("time", inplace=True)
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]  # keep only this ratio's close and volume
    #print(df.head())

    # Now merge all of them together
    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)

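# After the loop, main_df is indexed by time with one close/volume pair per
# ratio: BTC-USD_close, BTC-USD_volume, ..., BCH-USD_volume.
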
main_df['future'] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)
main_df.dropna(inplace=True)  # the shift leaves NaN in 'future' for the last rows, which classify would mislabel as 0
#print(main_df[[f"{RATIO_TO_PREDICT}_close", "future"]].head(100))

# Map the classify function to the new column called target
main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))
#print(main_df[[f"{RATIO_TO_PREDICT}_close", "future", "target"]].head(20))

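# Example of the shift above: with FUTURE_PERIOD_PREDICT = 3, row t's 'future'
# is the close 3 rows later, so target = 1 exactly when close[t+3] > close[t].
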
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05 * len(times))]  # timestamp marking the start of the last 5%
#print(last_5pct)

validation_main_df = main_df[(main_df.index >= last_5pct)]  # out-of-time validation set (last 5%)
main_df = main_df[(main_df.index < last_5pct)]  # training set (first 95%)

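# Splitting on time (rather than sampling rows at random) keeps the validation
# period strictly after the training period, so overlapping sequences cannot
# leak future information into training.
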
# seqs, scaling, balancing
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

# print(f"train data: {len(train_x)} validation: {len(validation_x)}")
# print(f"Dont buys: {(train_y == 0).sum()}, buys: {(train_y == 1).sum()}")
# print(f"VALIDATION Dont buys: {(validation_y == 0).sum()}, buys: {(validation_y == 1).sum()}")


model = Sequential()
model.add(LSTM(128, input_shape=train_x.shape[1:], return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))  # last LSTM layer returns only the final state
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))  # two classes: 0 = sell, 1 = buy

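# Optional sanity check of the architecture (illustrative):
# model.summary()  # the final layer should report output shape (None, 2)
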
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['acc'])

tensorboard = TensorBoard(log_dir=f'C:\\Users\\31089\\Desktop\\DeepLearning\\CryptoPrediction\\logs\\{NAME}')
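
# Training curves can then be inspected from a shell with:
#   tensorboard --logdir <the logs directory above>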

filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath),
                             monitor='val_acc', verbose=1,
                             save_best_only=True, mode='max')  # saves only the best ones

# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Save model
model.save("models/{}".format(NAME))

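# The saved model can be reloaded later with
# tf.keras.models.load_model("models/{}".format(NAME)).
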

def _print_shape(arg):
    # Debug helper: print .shape if the argument has one, otherwise stay silent.
    try:
        print(arg.shape)
    except AttributeError:
        pass

def preprocess(df):
    # Debug variant of preprocess_df that prints intermediate shapes.
    # scaling
    df = df.drop(columns=['future'])
    _print_shape(df)

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()  # normalize prices/volumes to percent changes
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)  # standardize to zero mean, unit variance

    df.dropna(inplace=True)
    _print_shape(df)

    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)

    print("//")
    for i in df.values:
        prev_days.append([n for n in i[:-1]])  # all feature columns, excluding the target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
            _print_shape(sequential_data[-1][0])  # shape of the newest sequence
    print("//")

    random.shuffle(sequential_data)

    # BALANCING THE DATA
    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]
    _print_shape(np.array(buys))
    _print_shape(np.array(sells))

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    _print_shape(np.array(X))
    _print_shape(np.array(y))
    return np.array(X), np.array(y)

model = load_model("C:/Users/31089/Desktop/DeepLearning/CryptoPrediction/models/60-SEQ-3-PRED-1582881204")


ready_x, ready_y = preprocess(main_df)
# print(ready_x.shape)
# predictions = []
# for x in ready_x:
#     print(x.shape)
#     l_p = model.predict_classes(x)
#     predictions.append(l_p[0])
# use something like x.reshape(1, *x.shape) to add the batch dimension
# plot_prediction(main_df, predictions)
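
# A minimal runnable version of the commented-out loop above (a sketch, not
# final code: plot_prediction is never defined in this script, so only the
# class labels are computed). One batched model.predict call replaces the
# per-sequence predict_classes loop.
predictions = np.argmax(model.predict(ready_x), axis=-1)  # 0 = sell, 1 = buy
print("predicted buys:", (predictions == 1).sum(), "of", len(predictions))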