Advertisement
Guest User

Untitled

a guest
Mar 25th, 2017
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.19 KB | None | 0 0
  1. from __future__ import print_function
  2.  
  3. import argparse, os
  4. import theano
  5. import theano.tensor as T
  6. import lasagne
  7. import numpy as np
  8. import pandas as pd
  9. from sklearn.model_selection import train_test_split
  10. from sklearn import metrics
  11.  
  def getargs(argv=None):
      """Parse the command-line options of the MLP train/test script.

      Args:
          argv: Optional list of argument strings to parse.  When None (the
              default) argparse reads the process command line, which is
              what the script entry point relies on.  Passing a list makes
              the parser usable from tests or other Python code.

      Returns:
          argparse.Namespace with the attributes ``indir``, ``outprefix``,
          ``hidden``, ``epochs``, ``learn`` and ``mode``.
      """
      parser = argparse.ArgumentParser(description="")

      # Input/output locations.
      parser.add_argument('-i', '--indir', default="testdata",
                          help='Directory containing train.csv and test.csv')
      parser.add_argument('-o', '--outprefix', default="out",
                          help='Prefix of the prediction CSV file name')

      # Model and optimisation hyper-parameters.
      parser.add_argument('-n', '--hidden', type=int, required=True,
                          help='Number of hidden nodes')
      # The epoch count drives the training loop, so describe it as such
      # (the previous help text called these "validation epochs").
      parser.add_argument('-e', '--epochs', type=int, default=1000,
                          help='Number of training epochs (iterations)')
      parser.add_argument('-l', '--learn', type=float, default=0.001,
                          help='Learning rate')

      parser.add_argument('mode', choices=['train', 'test'],
                          help='train: hold out part of train.csv and report '
                               'accuracy; test: write predictions for '
                               'test.csv')

      return parser.parse_args(argv)
  29.  
  class BasicMLP(object):
      """Softmax classifier with one sigmoid hidden layer (Lasagne/Theano).

      Intended call order: construct with the training data, call
      ``network`` to build and compile the model, ``train_network`` to fit
      it, then ``get_output`` to predict.

      Attributes:
          X: Training inputs, stored exactly as passed in.
          y: Training labels, stored exactly as passed in.
          shape: ``X.shape``.
          num_classes: Number of distinct values found in ``y``; used as the
              width of the softmax output layer.
          l_in: Lasagne input layer (set by ``network``).
          net_output: Symbolic output of the softmax layer (set by
              ``network``).
          train: Compiled Theano function ``(inputs, labels) -> mean loss``
              that also applies one Adam update (set by ``network``).
      """

      # Compiled prediction function; built lazily by get_output().
      _predict_fn = None

      def __init__(self, X, y):
          """Store the training set and derive its shape and class count.

          Args:
              X: Training inputs; any object exposing ``.shape`` (e.g. a
                  pandas DataFrame or a numpy array).
              y: Training labels; a pandas Series or any array-like accepted
                  by ``numpy.unique``.
          """
          self.X = X
          self.y = y
          self.shape = X.shape
          # np.unique handles plain arrays as well as pandas objects.
          # NOTE(review): this presumably expects labels to be the integers
          # 0..num_classes-1 with every class present in y -- confirm.
          self.num_classes = len(np.unique(y))

      def _input_shape(self):
          """Return the input-layer shape with an unspecified batch size."""
          # Only the per-sample dimensions are fixed; the number of rows
          # differs between the training set and the data passed to
          # get_output(), so it must not be baked into the layer.
          return (None,) + tuple(self.shape[1:])

      def network(self, num_units, learning_rate):
          """Build the network graph and compile the training function.

          Args:
              num_units: Number of units in the hidden layer.
              learning_rate: Learning rate handed to the Adam update rule.
          """
          # Define layer structure
          self.l_in = lasagne.layers.InputLayer(shape=self._input_shape())
          l_hidden = lasagne.layers.DenseLayer(
              self.l_in, num_units=num_units,
              nonlinearity=lasagne.nonlinearities.sigmoid)
          l_output = lasagne.layers.DenseLayer(
              l_hidden, num_units=self.num_classes,
              nonlinearity=lasagne.nonlinearities.softmax)
          self.net_output = lasagne.layers.get_output(l_output)

          # Define objective: mean categorical cross-entropy against integer
          # class labels.
          true_output = T.ivector('true_output')
          loss = T.mean(lasagne.objectives.categorical_crossentropy(
              self.net_output, true_output))

          # Define update
          all_params = lasagne.layers.get_all_params(l_output)
          updates = lasagne.updates.adam(loss, all_params,
                                         learning_rate=learning_rate)
          self.train = theano.function([self.l_in.input_var, true_output],
                                       loss, updates=updates)

          # Any previously compiled predictor belongs to the old graph.
          self._predict_fn = None

      def train_network(self, n_epochs):
          """Run ``n_epochs`` full-batch updates, printing epoch and loss.

          Args:
              n_epochs: Number of passes over the stored training set.
          """
          for n in range(n_epochs):
              print(n, self.train(self.X, self.y))

      def get_output(self, X2, do_argmax=True):
          """Evaluate the network on ``X2``.

          Args:
              X2: Inputs to evaluate; per-sample dimensions must match the
                  training inputs.
              do_argmax: When true (default) return the index of the largest
                  softmax output for each row; otherwise return the raw
                  softmax outputs.

          Returns:
              Array of predicted class indices, or the softmax output matrix
              when ``do_argmax`` is false.
          """
          # Compile once and reuse: Theano compilation is expensive and the
          # graph does not change between calls.
          if self._predict_fn is None:
              self._predict_fn = theano.function([self.l_in.input_var],
                                                 self.net_output)
          y_predicted = self._predict_fn(X2)
          if do_argmax:
              y_predicted = np.argmax(y_predicted, axis=1)
          return y_predicted
  69.  
  class ToyData(object):
      """
      Load Kaggle Digit Recognizer MNIST data from datadir &
      write predictions in the Kaggle submission format

      ``datadir`` is expected to contain ``train.csv`` (label in the first
      column, features in the remaining columns) and ``test.csv`` (read
      as-is and used as the feature table to predict on).
      """

      def __init__(self, datadir):
          """Remember the directory that holds train.csv and test.csv."""
          self.datadir = datadir

      def _load_train(self):
          """Read train.csv and split it into features and labels.

          Returns:
              Tuple ``(X, y)``: every column but the first as a DataFrame,
              and the first column as a Series.
          """
          df = pd.read_csv(os.path.join(self.datadir, "train.csv"))
          X = df.iloc[:, 1:]
          y = df.iloc[:, 0]
          return X, y

      def load_train(self, test_size=0.8, random_state=None):
          """Load train.csv and split it into a training and a held-out part.

          Args:
              test_size: Passed to ``train_test_split``; the default of 0.8
                  keeps the historical behaviour of holding out 80% of the
                  rows and training on the remaining 20%.
              random_state: Passed to ``train_test_split``; give an integer
                  for a reproducible split.  None (default) keeps the
                  previous unseeded behaviour.

          Returns:
              Tuple ``(X, X_test, y, y_test)``.
          """
          df_X, df_y = self._load_train()
          X, X_test, y, y_test = train_test_split(
              df_X, df_y, test_size=test_size, random_state=random_state)
          return X, X_test, y, y_test

      def load_test(self):
          """Load the full training set plus the unlabeled test.csv table.

          Returns:
              Tuple ``(X, y, X_new)`` where ``X``/``y`` come from train.csv
              and ``X_new`` is the content of test.csv.
          """
          X, y = self._load_train()
          X_new = pd.read_csv(os.path.join(self.datadir, "test.csv"))
          return X, y, X_new

      def write_submission(self, y_predicted, out_file):
          """Write predictions as a CSV with ``ImageId`` and ``Label`` columns.

          Args:
              y_predicted: One predicted label per row, in row order.
              out_file: Path of the CSV file to create.
          """
          y_new = pd.DataFrame(y_predicted, columns=['Label'])
          # ImageId is the 1-based position of each prediction.
          y_new.insert(0, 'ImageId', range(1, len(y_new) + 1))
          y_new.to_csv(out_file, index=False)
  98.  
  if __name__ == '__main__':
      # Script entry point: parse options, load the data for the chosen
      # mode, fit the MLP, then report accuracy (and, in test mode, write a
      # submission file).
      args = getargs()
      is_train = (args.mode == 'train')

      N_UNITS, N_EPOCHS = args.hidden, args.epochs
      LEARNING_RATE, OUT_PREFIX = args.learn, args.outprefix

      # Prep input
      td = ToyData(args.indir)
      if not is_train:
          X, y, X_new = td.load_test()
      else:
          X, X_test, y, y_test = td.load_train()

      # Build and fit the model on the training portion.
      bmlp = BasicMLP(X, y)
      bmlp.network(N_UNITS, LEARNING_RATE)
      bmlp.train_network(N_EPOCHS)

      # Evaluation: accuracy on the data the model was fitted on.
      fitted_predictions = bmlp.get_output(X)
      print(metrics.accuracy_score(y, fitted_predictions))

      if not is_train:
          # Make predictions using trained model
          name_parts = (OUT_PREFIX, N_UNITS, LEARNING_RATE, N_EPOCHS)
          out_file = "{}-node{}-learn{}-epoch{}.csv".format(*name_parts)
          td.write_submission(bmlp.get_output(X_new), out_file)
      else:
          # Accuracy on the held-out split.
          heldout_predictions = bmlp.get_output(X_test)
          print(metrics.accuracy_score(y_test, heldout_predictions))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement