Advertisement
redmage123

Untitled

Oct 4th, 2017
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.50 KB | None | 0 0
  1. #!/usr/bin/python3
  2.  
''' In this example, we're going to use linear regression in TensorFlow to predict housing prices
(the MEDV column) using the RM column (average number of rooms per dwelling) as our single feature.
'''
  6. import pandas as pd
  7. import matplotlib.pyplot as plt
  8. from matplotlib.mlab import PCA
  9. from mpl_toolkits.mplot3d import Axes3D
  10. import numpy as np
  11. import tensorflow as tf
  12. import sys
  13. from sklearn import model_selection
  14. from sklearn import preprocessing
  15.  
  16. np.set_printoptions(precision=3,suppress=True)
  17.  
  18. def pca(dataset):
  19.  
  20. plt.scatter(dataset[:,0],dataset[:,1])
  21. plt.plot()
  22. plt.show()
  23. results = PCA(dataset)
  24.  
  25. x = []
  26. y = []
  27.  
  28. for item in results.Y:
  29. x.append(item[0])
  30. y.append(item[1])
  31.  
  32. plt.close('all')
  33. fig1 = plt.figure()
  34. pltData = [x,y]
  35. plt.scatter(pltData[0],pltData[1],c='b')
  36. xAxisLine = ((min(pltData[0]),max(pltData[0])),(0,0),(0,0))
  37. yAxisLine = ((min(pltData[1]),max(pltData[1])),(0,0),(0,0))
  38. plt.xlabel('RM')
  39. plt.ylabel('MEDV')
  40. plt.show()
  41.  
  42.  
  43. rng = np.random
  44.  
  45. # learning_rate is the alpha value that we pass to the gradient descent algorithm.
  46. learning_rate = 0.1
  47.  
  48.  
  49. # How many cycles we're going to run to try and get our optimum fit.
  50. training_epochs = 1000
  51. display_step = 50
  52.  
  53. # We're going to pull in a the csv file and extract the X value (RM) and Y value (MEDV)
  54. boston_dataset = pd.read_csv('data/housing.csv')
  55. label = boston_dataset['MEDV']
  56. features = boston_dataset['RM'].reshape(-1,1)
  57. dataset = np.asarray(boston_dataset['RM'])
  58. dataset = np.column_stack((np.asarray(boston_dataset['RM']),np.asarray(boston_dataset['MEDV'])))
  59. pca(dataset)
  60.  
  61.  
  62. train_X, test_X, train_Y, test_Y = model_selection.train_test_split(features, label, test_size = 0.33, random_state = 5)
  63.  
  64.  
  65. scaler = preprocessing.StandardScaler()
  66. train_X = scaler.fit_transform(train_X)
  67. # This is the total number of data samples that we're going to run through.
  68. n_samples = train_X.shape[0]
  69.  
  70. # Variable placeholders.
  71. X = tf.placeholder('float')
  72. Y = tf.placeholder('float')
  73.  
  74. W = tf.Variable(rng.randn(), name = 'weight')
  75. b = tf.Variable(rng.randn(), name = 'bias')
  76.  
  77. # Here we describe our training model. It's a linear regression model using the standard y = mx + b
  78. # point slope formula. We calculate the cost by using least mean squares.
  79.  
  80. # This is our prediction algorithm: y = mx + b
  81. prediction = tf.add(tf.multiply(X,W),b)
  82.  
  83. # Let's now calculate the cost of the prediction algorithm using least mean squares
  84. training_cost = tf.reduce_sum(tf.pow(prediction-Y,2))/(2 * n_samples)
  85.  
  86. # This is our gradient descent optimizer algorithm. We're passing in alpha, our learning rate
  87. # and we want the minimum value of the training cost.
  88. optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(training_cost)
  89.  
  90. init = tf.global_variables_initializer()
  91.  
  92. # Now we'll run our training data through our model.
  93. with tf.Session() as tf_session:
  94.  
  95. # Initialize all of our tensorflow variables.
  96. tf_session.run(init)
  97.  
  98. # We'll run the data through for 1000 times (The value of training_epochs).
  99. for epoch in range(training_epochs):
  100.  
  101. # For each training cycle, pass in the x and y values to our optimizer algorithm to calculate the cost.
  102. for (x,y) in zip(train_X,train_Y):
  103. tf_session.run(optimizer,feed_dict = {X: x, Y: y})
  104.  
  105. # For every fifty cycles, let's check and see how we're doing.
  106. if (epoch + 1 ) % 50 == 0:
  107. c = tf_session.run(training_cost,feed_dict = {X: train_X, Y: train_Y})
  108. print ('Epoch: ', '%04d' % (epoch+1),'cost=','{:.9f}'.format(c), \
  109. 'W = ',tf_session.run(W), 'b = ',tf_session.run(b))
  110.  
  111.  
  112. print ('Optimization finished')
  113. print ('Training cost = ',training_cost,' W = ',tf_session.run(W), ' b = ', tf_session.run(b),'\n')
  114.  
  115. plt.plot(train_X, train_Y, 'ro',label='Original data')
  116. # plt.axis(0,2,0,5))
  117.  
  118. plt.plot(train_X,tf_session.run(W) * train_X + tf_session.run(b), label = 'Fitted line')
  119. plt.legend()
  120. plt.show()
  121.  
  122. # We're now going to run test data to see how well our trained model works.
  123.  
  124. print ('Testing...(mean square loss comparison)')
  125. testing_cost = tf_session.run(tf.reduce_sum(tf.pow(prediction - Y, 2)) / (2 * test_Y.shape[0]),
  126. feed_dict = {X: test_X, Y: test_Y})
  127. print ('Testing cost = ',testing_cost)
  128. print ('Absolute mean square loss difference: ', abs(training_cost - testing_cost))
  129.  
  130. plt.plot(test_X,test_Y,'bo',label='Testing data')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement