Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- ''' In this example, we're going to use linear regression in tensorflow to predict housing prices based
- on the size of the lot as our features.
- '''
- import pandas as pd
- import matplotlib.pyplot as plt
- from matplotlib.mlab import PCA
- from mpl_toolkits.mplot3d import Axes3D
- import numpy as np
- import tensorflow as tf
- import sys
- from sklearn import model_selection
- from sklearn import preprocessing
- np.set_printoptions(precision=3,suppress=True)
- def pca(dataset):
- plt.scatter(dataset[:,0],dataset[:,1])
- plt.plot()
- plt.show()
- results = PCA(dataset)
- x = []
- y = []
- for item in results.Y:
- x.append(item[0])
- y.append(item[1])
- plt.close('all')
- fig1 = plt.figure()
- pltData = [x,y]
- plt.scatter(pltData[0],pltData[1],c='b')
- xAxisLine = ((min(pltData[0]),max(pltData[0])),(0,0),(0,0))
- yAxisLine = ((min(pltData[1]),max(pltData[1])),(0,0),(0,0))
- plt.xlabel('RM')
- plt.ylabel('MEDV')
- plt.show()
- rng = np.random
- # learning_rate is the alpha value that we pass to the gradient descent algorithm.
- learning_rate = 0.1
- # How many cycles we're going to run to try and get our optimum fit.
- training_epochs = 1000
- display_step = 50
- # We're going to pull in a the csv file and extract the X value (RM) and Y value (MEDV)
- boston_dataset = pd.read_csv('data/housing.csv')
- label = boston_dataset['MEDV']
- features = boston_dataset['RM'].reshape(-1,1)
- dataset = np.asarray(boston_dataset['RM'])
- dataset = np.column_stack((np.asarray(boston_dataset['RM']),np.asarray(boston_dataset['MEDV'])))
- pca(dataset)
- train_X, test_X, train_Y, test_Y = model_selection.train_test_split(features, label, test_size = 0.33, random_state = 5)
- scaler = preprocessing.StandardScaler()
- train_X = scaler.fit_transform(train_X)
- # This is the total number of data samples that we're going to run through.
- n_samples = train_X.shape[0]
- # Variable placeholders.
- X = tf.placeholder('float')
- Y = tf.placeholder('float')
- W = tf.Variable(rng.randn(), name = 'weight')
- b = tf.Variable(rng.randn(), name = 'bias')
- # Here we describe our training model. It's a linear regression model using the standard y = mx + b
- # point slope formula. We calculate the cost by using least mean squares.
- # This is our prediction algorithm: y = mx + b
- prediction = tf.add(tf.multiply(X,W),b)
- # Let's now calculate the cost of the prediction algorithm using least mean squares
- training_cost = tf.reduce_sum(tf.pow(prediction-Y,2))/(2 * n_samples)
- # This is our gradient descent optimizer algorithm. We're passing in alpha, our learning rate
- # and we want the minimum value of the training cost.
- optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(training_cost)
- init = tf.global_variables_initializer()
- # Now we'll run our training data through our model.
- with tf.Session() as tf_session:
- # Initialize all of our tensorflow variables.
- tf_session.run(init)
- # We'll run the data through for 1000 times (The value of training_epochs).
- for epoch in range(training_epochs):
- # For each training cycle, pass in the x and y values to our optimizer algorithm to calculate the cost.
- for (x,y) in zip(train_X,train_Y):
- tf_session.run(optimizer,feed_dict = {X: x, Y: y})
- # For every fifty cycles, let's check and see how we're doing.
- if (epoch + 1 ) % 50 == 0:
- c = tf_session.run(training_cost,feed_dict = {X: train_X, Y: train_Y})
- print ('Epoch: ', '%04d' % (epoch+1),'cost=','{:.9f}'.format(c), \
- 'W = ',tf_session.run(W), 'b = ',tf_session.run(b))
- print ('Optimization finished')
- print ('Training cost = ',training_cost,' W = ',tf_session.run(W), ' b = ', tf_session.run(b),'\n')
- plt.plot(train_X, train_Y, 'ro',label='Original data')
- # plt.axis(0,2,0,5))
- plt.plot(train_X,tf_session.run(W) * train_X + tf_session.run(b), label = 'Fitted line')
- plt.legend()
- plt.show()
- # We're now going to run test data to see how well our trained model works.
- print ('Testing...(mean square loss comparison)')
- testing_cost = tf_session.run(tf.reduce_sum(tf.pow(prediction - Y, 2)) / (2 * test_Y.shape[0]),
- feed_dict = {X: test_X, Y: test_Y})
- print ('Testing cost = ',testing_cost)
- print ('Absolute mean square loss difference: ', abs(training_cost - testing_cost))
- plt.plot(test_X,test_Y,'bo',label='Testing data')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement