Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Wed Mar 21 09:23:51 2018
- @author: kazin
- """
- import pyodbc as cn
- import numpy as np
- import pandas as pd
- from sklearn import preprocessing
# --- Database connection ---------------------------------------------------
# NOTE(review): credentials are hardcoded in source; move them to environment
# variables or a secrets store before sharing this script.
server = 'facil.database.windows.net'
database = 'main'
username = 'facildatabase'
password = 'DifficultPassword69.'
driver = '{ODBC Driver 11 for SQL Server}'
conn_parts = [
    'DRIVER=' + driver,
    'SERVER=' + server,
    'DATABASE=' + database,
    'UID=' + username,
    'PWD=' + password,
]
cnxn = cn.connect(';'.join(conn_parts))
print(cnxn)

# --- Pull each table into its own DataFrame --------------------------------
nutsComplete = pd.read_sql('SELECT * from dbo.NutsComplete', con=cnxn)
boltsComplete = pd.read_sql('SELECT * from dbo.BoltsComplete', con=cnxn)
contracts = pd.read_sql('SELECT * from dbo.contracts', con=cnxn)
coatings = pd.read_sql('SELECT * from dbo.coatings', con=cnxn)
geometryCoating = pd.read_sql('SELECT * from dbo.geometryCoating', con=cnxn)
countryCodes = pd.read_sql('SELECT * from dbo.countryCodes', con=cnxn)
rfqPortal = pd.read_sql('SELECT * from dbo.rfqPortal', con=cnxn)
### Optional raw-table exports (uncomment to dump the source tables)
#nutsComplete.to_csv('nutsComplete.csv', sep='\t', encoding='utf-8')
#boltsComplete.to_csv('boltsComplete.csv', sep='\t', encoding='utf-8')
#contracts.to_csv('contracts.csv', sep='\t', encoding='utf-8')
#coatings.to_csv('coatings.csv', sep='\t', encoding='utf-8')
#geometryCoating.to_csv('geometryCoating.csv', sep='\t', encoding='utf-8')
#countryCodes.to_csv('countryCodes.csv', sep='\t', encoding='utf-8')

# --- "KPI" for bolts: correlation of every column with the purchase price --
headers = list(boltsComplete)
boltsComplete = boltsComplete.astype('float64')
# Pearson correlation of each column against 'Pprice'.  The frame is already
# float64, so the per-column re-cast from the original loop is unnecessary;
# DataFrame/Series.corr ignores NaN pairs on its own.
KpiBolts = [boltsComplete['Pprice'].corr(boltsComplete[col]) for col in headers]
KPIBolts = pd.DataFrame(
    np.column_stack([headers, KpiBolts]),
    columns=['ColumnName', 'Value'],
)
## Export the bolt correlation table for inspection.
KPIBolts.to_csv('KpiBolts.csv', sep=',', encoding='utf-8')

# Replace remaining NaNs with 0 so the scaler can run, then rescale every
# column to the range [-1, 1].
boltsComplete.fillna(0, inplace=True)
scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(boltsComplete)
BoltsNormalizedDataFrame = pd.DataFrame(scaled, columns=headers)
# --- "KPI" for nuts: correlation of every column with the purchase price ---
headers = list(nutsComplete)
# The boolean flag must be numeric before the whole frame is cast to float.
nutsComplete['Has_Washer'] = nutsComplete['Has_Washer'].astype(int)
nutsComplete = nutsComplete.astype('float64')
# Pearson correlation of each column against 'Pprice' (frame is already
# float64, so no per-column re-cast is needed).
KpiNuts = [nutsComplete['Pprice'].corr(nutsComplete[col]) for col in headers]
KPIN = pd.DataFrame(
    np.column_stack([headers, KpiNuts]),
    columns=['ColumnName', 'Value'],
)
## Export the nut correlation table for inspection.
KPIN.to_csv('KpiNuts.csv', sep=',', encoding='utf-8')
# Replace NaNs and rescale to [-1, 1], mirroring the bolts pipeline above.
nutsComplete.fillna(0, inplace=True)
scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(nutsComplete)
NutsNormalizedDataFrame = pd.DataFrame(scaled, columns=headers)
# Columns with the strongest price correlation, extracted as 1-D numpy arrays.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# Series.to_numpy() is the supported replacement and returns the same data.
HeadDiameter_matrix = BoltsNormalizedDataFrame['HeadDiameter'].to_numpy()
Length_matrix = BoltsNormalizedDataFrame['Length'].to_numpy()
TotalLength_matrix = BoltsNormalizedDataFrame['TotalLength'].to_numpy()
ShoulderDiameter_matrix = BoltsNormalizedDataFrame['ShoulderDiameter'].to_numpy()
Weight_matrix = BoltsNormalizedDataFrame['Weight'].to_numpy()
TEGWNE_matrix = BoltsNormalizedDataFrame['TEGWNE'].to_numpy()
# Same extraction for the nuts frame.
Diameter_matrix = NutsNormalizedDataFrame['Diameter'].to_numpy()
Height_matrix = NutsNormalizedDataFrame['Height'].to_numpy()
TEGWNE_Nuts_matrix = NutsNormalizedDataFrame['TEGWNE'].to_numpy()
Pprice_matrix = NutsNormalizedDataFrame['Pprice'].to_numpy()
#### Neural-network input setup ####
# Stack the three nut feature rows into a (3, n) array.  The original wrapped
# each row in np.hstack([...]), which is a no-op on a single array.
X = np.vstack([Diameter_matrix, Height_matrix, TEGWNE_Nuts_matrix])
# Flatten to a (3*n, 1) column vector: every scalar becomes one "example"
# with a single input feature.
X = X.reshape(-1, 1)

### Useful sizes
num_examples = len(X)
nn_input_dim = 1
nn_output_dim = 1  # fixed typo: was 'nn_output_din'

## Gradient-descent hyperparameters, picked by hand
epsilon = 0.01      # learning rate
reg_lambda = 0.01   # L2 regularisation strength
def calculate_loss(model):
    """Return the mean cross-entropy loss of `model` over the global X.

    Reads the module-level globals X, num_examples, Pprice_matrix and
    reg_lambda.  `model` must contain keys W1/b1/W2/b2/W3/b3.
    """
    W1, b1, W2, b2, W3, b3 = (model['W1'], model['b1'], model['W2'],
                              model['b2'], model['W3'], model['b3'])
    # Forward propagation: two tanh layers followed by a softmax output.
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    a2 = np.tanh(z2)
    z3 = a2.dot(W3) + b3
    exp_scores = np.exp(z3)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Cross-entropy of the "correct" class for each example.
    # NOTE(review): Pprice_matrix holds floats scaled to [-1, 1]; numpy fancy
    # indexing requires integer class ids, so this lookup will raise unless
    # the labels are re-encoded — confirm the intended label scheme.
    correct_logprobs = -np.log(probs[range(num_examples), Pprice_matrix])
    data_loss = np.sum(correct_logprobs)
    # L2 regularisation: ADD lambda/2 times the summed squared weights.
    # (The original wrote `reg_lambda/2 - (...)`, which *rewards* large
    # weights instead of penalising them.)
    data_loss += reg_lambda / 2 * (np.sum(np.square(W1))
                                   + np.sum(np.square(W2))
                                   + np.sum(np.square(W3)))
    return 1. / num_examples * data_loss
def predict(model, x):
    """Return the argmax class index for each row of `x`.

    Bug fix: the original body forward-propagated the module-level global X
    and ignored its `x` argument entirely, so every call predicted on the
    training inputs regardless of what was passed in.
    """
    W1, b1, W2, b2, W3, b3 = (model['W1'], model['b1'], model['W2'],
                              model['b2'], model['W3'], model['b3'])
    # Forward propagation: two tanh layers followed by a softmax output.
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    a2 = np.tanh(z2)
    z3 = a2.dot(W3) + b3
    exp_scores = np.exp(z3)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Most probable class per example.
    return np.argmax(probs, axis=1)
def build_model(nn_hdim, num_passes=20000, print_loss=False):
    """Train the 3-layer network on the global X and return its parameters.

    Parameters
    ----------
    nn_hdim : width of the two hidden layers.
    num_passes : number of full-batch gradient-descent iterations.
    print_loss : when True, print the loss every 1000 passes.

    Reads the module-level globals X, num_examples, epsilon and reg_lambda.
    Returns a dict with keys W1/b1/W2/b2/W3/b3.  Fixes vs. the original:
    the duplicated softmax line is removed; W2/b2 are shaped so the forward
    pass works for any nn_hdim (the original crashed for nn_hdim != 1); the
    third layer (W3, b3) is actually trained and included in the returned
    model (calculate_loss/predict would otherwise raise KeyError on 'W3').
    """
    nn_input_dim = 1
    nn_output_dim = 1
    # Initialise parameters to small random values (fixed seed → repeatable).
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_hdim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_hdim))
    W3 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b3 = np.zeros((1, nn_output_dim))

    model = {}
    # Full-batch gradient descent.
    for i in range(num_passes):
        # Forward propagation: two tanh layers, softmax output.
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        a2 = np.tanh(z2)
        z3 = a2.dot(W3) + b3
        exp_scores = np.exp(z3)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # Backpropagation (softmax cross-entropy gradient).
        # NOTE(review): as in the original, 1 is subtracted from *every*
        # output column rather than only the true-class column — no integer
        # labels are in scope here; kept to preserve the script's behaviour.
        delta4 = probs
        delta4[range(num_examples)] -= 1
        dW3 = (a2.T).dot(delta4)
        db3 = np.sum(delta4, axis=0, keepdims=True)
        delta3 = delta4.dot(W3.T) * (1 - np.power(a2, 2))
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # L2 regularisation on the weights (biases are not regularised).
        dW3 += reg_lambda * W3
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient-descent parameter update.
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2
        W3 += -epsilon * dW3
        b3 += -epsilon * db3

        # Expose the current parameters (all six, so the helpers can run).
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2, 'W3': W3, 'b3': b3}

        # Optionally report progress; expensive, so only every 1000 passes.
        if print_loss and i % 1000 == 0:
            print(i, calculate_loss(model))
    return model
# NOTE(review): this import works mid-file but belongs at the top of the
# module with the other imports.
import seaborn as sns

# Train a 3-hidden-unit network and plot its per-example predictions.
model = build_model(3, print_loss=True)
# Bug fix: seaborn's pointplot expects data (x/y sequences), not a callable —
# the original passed a lambda, which cannot be plotted.
predictions = predict(model, X)
sns.pointplot(x=np.arange(len(predictions)), y=predictions)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement