Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn.ensemble import RandomForestClassifier
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import matplotlib.cm as cm
- #include neural net packages
- import lasagne
- from lasagne import layers
- from lasagne.updates import nesterov_momentum
- from nolearn.lasagne import NeuralNet
- from nolearn.lasagne import visualize
# --- Random forest baseline -------------------------------------------------
# Load the Kaggle MNIST training data. The CSV has a header row: the first
# column is the digit label, the remaining 784 columns are pixel values
# (each row is a flattened 28x28 image).
dataset = pd.read_csv("../input/train.csv")
# Select the label column by POSITION. The original `dataset[[0]]` selects by
# column *label* 0, which raises KeyError once read_csv keeps the real header
# names ('label', 'pixel0', ...).
target = dataset.iloc[:, 0].values
train = dataset.iloc[:, 1:].values
test = pd.read_csv("../input/test.csv").values

# Train a 100-tree random forest on the raw pixel values.
# Multi-core CPUs can use: RandomForestClassifier(n_estimators=100, n_jobs=2)
rf = RandomForestClassifier(n_estimators=100)
rf.fit(train, target)
pred = rf.predict(test)

# Kaggle submission format: 1-based ImageId plus the predicted Label.
np.savetxt('submission_rand_forest.csv',
           np.c_[range(1, len(test) + 1), pred],
           delimiter=',', header='ImageId,Label', comments='', fmt='%d')
# The random forest already scores well, but convolutional neural networks
# are the stronger tool for image data -- so reshape the flat rows back into
# pictures first. Each row holds 28*28 = 784 pixel values, i.e. one image.
image_shape = (-1, 1, 28, 28)  # (samples, channels, height, width)
target = target.astype(np.uint8)
train = np.array(train).astype(np.uint8).reshape(image_shape)
test = np.array(test).astype(np.uint8).reshape(image_shape)

# Sanity check: render one training image to see which digit it shows.
plt.imshow(train[1729][0], cmap=cm.binary)  # draw the picture
# Plain fully connected network: 784 pixel inputs -> 1000 hidden units ->
# 10-way softmax over the digit classes, trained with Nesterov momentum.
dense_architecture = [
    ('input', layers.InputLayer),
    ('hidden', layers.DenseLayer),
    ('output', layers.DenseLayer),
]
net1 = NeuralNet(
    layers=dense_architecture,
    # layer parameters:
    input_shape=(None, 1, 28, 28),  # batches of 1-channel 28x28 images
    hidden_num_units=1000,          # width of the single hidden layer
    output_num_units=10,            # one output per digit 0,1,...,9
    output_nonlinearity=lasagne.nonlinearities.softmax,
    # optimization parameters:
    update=nesterov_momentum,
    update_learning_rate=0.0001,
    update_momentum=0.9,
    max_epochs=15,                  # passes over the training samples
    verbose=1,
)

# Train the network on the reshaped image tensor.
net1.fit(train, target)
- #advantage of CNN is you can use maxpooling, Conv layer, Dense layer.
def CNN(n_epochs):
    """Build an (untrained) convolutional network for 28x28 digit images.

    Architecture: conv(7 filters, 3x3, ReLU) -> maxpool(2x2) ->
    conv(12 filters, 2x2, ReLU) -> dense(1000) -> 10-way softmax.

    Parameters
    ----------
    n_epochs : int
        Number of passes over the training data when ``fit`` is called.

    Returns
    -------
    nolearn.lasagne.NeuralNet
        Configured network; call ``.fit(X, y)`` to train it.
    """
    net1 = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('hidden3', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        input_shape=(None, 1, 28, 28),  # batches of 1-channel 28x28 images
        conv1_num_filters=7,
        conv1_filter_size=(3, 3),
        conv1_nonlinearity=lasagne.nonlinearities.rectify,
        pool1_pool_size=(2, 2),
        conv2_num_filters=12,
        conv2_filter_size=(2, 2),
        conv2_nonlinearity=lasagne.nonlinearities.rectify,
        hidden3_num_units=1000,
        output_num_units=10,            # one output per digit 0-9
        output_nonlinearity=lasagne.nonlinearities.softmax,
        # Stated explicitly for consistency with the dense net above, which
        # passes update= instead of relying on nolearn's default update rule
        # (presumably nesterov_momentum -- TODO confirm against nolearn docs).
        update=nesterov_momentum,
        update_learning_rate=0.0001,
        update_momentum=0.9,
        max_epochs=n_epochs,
        verbose=1,
    )
    return net1
# Train the CNN for 15 epochs. It takes longer than the plain dense network,
# but performs better with the same number of epochs and comparable weights.
cnn = CNN(15).fit(train, target)

# Classify the test images with the trained CNN.
pred = cnn.predict(test)

# Write the Kaggle submission: 1-based ImageId plus predicted Label.
image_ids = range(1, len(test) + 1)
np.savetxt('cnn_submission.csv',
           np.c_[image_ids, pred],
           fmt='%d', delimiter=',', header='ImageId,Label', comments='')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement