Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- # # Lab 3: Bayes Classifier and Boosting
- # ## Jupyter notebooks
- #
- # In this lab, you can use Jupyter <https://jupyter.org/> to get a nice layout of your code and plots in one document. However, you may also use Python as usual, without Jupyter.
- #
- # If you have Python and pip, you can install Jupyter with `sudo pip install jupyter`. Otherwise you can follow the instruction on <http://jupyter.readthedocs.org/en/latest/install.html>.
- #
- # And that is everything you need! Now use a terminal to go into the folder with the provided lab files. Then run `jupyter notebook` to start a session in that folder. Click `lab3.ipynb` in the browser window that appeared to start this very notebook. You should click on the cells in order and either press `ctrl+enter` or `run cell` in the toolbar above to evaluate all the expressions.
- # ## Import the libraries
- #
- # Check out `labfuns.py` if you are interested in the details.
- import sys
- import numpy as np
- import numpy
- from scipy import misc
- from imp import reload
- from labfuns import *
- from sklearn import decomposition
- from matplotlib.colors import ColorConverter
- # ## Bayes classifier functions to implement
- #
- # The lab descriptions state what each function should do.
- # Note that you do not need to handle the W argument for this part
- # in: labels - N x 1 vector of class labels
- # out: prior - C x 1 vector of class priors
def computePrior(labels, W=None):
    """
    Estimate class priors from (optionally weighted) labels.

    in:  labels - N x 1 vector of class labels (assumed 0..C-1)
         W      - N x 1 vector of example weights; uniform if None
                  (backward compatible: the basic lab part never passes W)
    out: prior  - C x 1 vector of class priors, summing to 1
    """
    labels = np.asarray(labels)
    Npts = len(labels)
    if W is None:
        W = np.ones(Npts) / float(Npts)
    else:
        W = np.asarray(W, dtype=float)
    nClasses = int(np.max(labels)) + 1
    # Prior of class k = total weight of the examples labelled k.
    prior = np.array([np.sum(W[labels == k]) for k in range(nClasses)])
    return prior / np.sum(prior)
- # Note that you do not need to handle the W argument for this part
- # in: X - N x d matrix of N data points
- # labels - N x 1 vector of class labels
- # out: mu - C x d matrix of class means
- # sigma - d x d x C matrix of class covariances
# Small functional helpers. The original used Python-2-only tuple parameter
# unpacking (`def fst((a,b))`), which is a SyntaxError on Python 3; each
# pair-taking function now takes a single 2-tuple argument instead.
def fst(pair):
    """First element of a 2-tuple."""
    return pair[0]

def snd(pair):
    """Second element of a 2-tuple."""
    return pair[1]

def fix(f):
    """Flip the argument order of a curried 2-argument function."""
    return lambda a: lambda b: f(b)(a)

def take(i):
    """Return a function taking the first i elements of a sequence."""
    return lambda l: l[:i]

def eq(a):
    """Curried equality predicate: eq(a)(b) == (a == b)."""
    return lambda b: a == b

def div(a):
    """Curried division: div(a)(b) == a / b."""
    return lambda b: a / b

def compose(f, g):
    """Function composition: compose(f, g)(x) == f(g(x))."""
    return lambda x: f(g(x))

def mean(x):
    """Arithmetic mean of a sequence (true division; works on numpy rows)."""
    return sum(x) / len(x)

def sig(pair):
    """
    ML covariance estimate for one class.

    in:  pair - (muk, Xk): class mean (d,) and class data (Nk x d)
    out: d x d covariance matrix.
         The original wrapped the dot product in sum(...), which collapsed
         the matrix to a d-vector — contradicting the documented
         `d x d x C` sigma contract; the full matrix is returned here.
    """
    muk, Xk = pair
    diff = Xk - muk
    return numpy.dot(diff.T, diff) / float(len(Xk))
def mlParams(X, labels, W=None):
    """
    Maximum-likelihood estimates of per-class Gaussian parameters.

    in:  X      - N x d matrix of N data points
         labels - N x 1 vector of class labels (assumed 0..C-1)
         W      - N x 1 vector of example weights; uniform if None
                  (backward compatible: the basic lab part never passes W)
    out: mu     - C x d matrix of class means
         sigma  - C x d x d array of class covariance matrices
                  (one full d x d matrix per class, as the comments above
                  require — the original map/filter version collapsed it)
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    Npts, d = X.shape
    if W is None:
        W = np.ones(Npts) / float(Npts)
    else:
        W = np.asarray(W, dtype=float)
    nClasses = int(np.max(labels)) + 1
    mu = np.zeros((nClasses, d))
    sigma = np.zeros((nClasses, d, d))
    for k in range(nClasses):
        idx = labels == k
        Xk = X[idx]
        wk = W[idx]
        wSum = np.sum(wk)
        # Weighted mean; with uniform W this equals the plain average.
        mu[k] = np.dot(wk, Xk) / wSum
        diff = Xk - mu[k]
        # Weighted outer-product covariance: sum_i w_i * diff_i diff_i^T.
        sigma[k] = np.dot(diff.T * wk, diff) / wSum
    return mu, sigma
- # in: X - N x d matrix of M data points
- # prior - C x 1 vector of class priors
- # mu - C x d matrix of class means
- # sigma - d x d x C matrix of class covariances
- # out: h - N x 1 class predictions for test points
def classify(X, prior, mu, sigma, covdiag=True):
    """
    Bayes classification with per-class Gaussian densities.

    in:  X       - N x d matrix of N data points
         prior   - C x 1 vector of class priors
         mu      - C x d matrix of class means
         sigma   - class covariances indexed by class along the first axis
                   (C x d x d, as produced by mlParams)
         covdiag - if True, use only the diagonal of each covariance
    out: h       - N x 1 vector of predicted class labels

    The original stub printed an intermediate and returned an undefined
    name `h`; this implements the log-posterior argmax it documented.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(1, -1)
    prior = np.asarray(prior, dtype=float)
    Npts = X.shape[0]
    nClasses = mu.shape[0]
    logPost = np.zeros((Npts, nClasses))
    for k in range(nClasses):
        Sk = np.asarray(sigma[k], dtype=float)
        if covdiag:
            # Naive-Bayes variant: zero out the off-diagonal terms.
            Sk = np.diag(np.diag(Sk))
        diff = X - mu[k]
        # log|S| via slogdet and Mahalanobis term via a linear solve —
        # numerically safer than forming the explicit inverse.
        _, logdet = np.linalg.slogdet(Sk)
        maha = np.sum(diff * np.linalg.solve(Sk, diff.T).T, axis=1)
        logPost[:, k] = np.log(prior[k]) - 0.5 * (logdet + maha)
    h = np.argmax(logPost, axis=1)
    return h
- # ## Test the Maximum Likelihood estimates
- #
- # Call `genBlobs` and `plotGaussian` to verify your estimates.
# Sanity-check the ML estimates on synthetic Gaussian blobs from labfuns.
X, labels = genBlobs(centers=5)
prior=computePrior(labels)
mu, sigma = mlParams(X,labels)
# Split off a test portion and run the classifier once as a smoke test;
# the predictions themselves are discarded here.
_,_,x_star,_,_,_ = trteSplitEven(X,labels,0.7)
classify(x_star,prior,mu,sigma)
# NOTE(review): sys.exit(0) deliberately stops the script here, so nothing
# below (boosting, testClassifier, plotting) ever runs — presumably
# work-in-progress scaffolding; remove when the later parts are ready.
sys.exit(0)
# plotGaussian(X,labels,mu,sigma)
- # ## Boosting functions to implement
- #
- # The lab descriptions state what each function should do.
- # in: X - N x d matrix of N data points
- # labels - N x 1 vector of class labels
- # T - number of boosting iterations
- # out: priors - length T list of prior as above
- # mus - length T list of mu as above
- # sigmas - length T list of sigma as above
- # alphas - T x 1 vector of vote weights
def trainBoost(X, labels, T=5, covdiag=True):
    """
    Train a boosted Bayes classifier (AdaBoost-style reweighting).

    in:  X       - N x d matrix of N data points
         labels  - N x 1 vector of class labels
         T       - number of boosting iterations
         covdiag - passed through to classify
    out: priors  - length T list of priors as above
         mus     - length T list of mu as above
         sigmas  - length T list of sigma as above
         alphas  - T x 1 vector of vote weights

    The original stub returned undefined names. NOTE(review): for the
    ensemble to actually adapt between rounds, computePrior and mlParams
    must honour their W argument — the basic lab versions may ignore it.
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    Npts = X.shape[0]
    # Start from uniform example weights.
    wCur = np.ones(Npts) / float(Npts)
    priors, mus, sigmas, alphas = [], [], [], []
    for _ in range(T):
        prior = computePrior(labels, wCur)
        mu, sigma = mlParams(X, labels, wCur)
        vote = classify(X, prior, mu, sigma, covdiag=covdiag)
        miss = (vote != labels).astype(float)
        # Weighted training error, clamped away from 0 and 1 so that
        # alpha stays finite.
        err = min(max(np.dot(wCur, miss), 1e-10), 1.0 - 1e-10)
        alpha = 0.5 * (np.log(1.0 - err) - np.log(err))
        priors.append(prior)
        mus.append(mu)
        sigmas.append(sigma)
        alphas.append(alpha)
        # Up-weight misclassified points, down-weight correct ones,
        # then renormalize to a distribution.
        wCur = wCur * np.exp(alpha * (2.0 * miss - 1.0))
        wCur = wCur / np.sum(wCur)
    return priors, mus, sigmas, alphas
- # in: X - N x d matrix of N data points
- # priors - length T list of prior as above
- # mus - length T list of mu as above
- # sigmas - length T list of sigma as above
- # alphas - T x 1 vector of vote weights
- # out: yPred - N x 1 class predictions for test points
def classifyBoost(X, priors, mus, sigmas, alphas, covdiag=True):
    """
    Classify points with a boosted ensemble via alpha-weighted voting.

    in:  X       - N x d matrix of N data points
         priors  - length T list of priors as above
         mus     - length T list of mu as above
         sigmas  - length T list of sigma as above
         alphas  - T x 1 vector of vote weights
         covdiag - passed through to classify
    out: yPred   - N x 1 vector of predicted class labels

    The original stub returned an undefined name `c`.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(1, -1)
    Npts = X.shape[0]
    nClasses = mus[0].shape[0]
    votes = np.zeros((Npts, nClasses))
    for prior, mu, sigma, alpha in zip(priors, mus, sigmas, alphas):
        h = classify(X, prior, mu, sigma, covdiag=covdiag)
        # Each round adds its vote weight to the class it predicted.
        votes[np.arange(Npts), h] += alpha
    return np.argmax(votes, axis=1)
- # ## Define our testing function
- #
- # The function below, `testClassifier`, will be used to try out the different datasets. `fetchDataset` can be provided with any of the dataset arguments `wine`, `iris`, `olivetti` and `vowel`. Observe that we split the data into a **training** and a **testing** set.
# Configure numpy printing for debugging output. The original passed
# threshold=np.nan, which modern NumPy rejects (the threshold must be an
# integer); sys.maxsize reproduces the "never summarize" intent. The three
# separate calls are merged into one.
np.set_printoptions(threshold=sys.maxsize, precision=25, linewidth=200)
def testClassifier(dataset='iris', dim=0, split=0.7, doboost=False, boostiter=5, covdiag=True, ntrials=100):
    """
    Benchmark the (optionally boosted) Bayes classifier on a dataset.

    in:  dataset  - name understood by fetchDataset ('iris', 'wine', ...)
         dim      - if > 0, overrides the dataset's default PCA dimension
         split    - fraction of the data used for training
         doboost  - use the boosted classifier instead of the plain one
         boostiter- number of boosting rounds when doboost is True
         covdiag  - use diagonal covariances
         ntrials  - number of random train/test splits to average over

    Prints per-trial accuracy and the final mean/std. Python 2 print
    statements converted to the print() function.
    """
    X, y, pcadim = fetchDataset(dataset)
    means = np.zeros(ntrials)
    for trial in range(ntrials):
        # xTr,yTr,xTe,yTe,trIdx,teIdx = trteSplit(X,y,split)
        xTr, yTr, xTe, yTe, trIdx, teIdx = trteSplitEven(X, y, split)
        # Do PCA, replacing the dataset default if the user provided dim.
        if dim > 0:
            pcadim = dim
        if pcadim > 0:
            pca = decomposition.PCA(n_components=pcadim)
            pca.fit(xTr)
            xTr = pca.transform(xTr)
            xTe = pca.transform(xTe)
        if doboost:
            ## Boosting: train the ensemble, then vote.
            # (also forwards covdiag, which the original call dropped)
            priors, mus, sigmas, alphas = trainBoost(xTr, yTr, T=boostiter, covdiag=covdiag)
            yPr = classifyBoost(xTe, priors, mus, sigmas, alphas, covdiag=covdiag)
        else:
            ## Simple Bayes: estimate parameters, then classify.
            prior = computePrior(yTr)
            mu, sigma = mlParams(xTr, yTr)
            yPr = classify(xTe, prior, mu, sigma, covdiag=covdiag)
        # Classification accuracy for this trial, in percent.
        accuracy = 100 * np.mean((yPr == yTe).astype(float))
        print("Trial:", trial, "Accuracy", accuracy)
        means[trial] = accuracy
    print("Final mean classification accuracy", np.mean(means), "with standard deviation", np.std(means))
- # ## Plotting the decision boundary
- #
- # This is some code that you can use for plotting the decision boundary
- # boundary in the last part of the lab.
def plotBoundary(dataset='iris', split=0.7, doboost=False, boostiter=5, covdiag=True):
    """
    Plot the decision boundary of the (optionally boosted) classifier
    in the first two PCA dimensions of the chosen dataset.

    in:  dataset  - name understood by fetchDataset
         split    - fraction of data used for training
         doboost  - plot the boosted classifier's boundary
         boostiter- number of boosting rounds when doboost is True
         covdiag  - use diagonal covariances
    """
    X, y, pcadim = fetchDataset(dataset)
    xTr, yTr, xTe, yTe, trIdx, teIdx = trteSplitEven(X, y, split)
    # Project everything to 2D so the boundary can be drawn.
    pca = decomposition.PCA(n_components=2)
    pca.fit(xTr)
    xTr = pca.transform(xTr)
    xTe = pca.transform(xTe)
    pX = np.vstack((xTr, xTe))
    py = np.hstack((yTr, yTe))
    if doboost:
        ## Boosting: compute ensemble params.
        priors, mus, sigmas, alphas = trainBoost(xTr, yTr, T=boostiter, covdiag=covdiag)
    else:
        ## Simple: compute single-model params.
        prior = computePrior(yTr)
        mu, sigma = mlParams(xTr, yTr)
    # 100x100 grid over the bounding box of the projected data.
    xRange = np.arange(np.min(pX[:, 0]), np.max(pX[:, 0]), np.abs(np.max(pX[:, 0]) - np.min(pX[:, 0])) / 100.0)
    yRange = np.arange(np.min(pX[:, 1]), np.max(pX[:, 1]), np.abs(np.max(pX[:, 1]) - np.min(pX[:, 1])) / 100.0)
    grid = np.zeros((yRange.size, xRange.size))
    for (xi, xx) in enumerate(xRange):
        for (yi, yy) in enumerate(yRange):
            if doboost:
                grid[yi, xi] = classifyBoost(np.matrix([[xx, yy]]), priors, mus, sigmas, alphas, covdiag=covdiag)
            else:
                grid[yi, xi] = classify(np.matrix([[xx, yy]]), prior, mu, sigma, covdiag=covdiag)
    classes = range(int(np.min(y)), int(np.max(y)) + 1)
    # One colour per class. (The original sized the colormap via a list
    # comprehension that leaked the loop variable xx; only its length was
    # ever used, so len(classes) is equivalent and correct.)
    colormap = cm.rainbow(np.linspace(0, 1, len(classes)))
    # plt.hold was deprecated and removed from matplotlib; repeated plot
    # calls accumulate on the current axes by default, so it is dropped.
    conv = ColorConverter()
    for (color, c) in zip(colormap, classes):
        try:
            CS = plt.contour(xRange, yRange, (grid == c).astype(float), 15, linewidths=0.25, colors=conv.to_rgba_array(color))
        except ValueError:
            # contour raises when a class never appears in the grid.
            pass
        xc = pX[py == c, :]
        plt.scatter(xc[:, 0], xc[:, 1], marker='o', c=color, s=40, alpha=0.5)
    plt.xlim(np.min(pX[:, 0]), np.max(pX[:, 0]))
    plt.ylim(np.min(pX[:, 1]), np.max(pX[:, 1]))
    plt.show()
- # ## Run some experiments
- #
- # Call the `testClassifier` and `plotBoundary` functions for this part.
- # Example usage of the functions
# Example usage of the testing function. (A stray `p` followed the call
# in the original — a NameError at runtime, presumably paste debris —
# and has been removed.)
testClassifier(dataset='iris', split=0.7, doboost=False, boostiter=5, covdiag=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement