# coding: utf-8

# # Lab 3: Bayes Classifier and Boosting

# ## Jupyter notebooks
#
# In this lab, you can use Jupyter <https://jupyter.org/> to get a nice layout of your code and plots in one document. However, you may also use Python as usual, without Jupyter.
#
# If you have Python and pip, you can install Jupyter with `sudo pip install jupyter`. Otherwise you can follow the instructions at <http://jupyter.readthedocs.org/en/latest/install.html>.
#
# And that is everything you need! Now use a terminal to go into the folder with the provided lab files. Then run `jupyter notebook` to start a session in that folder. Click `lab3.ipynb` in the browser window that appears to open this very notebook. Click on the cells in order and either press `ctrl+enter` or use `run cell` in the toolbar above to evaluate the expressions.
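
# For reference, a typical terminal session for the steps above might look
# like this (the folder name is illustrative):
#
#     sudo pip install jupyter
#     cd lab3-files
#     jupyter notebook
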
# ## Import the libraries
#
# Check out `labfuns.py` if you are interested in the details.
import sys
import numpy as np
from scipy import misc
from imp import reload
from labfuns import *
from sklearn import decomposition
from matplotlib.colors import ColorConverter

# ## Bayes classifier functions to implement
#
# The lab descriptions state what each function should do.

# Note that you do not need to handle the W argument for this part
# in: labels - N x 1 vector of class labels
# out: prior - C x 1 vector of class priors
def computePrior(labels, W=None):
    C = max(labels)
    N = float(len(labels))
    # The prior of class k is its relative frequency among the labels
    prior = np.array([np.sum(labels == k) for k in range(C + 1)]) / N
    return prior
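
# A quick sanity check on a tiny hand-made label vector (the labels below are
# illustrative, not lab data): class 0 appears twice out of four points.
print(computePrior(np.array([0, 0, 1, 2])))  # expect [0.5, 0.25, 0.25]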

# Note that you do not need to handle the W argument for this part
# in:      X - N x d matrix of N data points
#     labels - N x 1 vector of class labels
# out:    mu - C x d matrix of class means
#      sigma - d x d x C matrix of class covariances

# Small functional helpers used below (Python 3 no longer accepts tuple
# parameters in a def, so fst/snd index into the pair instead)
def fst(pair): return pair[0]
def snd(pair): return pair[1]
def fix(f): return lambda a: lambda b: f(b)(a)
def take(i): return lambda l: l[:i]
def eq(a): return lambda b: a == b
def div(a): return lambda b: a / b
def compose(f, g): return lambda x: f(g(x))
def mean(x): return sum(x) / len(x)

def sig(pair):
    muk, Xk = pair
    diff = Xk - muk
    # ML covariance: the average outer product of the centred points. np.dot
    # already sums over the N_k points, so no extra sum() is needed (the
    # original one collapsed the d x d matrix to a d-vector).
    return np.dot(diff.T, diff) / float(len(Xk))

def mlParams(X, labels, W=None):
    C = max(labels)
    ofClass = lambda k: compose(eq(k), snd)
    # Group the data points by their class label
    xsByClass = [np.array([fst(p) for p in zip(X, labels) if ofClass(k)(p)]) for k in range(C + 1)]
    mu = np.array([mean(Xk) for Xk in xsByClass])
    sigma = np.array([sig(pair) for pair in zip(mu, xsByClass)])
    return mu, sigma

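# For reference, the maximum likelihood estimates computed above are
# $\mu_k = \frac{1}{N_k}\sum_{i:y_i=k} x_i$ and
# $\Sigma_k = \frac{1}{N_k}\sum_{i:y_i=k} (x_i - \mu_k)(x_i - \mu_k)^\top$,
# where $N_k$ is the number of training points in class $k$.
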
# in:      X - N x d matrix of N data points
#      prior - C x 1 vector of class priors
#         mu - C x d matrix of class means
#      sigma - d x d x C matrix of class covariances
# out:     h - N x 1 class predictions for test points
def classify(X, prior, mu, sigma, covdiag=True):
    # A minimal sketch of the usual Gaussian log-discriminant (an assumption -
    # the lab description states the exact form required). With covdiag=True
    # only the diagonal of each covariance matrix is used.
    X = np.asarray(X)
    C = len(prior)
    logProb = np.zeros((X.shape[0], C))
    for k in range(C):
        Sk = np.diag(np.diag(sigma[k])) if covdiag else sigma[k]
        diff = X - mu[k]
        # Solve the psd system via a Cholesky factorisation, as hinted:
        # L = np.linalg.cholesky(A); y = np.linalg.solve(L, b)
        L = np.linalg.cholesky(Sk)
        y = np.linalg.solve(L, diff.T)
        logdet = 2.0 * np.sum(np.log(np.diag(L)))
        logProb[:, k] = -0.5 * (np.sum(y * y, axis=0) + logdet) + np.log(prior[k])
    h = np.argmax(logProb, axis=1)
    return h

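# In math form, the rule above predicts
# $h(x) = \arg\max_k \left( \ln p(k) - \tfrac{1}{2}\ln|\Sigma_k| - \tfrac{1}{2}(x - \mu_k)^\top \Sigma_k^{-1} (x - \mu_k) \right)$.
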
# ## Test the Maximum Likelihood estimates
#
# Call `genBlobs` and `plotGaussian` to verify your estimates.

X, labels = genBlobs(centers=5)
prior = computePrior(labels)
mu, sigma = mlParams(X, labels)
# Try the classifier on the test part of an even train/test split
_, _, x_star, _, _, _ = trteSplitEven(X, labels, 0.7)
print(classify(x_star, prior, mu, sigma))
# plotGaussian(X,labels,mu,sigma)

# ## Boosting functions to implement
#
# The lab descriptions state what each function should do.

# in:       X - N x d matrix of N data points
#      labels - N x 1 vector of class labels
#           T - number of boosting iterations
# out: priors - length T list of prior as above
#         mus - length T list of mu as above
#      sigmas - length T list of sigma as above
#      alphas - T x 1 vector of vote weights
def trainBoost(X, labels, T=5, covdiag=True):
    # A minimal sketch of discrete AdaBoost with the Bayes classifier as the
    # weak learner (an assumption - the lab description defines the exact
    # variant; computePrior/mlParams must be extended to use the weight
    # vector W for the rounds to differ).
    N = len(X)
    W = np.ones(N) / N  # uniform point weights to start
    priors, mus, sigmas, alphas = [], [], [], []
    for t in range(T):
        prior = computePrior(labels, W)
        mu, sigma = mlParams(X, labels, W)
        vote = classify(X, prior, mu, sigma, covdiag=covdiag)
        err = np.sum(W * (vote != labels)) + 1e-10  # weighted training error
        alpha = 0.5 * (np.log(1.0 - err) - np.log(err))
        # Up-weight mistakes, down-weight correct points, renormalise
        W = W * np.exp(np.where(vote == labels, -alpha, alpha))
        W /= np.sum(W)
        priors.append(prior); mus.append(mu); sigmas.append(sigma); alphas.append(alpha)
    return priors, mus, sigmas, alphas
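
# The update above follows the usual AdaBoost recipe:
# $\alpha_t = \tfrac{1}{2}\ln\frac{1-\epsilon_t}{\epsilon_t}$, with
# $w_i \leftarrow w_i e^{-\alpha_t}$ for correctly classified points and
# $w_i \leftarrow w_i e^{+\alpha_t}$ for mistakes, followed by renormalisation.
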
# in:       X - N x d matrix of N data points
#      priors - length T list of prior as above
#         mus - length T list of mu as above
#      sigmas - length T list of sigma as above
#      alphas - T x 1 vector of vote weights
# out:  yPred - N x 1 class predictions for test points
def classifyBoost(X, priors, mus, sigmas, alphas, covdiag=True):
    # A minimal sketch: sum the alpha-weighted votes of the T weak classifiers
    # and predict the class with the largest total (an assumption - see the
    # lab description for the required combination rule).
    X = np.asarray(X)
    N = X.shape[0]
    votes = np.zeros((N, len(priors[0])))
    for prior, mu, sigma, alpha in zip(priors, mus, sigmas, alphas):
        h = classify(X, prior, mu, sigma, covdiag=covdiag)
        votes[np.arange(N), h] += alpha
    yPred = np.argmax(votes, axis=1)
    return yPred
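
# Example usage (commented out until the weighted estimates are implemented;
# the variables reuse the blob data generated above):
# priors, mus, sigmas, alphas = trainBoost(X, labels, T=5)
# print(classifyBoost(x_star, priors, mus, sigmas, alphas))
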
# ## Define our testing function
#
# The function below, `testClassifier`, will be used to try out the different datasets. `fetchDataset` can be provided with any of the dataset arguments `wine`, `iris`, `olivetti` and `vowel`. Observe that we split the data into a **training** and a **testing** set.

np.set_printoptions(threshold=sys.maxsize, precision=25, linewidth=200)

def testClassifier(dataset='iris', dim=0, split=0.7, doboost=False, boostiter=5, covdiag=True, ntrials=100):

    X, y, pcadim = fetchDataset(dataset)

    means = np.zeros(ntrials)

    for trial in range(ntrials):

        # xTr,yTr,xTe,yTe,trIdx,teIdx = trteSplit(X,y,split)
        xTr, yTr, xTe, yTe, trIdx, teIdx = trteSplitEven(X, y, split)

        # Do PCA, replacing the default dimension if the user provides one
        if dim > 0:
            pcadim = dim
        if pcadim > 0:
            pca = decomposition.PCA(n_components=pcadim)
            pca.fit(xTr)
            xTr = pca.transform(xTr)
            xTe = pca.transform(xTe)

        if doboost:
            ## Boosting
            # Compute params
            priors, mus, sigmas, alphas = trainBoost(xTr, yTr, T=boostiter, covdiag=covdiag)
            yPr = classifyBoost(xTe, priors, mus, sigmas, alphas, covdiag=covdiag)
        else:
            ## Simple
            # Compute params
            prior = computePrior(yTr)
            mu, sigma = mlParams(xTr, yTr)
            # Predict
            yPr = classify(xTe, prior, mu, sigma, covdiag=covdiag)

        # Compute classification accuracy for this trial
        means[trial] = 100 * np.mean((yPr == yTe).astype(float))
        print("Trial:", trial, "Accuracy", means[trial])

    print("Final mean classification accuracy", np.mean(means), "with standard deviation", np.std(means))

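# For example, the same experiment on another of the datasets and with full
# covariance matrices (commented out to keep the default run short):
# testClassifier(dataset='wine', split=0.7, doboost=False, covdiag=False)
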

# ## Plotting the decision boundary
#
# This is some code that you can use for plotting the decision boundary in the last part of the lab.

def plotBoundary(dataset='iris', split=0.7, doboost=False, boostiter=5, covdiag=True):

    X, y, pcadim = fetchDataset(dataset)
    xTr, yTr, xTe, yTe, trIdx, teIdx = trteSplitEven(X, y, split)
    pca = decomposition.PCA(n_components=2)
    pca.fit(xTr)
    xTr = pca.transform(xTr)
    xTe = pca.transform(xTe)

    pX = np.vstack((xTr, xTe))
    py = np.hstack((yTr, yTe))

    if doboost:
        ## Boosting
        # Compute params
        priors, mus, sigmas, alphas = trainBoost(xTr, yTr, T=boostiter, covdiag=covdiag)
    else:
        ## Simple
        # Compute params
        prior = computePrior(yTr)
        mu, sigma = mlParams(xTr, yTr)

    xRange = np.arange(np.min(pX[:, 0]), np.max(pX[:, 0]), np.abs(np.max(pX[:, 0]) - np.min(pX[:, 0])) / 100.0)
    yRange = np.arange(np.min(pX[:, 1]), np.max(pX[:, 1]), np.abs(np.max(pX[:, 1]) - np.min(pX[:, 1])) / 100.0)

    grid = np.zeros((yRange.size, xRange.size))

    # Classify every point on the grid with the chosen model
    for (xi, xx) in enumerate(xRange):
        for (yi, yy) in enumerate(yRange):
            if doboost:
                ## Boosting
                grid[yi, xi] = classifyBoost(np.matrix([[xx, yy]]), priors, mus, sigmas, alphas, covdiag=covdiag)
            else:
                ## Simple
                grid[yi, xi] = classify(np.matrix([[xx, yy]]), prior, mu, sigma, covdiag=covdiag)

    classes = range(np.min(y), np.max(y) + 1)
    colormap = cm.rainbow(np.linspace(0, 1, len(classes)))

    conv = ColorConverter()
    for (color, c) in zip(colormap, classes):
        try:
            CS = plt.contour(xRange, yRange, (grid == c).astype(float), 15, linewidths=0.25, colors=conv.to_rgba_array(color))
        except ValueError:
            pass
        xc = pX[py == c, :]
        plt.scatter(xc[:, 0], xc[:, 1], marker='o', c=color, s=40, alpha=0.5)

    plt.xlim(np.min(pX[:, 0]), np.max(pX[:, 0]))
    plt.ylim(np.min(pX[:, 1]), np.max(pX[:, 1]))
    plt.show()


# ## Run some experiments
#
# Call the `testClassifier` and `plotBoundary` functions for this part.

# Example usage of the functions

testClassifier(dataset='iris', split=0.7, doboost=False, boostiter=5, covdiag=True)
plotBoundary(dataset='iris', split=0.7, doboost=False, boostiter=5, covdiag=True)