Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- # # Lab 3: Bayes Classifier and Boosting
- # ## Jupyter notebooks
- #
- # In this lab, you can use Jupyter <https://jupyter.org/> to get a nice layout of your code and plots in one document. However, you may also use Python as usual, without Jupyter.
- #
- # If you have Python and pip, you can install Jupyter with `sudo pip install jupyter`. Otherwise you can follow the instruction on <http://jupyter.readthedocs.org/en/latest/install.html>.
- #
- # And that is everything you need! Now use a terminal to go into the folder with the provided lab files. Then run `jupyter notebook` to start a session in that folder. Click `lab3.ipynb` in the browser window that appeared to start this very notebook. You should click on the cells in order and either press `ctrl+enter` or `run cell` in the toolbar above to evaluate all the expressions.
- # ## Import the libraries
- #
- # Check out `labfuns.py` if you are interested in the details.
- import sys
- import numpy as np
- import numpy
- from scipy import misc
- from imp import reload
- from labfuns import *
- from sklearn import decomposition
- from matplotlib.colors import ColorConverter
- # ## Bayes classifier functions to implement
- #
- # The lab descriptions state what each function should do.
- # Note that you do not need to handle the W argument for this part
- # in: labels - N x 1 vector of class labels
- # out: prior - C x 1 vector of class priors
def computePrior(labels, W=None):
    """
    Estimate class priors from (optionally weighted) labels.

    in:  labels - N x 1 vector of class labels (assumed 0..C-1)
         W      - N x 1 vector of example weights; uniform if None
                  (backward compatible: the basic lab part never passes W)
    out: prior  - C x 1 vector of class priors, summing to 1
    """
    labels = np.asarray(labels)
    Npts = len(labels)
    if W is None:
        W = np.ones(Npts) / float(Npts)
    else:
        W = np.asarray(W, dtype=float)
    nClasses = int(np.max(labels)) + 1
    # Prior of class k = total weight of the examples labelled k.
    prior = np.array([np.sum(W[labels == k]) for k in range(nClasses)])
    return prior / np.sum(prior)
- # Note that you do not need to handle the W argument for this part
- # in: X - N x d matrix of N data points
- # labels - N x 1 vector of class labels
- # out: mu - C x d matrix of class means
- # sigma - d x d x C matrix of class covariances
# Small functional helpers. The original used Python-2-only tuple parameter
# unpacking (`def fst((a,b))`), which is a SyntaxError on Python 3; each
# pair-taking function now takes a single 2-tuple argument instead.
def fst(pair):
    """First element of a 2-tuple."""
    return pair[0]

def snd(pair):
    """Second element of a 2-tuple."""
    return pair[1]

def fix(f):
    """Flip the argument order of a curried 2-argument function."""
    return lambda a: lambda b: f(b)(a)

def take(i):
    """Return a function taking the first i elements of a sequence."""
    return lambda l: l[:i]

def eq(a):
    """Curried equality predicate: eq(a)(b) == (a == b)."""
    return lambda b: a == b

def div(a):
    """Curried division: div(a)(b) == a / b."""
    return lambda b: a / b

def compose(f, g):
    """Function composition: compose(f, g)(x) == f(g(x))."""
    return lambda x: f(g(x))

def mean(x):
    """Arithmetic mean of a sequence (true division; works on numpy rows)."""
    return sum(x) / len(x)

def sig(pair):
    """
    ML covariance estimate for one class.

    in:  pair - (muk, Xk): class mean (d,) and class data (Nk x d)
    out: d x d covariance matrix.
         The original wrapped the dot product in sum(...), which collapsed
         the matrix to a d-vector — contradicting the documented
         `d x d x C` sigma contract; the full matrix is returned here.
    """
    muk, Xk = pair
    diff = Xk - muk
    return numpy.dot(diff.T, diff) / float(len(Xk))
def mlParams(X, labels, W=None):
    """
    Maximum-likelihood estimates of per-class Gaussian parameters.

    in:  X      - N x d matrix of N data points
         labels - N x 1 vector of class labels (assumed 0..C-1)
         W      - N x 1 vector of example weights; uniform if None
                  (backward compatible: the basic lab part never passes W)
    out: mu     - C x d matrix of class means
         sigma  - C x d x d array of class covariance matrices
                  (one full d x d matrix per class, as the comments above
                  require — the original map/filter version collapsed it)
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    Npts, d = X.shape
    if W is None:
        W = np.ones(Npts) / float(Npts)
    else:
        W = np.asarray(W, dtype=float)
    nClasses = int(np.max(labels)) + 1
    mu = np.zeros((nClasses, d))
    sigma = np.zeros((nClasses, d, d))
    for k in range(nClasses):
        idx = labels == k
        Xk = X[idx]
        wk = W[idx]
        wSum = np.sum(wk)
        # Weighted mean; with uniform W this equals the plain average.
        mu[k] = np.dot(wk, Xk) / wSum
        diff = Xk - mu[k]
        # Weighted outer-product covariance: sum_i w_i * diff_i diff_i^T.
        sigma[k] = np.dot(diff.T * wk, diff) / wSum
    return mu, sigma
- # in: X - N x d matrix of M data points
- # prior - C x 1 vector of class priors
- # mu - C x d matrix of class means
- # sigma - d x d x C matrix of class covariances
- # out: h - N x 1 class predictions for test points
def classify(X, prior, mu, sigma, covdiag=True):
    """
    Bayes classification with per-class Gaussian densities.

    in:  X       - N x d matrix of N data points
         prior   - C x 1 vector of class priors
         mu      - C x d matrix of class means
         sigma   - class covariances indexed by class along the first axis
                   (C x d x d, as produced by mlParams)
         covdiag - if True, use only the diagonal of each covariance
    out: h       - N x 1 vector of predicted class labels

    The original stub printed an intermediate and returned an undefined
    name `h`; this implements the log-posterior argmax it documented.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(1, -1)
    prior = np.asarray(prior, dtype=float)
    Npts = X.shape[0]
    nClasses = mu.shape[0]
    logPost = np.zeros((Npts, nClasses))
    for k in range(nClasses):
        Sk = np.asarray(sigma[k], dtype=float)
        if covdiag:
            # Naive-Bayes variant: zero out the off-diagonal terms.
            Sk = np.diag(np.diag(Sk))
        diff = X - mu[k]
        # log|S| via slogdet and Mahalanobis term via a linear solve —
        # numerically safer than forming the explicit inverse.
        _, logdet = np.linalg.slogdet(Sk)
        maha = np.sum(diff * np.linalg.solve(Sk, diff.T).T, axis=1)
        logPost[:, k] = np.log(prior[k]) - 0.5 * (logdet + maha)
    h = np.argmax(logPost, axis=1)
    return h
- # ## Test the Maximum Likelihood estimates
- #
- # Call `genBlobs` and `plotGaussian` to verify your estimates.
# Sanity-check the ML estimates on synthetic Gaussian blobs from labfuns.
X, labels = genBlobs(centers=5)
prior=computePrior(labels)
mu, sigma = mlParams(X,labels)
# Split off a test portion and run the classifier once as a smoke test;
# the predictions themselves are discarded here.
_,_,x_star,_,_,_ = trteSplitEven(X,labels,0.7)
classify(x_star,prior,mu,sigma)
# NOTE(review): sys.exit(0) deliberately stops the script here, so nothing
# below (boosting, testClassifier, plotting) ever runs — presumably
# work-in-progress scaffolding; remove when the later parts are ready.
sys.exit(0)
# plotGaussian(X,labels,mu,sigma)
- # ## Boosting functions to implement
- #
- # The lab descriptions state what each function should do.
- # in: X - N x d matrix of N data points
- # labels - N x 1 vector of class labels
- # T - number of boosting iterations
- # out: priors - length T list of prior as above
- # mus - length T list of mu as above
- # sigmas - length T list of sigma as above
- # alphas - T x 1 vector of vote weights
def trainBoost(X, labels, T=5, covdiag=True):
    """
    Train a boosted Bayes classifier (AdaBoost-style reweighting).

    in:  X       - N x d matrix of N data points
         labels  - N x 1 vector of class labels
         T       - number of boosting iterations
         covdiag - passed through to classify
    out: priors  - length T list of priors as above
         mus     - length T list of mu as above
         sigmas  - length T list of sigma as above
         alphas  - T x 1 vector of vote weights

    The original stub returned undefined names. NOTE(review): for the
    ensemble to actually adapt between rounds, computePrior and mlParams
    must honour their W argument — the basic lab versions may ignore it.
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    Npts = X.shape[0]
    # Start from uniform example weights.
    wCur = np.ones(Npts) / float(Npts)
    priors, mus, sigmas, alphas = [], [], [], []
    for _ in range(T):
        prior = computePrior(labels, wCur)
        mu, sigma = mlParams(X, labels, wCur)
        vote = classify(X, prior, mu, sigma, covdiag=covdiag)
        miss = (vote != labels).astype(float)
        # Weighted training error, clamped away from 0 and 1 so that
        # alpha stays finite.
        err = min(max(np.dot(wCur, miss), 1e-10), 1.0 - 1e-10)
        alpha = 0.5 * (np.log(1.0 - err) - np.log(err))
        priors.append(prior)
        mus.append(mu)
        sigmas.append(sigma)
        alphas.append(alpha)
        # Up-weight misclassified points, down-weight correct ones,
        # then renormalize to a distribution.
        wCur = wCur * np.exp(alpha * (2.0 * miss - 1.0))
        wCur = wCur / np.sum(wCur)
    return priors, mus, sigmas, alphas
- # in: X - N x d matrix of N data points
- # priors - length T list of prior as above
- # mus - length T list of mu as above
- # sigmas - length T list of sigma as above
- # alphas - T x 1 vector of vote weights
- # out: yPred - N x 1 class predictions for test points
def classifyBoost(X, priors, mus, sigmas, alphas, covdiag=True):
    """
    Classify points with a boosted ensemble via alpha-weighted voting.

    in:  X       - N x d matrix of N data points
         priors  - length T list of priors as above
         mus     - length T list of mu as above
         sigmas  - length T list of sigma as above
         alphas  - T x 1 vector of vote weights
         covdiag - passed through to classify
    out: yPred   - N x 1 vector of predicted class labels

    The original stub returned an undefined name `c`.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(1, -1)
    Npts = X.shape[0]
    nClasses = mus[0].shape[0]
    votes = np.zeros((Npts, nClasses))
    for prior, mu, sigma, alpha in zip(priors, mus, sigmas, alphas):
        h = classify(X, prior, mu, sigma, covdiag=covdiag)
        # Each round adds its vote weight to the class it predicted.
        votes[np.arange(Npts), h] += alpha
    return np.argmax(votes, axis=1)
- # ## Define our testing function
- #
- # The function below, `testClassifier`, will be used to try out the different datasets. `fetchDataset` can be provided with any of the dataset arguments `wine`, `iris`, `olivetti` and `vowel`. Observe that we split the data into a **training** and a **testing** set.
# Configure numpy printing for debugging output. The original passed
# threshold=np.nan, which modern NumPy rejects (the threshold must be an
# integer); sys.maxsize reproduces the "never summarize" intent. The three
# separate calls are merged into one.
np.set_printoptions(threshold=sys.maxsize, precision=25, linewidth=200)
def testClassifier(dataset='iris', dim=0, split=0.7, doboost=False, boostiter=5, covdiag=True, ntrials=100):
    """
    Benchmark the (optionally boosted) Bayes classifier on a dataset.

    in:  dataset  - name understood by fetchDataset ('iris', 'wine', ...)
         dim      - if > 0, overrides the dataset's default PCA dimension
         split    - fraction of the data used for training
         doboost  - use the boosted classifier instead of the plain one
         boostiter- number of boosting rounds when doboost is True
         covdiag  - use diagonal covariances
         ntrials  - number of random train/test splits to average over

    Prints per-trial accuracy and the final mean/std. Python 2 print
    statements converted to the print() function.
    """
    X, y, pcadim = fetchDataset(dataset)
    means = np.zeros(ntrials)
    for trial in range(ntrials):
        # xTr,yTr,xTe,yTe,trIdx,teIdx = trteSplit(X,y,split)
        xTr, yTr, xTe, yTe, trIdx, teIdx = trteSplitEven(X, y, split)
        # Do PCA, replacing the dataset default if the user provided dim.
        if dim > 0:
            pcadim = dim
        if pcadim > 0:
            pca = decomposition.PCA(n_components=pcadim)
            pca.fit(xTr)
            xTr = pca.transform(xTr)
            xTe = pca.transform(xTe)
        if doboost:
            ## Boosting: train the ensemble, then vote.
            # (also forwards covdiag, which the original call dropped)
            priors, mus, sigmas, alphas = trainBoost(xTr, yTr, T=boostiter, covdiag=covdiag)
            yPr = classifyBoost(xTe, priors, mus, sigmas, alphas, covdiag=covdiag)
        else:
            ## Simple Bayes: estimate parameters, then classify.
            prior = computePrior(yTr)
            mu, sigma = mlParams(xTr, yTr)
            yPr = classify(xTe, prior, mu, sigma, covdiag=covdiag)
        # Classification accuracy for this trial, in percent.
        accuracy = 100 * np.mean((yPr == yTe).astype(float))
        print("Trial:", trial, "Accuracy", accuracy)
        means[trial] = accuracy
    print("Final mean classification accuracy", np.mean(means), "with standard deviation", np.std(means))
- # ## Plotting the decision boundary
- #
- # This is some code that you can use for plotting the decision boundary
- # boundary in the last part of the lab.
def plotBoundary(dataset='iris', split=0.7, doboost=False, boostiter=5, covdiag=True):
    """
    Plot the decision boundary of the (optionally boosted) classifier
    in the first two PCA dimensions of the chosen dataset.

    in:  dataset  - name understood by fetchDataset
         split    - fraction of data used for training
         doboost  - plot the boosted classifier's boundary
         boostiter- number of boosting rounds when doboost is True
         covdiag  - use diagonal covariances
    """
    X, y, pcadim = fetchDataset(dataset)
    xTr, yTr, xTe, yTe, trIdx, teIdx = trteSplitEven(X, y, split)
    # Project everything to 2D so the boundary can be drawn.
    pca = decomposition.PCA(n_components=2)
    pca.fit(xTr)
    xTr = pca.transform(xTr)
    xTe = pca.transform(xTe)
    pX = np.vstack((xTr, xTe))
    py = np.hstack((yTr, yTe))
    if doboost:
        ## Boosting: compute ensemble params.
        priors, mus, sigmas, alphas = trainBoost(xTr, yTr, T=boostiter, covdiag=covdiag)
    else:
        ## Simple: compute single-model params.
        prior = computePrior(yTr)
        mu, sigma = mlParams(xTr, yTr)
    # 100x100 grid over the bounding box of the projected data.
    xRange = np.arange(np.min(pX[:, 0]), np.max(pX[:, 0]), np.abs(np.max(pX[:, 0]) - np.min(pX[:, 0])) / 100.0)
    yRange = np.arange(np.min(pX[:, 1]), np.max(pX[:, 1]), np.abs(np.max(pX[:, 1]) - np.min(pX[:, 1])) / 100.0)
    grid = np.zeros((yRange.size, xRange.size))
    for (xi, xx) in enumerate(xRange):
        for (yi, yy) in enumerate(yRange):
            if doboost:
                grid[yi, xi] = classifyBoost(np.matrix([[xx, yy]]), priors, mus, sigmas, alphas, covdiag=covdiag)
            else:
                grid[yi, xi] = classify(np.matrix([[xx, yy]]), prior, mu, sigma, covdiag=covdiag)
    classes = range(int(np.min(y)), int(np.max(y)) + 1)
    # One colour per class. (The original sized the colormap via a list
    # comprehension that leaked the loop variable xx; only its length was
    # ever used, so len(classes) is equivalent and correct.)
    colormap = cm.rainbow(np.linspace(0, 1, len(classes)))
    # plt.hold was deprecated and removed from matplotlib; repeated plot
    # calls accumulate on the current axes by default, so it is dropped.
    conv = ColorConverter()
    for (color, c) in zip(colormap, classes):
        try:
            CS = plt.contour(xRange, yRange, (grid == c).astype(float), 15, linewidths=0.25, colors=conv.to_rgba_array(color))
        except ValueError:
            # contour raises when a class never appears in the grid.
            pass
        xc = pX[py == c, :]
        plt.scatter(xc[:, 0], xc[:, 1], marker='o', c=color, s=40, alpha=0.5)
    plt.xlim(np.min(pX[:, 0]), np.max(pX[:, 0]))
    plt.ylim(np.min(pX[:, 1]), np.max(pX[:, 1]))
    plt.show()
- # ## Run some experiments
- #
- # Call the `testClassifier` and `plotBoundary` functions for this part.
- # Example usage of the functions
# Example usage of the testing function. (A stray `p` followed the call
# in the original — a NameError at runtime, presumably paste debris —
# and has been removed.)
testClassifier(dataset='iris', split=0.7, doboost=False, boostiter=5, covdiag=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement