- """
- CSCC11 - Introduction to Machine Learning, Winter 2020, Assignment 3
- B. Chan, S. Wei, D. Fleet
- ===========================================================
- COMPLETE THIS TEXT BOX:
- Student Name: Felix Chen
- Student number: 1003331628
- UtorID: chenfeli
- I hereby certify that the work contained here is my own
- Felix Chen
- (sign with your name)
- ===========================================================
- """
import numpy as np

from functools import partial


class GMM:
    def __init__(self, init_centers):
        """ This class represents the GMM model.

        TODO: You will need to implement the methods of this class:
        - _e_step: ndarray, ndarray -> ndarray
        - _m_step: ndarray, ndarray -> (ndarray, ndarray, ndarray)

        Implementation description will be provided under each method.

        For the following:
        - N: Number of samples.
        - D: Dimension of input features.
        - K: Number of Gaussians.

        NOTE: K > 1

        Args:
        - init_centers (ndarray (shape: (K, D))): A KxD matrix consisting of K D-dimensional centers, one per Gaussian.
        """
        assert len(init_centers.shape) == 2, f"init_centers should be a KxD matrix. Got: {init_centers.shape}"
        (self.K, self.D) = init_centers.shape
        assert self.K > 1, f"There must be at least 2 clusters. Got: {self.K}"

        # Shape: K x D
        self.centers = np.copy(init_centers)

        # Shape: K x D x D
        self.covariances = np.tile(np.eye(self.D), reps=(self.K, 1, 1))

        # Shape: K x 1
        self.mixture_proportions = np.ones(shape=(self.K, 1)) / self.K
    def _e_step(self, train_X):
        """ This method performs the E-step of the EM algorithm.

        Args:
        - train_X (ndarray (shape: (N, D))): A NxD matrix consisting of N D-dimensional input data.

        Output:
        - probability_matrix (ndarray (shape: (N, K))): A NxK matrix consisting of N conditional probabilities p(z_k|x_i) (i.e. the responsibilities).
        """
        probability_matrix = np.empty(shape=(train_X.shape[0], self.K))

        # ====================================================
        # TODO: Implement your solution within the box
        def b(i, h):
            """ Negative log of the unnormalized responsibility of cluster h
            for input x_i. We work in log space for numerical stability, so
            that the exponents don't underflow:
                b = -ln(likelihood * prior)
                  = 1/2 [quadratic + D ln(2 pi) + ln det(C) - 2 ln(prior)]
            """
            assert 0 <= h < self.K, "h needs to be a valid label"
            y = train_X[i]
            mu = self.centers[h]
            C = self.covariances[h]
            mix = self.mixture_proportions[h, 0]
            r = (y - mu).T @ np.linalg.inv(C) @ (y - mu)
            r += self.D * np.log(2 * np.pi) + np.log(np.linalg.det(C))
            r -= 2 * np.log(mix)
            return r / 2

        N, _ = train_X.shape
        for i in range(N):
            # Log-sum-exp trick: shift by min_j b(i, j) (the largest
            # log-probability), so every exponent is <= 0 and the largest
            # term is exactly 1. Shifting by the max instead would risk
            # overflow. The normalized ratios are unchanged by the shift.
            exponents = np.array([b(i, h) for h in range(self.K)])
            c = np.min(exponents)
            eValue = np.exp(c - exponents)
            normalizeFactor = np.sum(eValue)
            probability_matrix[i] = eValue / normalizeFactor
        # ====================================================

        assert probability_matrix.shape == (train_X.shape[0], self.K), f"probability_matrix shape mismatch. Expected: {(train_X.shape[0], self.K)}. Got: {probability_matrix.shape}"
        return probability_matrix
    def _m_step(self, train_X, probability_matrix):
        """ This method performs the M-step of the EM algorithm.

        NOTE: This method updates self.centers, self.covariances, and self.mixture_proportions

        Args:
        - train_X (ndarray (shape: (N, D))): A NxD matrix consisting of N D-dimensional input data.
        - probability_matrix (ndarray (shape: (N, K))): A NxK matrix consisting of N conditional probabilities p(z_k|x_i) (i.e. the responsibilities).

        Output:
        - centers (ndarray (shape: (K, D))): A KxD matrix consisting of K D-dimensional means, one per Gaussian component.
        - covariances (ndarray (shape: (K, D, D))): A KxDxD tensor consisting of K DxD covariance matrices, one per Gaussian component.
        - mixture_proportions (ndarray (shape: (K, 1))): A Kx1 column vector containing the mixture proportion of each Gaussian component.
        """
        # ====================================================
        # TODO: Implement your solution within the box
        N, _ = train_X.shape

        # Shape: K x D
        centers = np.zeros((self.K, self.D))
        # Shape: K x D x D
        covariances = np.zeros((self.K, self.D, self.D))
        # Shape: K x 1
        mixture_proportions = np.zeros((self.K, 1))

        for j in range(self.K):
            # Shape: N x 1; responsibilities of component j for every sample.
            posteriorsForJ = probability_matrix[:, j:j + 1]
            # We're going to use this 3 times.
            sumPosteriorsForJ = np.sum(posteriorsForJ)

            mixture_proportions[j] = sumPosteriorsForJ / N

            # Responsibility-weighted mean; (N, 1) broadcasts against (N, D).
            mu = np.sum(posteriorsForJ * train_X, axis=0) / sumPosteriorsForJ
            centers[j] = mu

            # Responsibility-weighted covariance:
            # sum_i r_ij (x_i - mu)(x_i - mu)^T / sum_i r_ij, vectorized over
            # all N samples via broadcasted outer products.
            meanCentered = train_X - mu
            outerProducts = meanCentered[:, :, None] * meanCentered[:, None, :]
            outerProducts = posteriorsForJ[:, :, None] * outerProducts
            covariances[j] = np.sum(outerProducts, axis=0) / sumPosteriorsForJ
        # ====================================================

        assert centers.shape == (self.K, self.D), f"centers shape mismatch. Expected: {(self.K, self.D)}. Got: {centers.shape}"
        assert covariances.shape == (self.K, self.D, self.D), f"covariances shape mismatch. Expected: {(self.K, self.D, self.D)}. Got: {covariances.shape}"
        assert mixture_proportions.shape == (self.K, 1), f"mixture_proportions shape mismatch. Expected: {(self.K, 1)}. Got: {mixture_proportions.shape}"
        return centers, covariances, mixture_proportions
    def train(self, train_X, max_iterations=1000):
        """ This method trains the GMM model using the EM algorithm.

        NOTE: This method updates self.centers, self.covariances, and self.mixture_proportions

        Args:
        - train_X (ndarray (shape: (N, D))): A NxD matrix consisting of N D-dimensional input data.
        - max_iterations (int): Maximum number of iterations.

        Output:
        - labels (ndarray (shape: (N, 1))): A Nx1 column vector consisting of N labels, one per input datum.
        """
        assert len(train_X.shape) == 2 and train_X.shape[1] == self.D, f"train_X should be a NxD matrix. Got: {train_X.shape}"
        assert max_iterations > 0, f"max_iterations must be positive. Got: {max_iterations}"
        N = train_X.shape[0]

        e_step = partial(self._e_step, train_X=train_X)
        m_step = partial(self._m_step, train_X=train_X)

        # Start from an invalid label so the first convergence check cannot
        # spuriously pass on uninitialized memory.
        labels = np.full(shape=(N, 1), fill_value=-1, dtype=np.int64)
        for _ in range(max_iterations):
            old_labels = labels

            # E-Step
            probability_matrix = e_step()

            # Reassign labels
            labels = np.argmax(probability_matrix, axis=1).reshape((N, 1))

            # Check convergence: the labels are integers, so exact equality
            # is the right test.
            if np.array_equal(old_labels, labels):
                break

            # M-Step
            self.centers, self.covariances, self.mixture_proportions = m_step(probability_matrix=probability_matrix)

        return labels
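

# ----------------------------------------------------------
# Minimal usage sketch (not part of the assignment handout): fits the GMM to
# two synthetic, well-separated 2-D Gaussian blobs and checks that EM recovers
# two clusters. The blob locations, sample counts, and seed below are
# illustrative assumptions, not values from the course materials.
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    # Two blobs of 100 points each, centered at (-3, -3) and (3, 3).
    blob_a = rng.normal(loc=-3.0, scale=1.0, size=(100, 2))
    blob_b = rng.normal(loc=3.0, scale=1.0, size=(100, 2))
    train_X = np.vstack([blob_a, blob_b])

    # Initialize one center near each blob (K = 2, D = 2).
    init_centers = np.array([[-1.0, -1.0], [1.0, 1.0]])

    model = GMM(init_centers)
    labels = model.train(train_X)

    print("Estimated centers:\n", model.centers)
    print("Mixture proportions:", model.mixture_proportions.ravel())
    print("Cluster sizes:", np.bincount(labels.ravel(), minlength=2))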