# Untitled

from numpy import corrcoef
import numpy as np
# (a)
# Implement my_corrcoef and compare with numpy.corrcoef
# use the same signature as numpy.corrcoef
def my_corrcoef(x, y):
    """Return the Pearson correlation coefficient of two 1-D samples.

    The coefficient is the sum of products of the mean-centred samples
    divided by the product of the square roots of their summed squared
    deviations.

    Unlike ``numpy.corrcoef``, which returns the full correlation matrix,
    this returns only the scalar coefficient between ``x`` and ``y``
    (for 1-D inputs, the ``[0, 1]`` entry of that matrix).

    Args:
        x: 1-D sequence or array of numbers.
        y: 1-D sequence or array of numbers with the same length as ``x``.

    Returns:
        The correlation coefficient as a NumPy float. The result is ``nan``
        when the denominator is zero (e.g. one of the inputs is constant).

    Raises:
        ValueError: If ``x`` or ``y`` is not 1-D, or their lengths differ.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Paired samples are required: a length mismatch would otherwise either
    # raise an unhelpful IndexError or silently pair the wrong statistics.
    if x.ndim != 1 or y.ndim != 1:
        raise ValueError("x and y must be 1-D")
    if x.shape != y.shape:
        raise ValueError(
            "x and y must have the same length, got %d and %d"
            % (x.shape[0], y.shape[0])
        )

    x_centered = x - np.mean(x)
    y_centered = y - np.mean(y)

    covariation = np.dot(x_centered, y_centered)
    # Take the square roots separately (rather than the root of the product)
    # to keep the intermediate values in the same range as the inputs.
    scale = np.sqrt(np.dot(x_centered, x_centered)) * np.sqrt(
        np.dot(y_centered, y_centered)
    )

    return covariation / scale

# (b)
# Calculate the correlation coefficient for all pairs.
# Pair indices are 1-based, so the array gets one extra slot: index 0 is
# never written and coefficients[i] holds the coefficient of pair i.
n_pairs = 108
coefficients = np.zeros(n_pairs + 1)

for i in range(1, n_pairs + 1):
    # presumably load_pair(i) returns a 2-column array (x in column 0,
    # y in column 1) — verify against its definition
    xy = load_pair(i)
    coefficients[i] = my_corrcoef(xy[:, 0], xy[:, 1])

# Leave out the unused slot 0; otherwise the histogram contains a spurious
# 0.0 sample that does not belong to any pair.
# NOTE(review): hist is not imported in the visible part of this file
# (presumably provided by pylab) — confirm.
hist(coefficients[1:], bins=50);

from hsic import hsic
# (c)
# Calculate the HSIC criterion for the first pairs only (pairs 1..9).
# NOTE(review): the exercise asks for all pairs; only a small subset is
# evaluated here, presumably because hsic is expensive — confirm.

# Pair indices are 1-based, so slot 0 of the array is never written and
# hsics[i] holds the value for pair i.
hsics = np.zeros(10)

# Fill every remaining slot (1..9); stopping at 8 would leave hsics[9] at its
# initial 0.0 and that placeholder would be plotted as if it were a result.
for i in range(1, len(hsics)):
    xy = load_pair(i)
    # Only the first element of hsic's return value is kept.
    hsics[i] = hsic(xy[:, 0], xy[:, 1])[0]

# Leave out the unused slot 0 so the histogram only shows computed values.
hist(hsics[1:], bins=50);

# (d)
# Make a scatter plot where each point is a pairs dataset and the axes show my_corrcoef vs hsic.

# Both arrays are indexed by the 1-based pair number, so slot 0 is an unused
# placeholder (0.0 in each); skip it to avoid drawing a fake point at the
# origin. The upper bound follows the length of hsics, which covers fewer
# pairs than coefficients.
scatter(coefficients[1:len(hsics)], hsics[1:])