# Untitled

import math
from scipy.special import expit

def mean(data):
    """Return the arithmetic mean of every inner sequence in *data*.

    data: a sequence of number sequences, e.g. ``[[1, 2, 3], [4, 5, 6]]``.

    Returns a list with one mean per inner sequence, in the same order.
    The result is also printed to stdout, prefixed with ``Mean``.
    An empty inner sequence raises ZeroDivisionError.
    """
    averages = []

    for row in data:
        # Add the values up by hand, one after another
        total = 0
        for value in row:
            total += value

        # The accumulated total is divided by the number of values in the row
        averages.append(total / len(row))

    print("Mean", averages)
    return averages

def std(data):
    """Return the population standard deviation of every inner sequence.

    data: a sequence of number sequences, e.g. ``[[1, 2, 3], [4, 5, 6]]``.

    Returns a list with one standard deviation per inner sequence, in the
    same order. The means are obtained from ``mean(data)``, which prints
    them; the deviations are printed as well, prefixed with ``Deviation``.
    An empty inner sequence raises ZeroDivisionError (from ``mean``).
    """
    # The per-row means come from the mean function
    row_means = mean(data)
    deviations = []

    for row, row_mean in zip(data, row_means):
        # Every value has the row mean subtracted, is squared,
        # and the squares are added together
        squared_error = 0
        for value in row:
            squared_error += (value - row_mean) ** 2

        # Divide by the number of values in THIS row (not by the number of
        # rows in data), then take the square root
        deviations.append(math.sqrt(squared_error / len(row)))

    print("Deviation", deviations)
    return deviations

def distance(v1, v2):
    """Return the Euclidean (straight-line) distance between v1 and v2.

    v1, v2: number sequences of the same length.

    Returns the square root of the summed squared coordinate differences.
    If the lengths differ, an error message is printed and None is
    returned.
    """
    if len(v1) != len(v2):
        print("CALC ERROR in commonFunctions.py:" + "\n" +
              "The two vectors does not have the same dimension")
        return

    # Accumulate the squared difference of each coordinate pair
    squared_total = 0
    for a, b in zip(v1, v2):
        squared_total += (a - b)**2

    return math.sqrt(squared_total)


def middlePoint(points):
    """Return the centre point (coordinate-wise mean) of *points*.

    points: a non-empty sequence of points; every point must have at least
    as many coordinates as the first one.

    Returns a list holding, for each coordinate of the first point, the
    average of that coordinate over all points.
    """
    # The first point decides how many coordinates are averaged
    axis_count = len(points[0])

    centre = []
    for axis in range(axis_count):
        # Add up this coordinate over all points
        total = 0
        for current in points:
            total += current[axis]

        centre.append(total / len(points))

    return centre


def arrangeData(dimensions, data):
    """Turn column-wise data into a list of row-wise points.

    dimensions: how many of the leading columns in *data* to use.
    data: a sequence of columns; ``data[j][i]`` is the value of
        dimension j for point i.

    Returns one list per entry of the first column, each holding that
    point's value from the first *dimensions* columns.
    """
    # The first column decides how many points are produced
    point_count = len(data[0])

    return [
        [data[column][row] for column in range(dimensions)]
        for row in range(point_count)
    ]


#Function for showing data in a 2D scatter plot
#Data needs to be one array holding one array of values per point
#NOTE(review): np, plt and clusters are used below but are not imported or
#defined in the visible part of this file - confirm where they come from
def show2DGraph(data):
    """Draw *data* as a scatter plot and show it.

    data: one entry per point; the first two values of a point are used as
    the x and y position. When the first point has three or more values,
    the third value also decides the marker size.

    The colour of point i is looked up as ``colors[clusters[i]]``.
    NOTE(review): ``clusters`` is neither a parameter nor a local, so it
    has to exist as a module-level name - presumably one cluster index per
    point; TODO confirm.
    """
    dataSet = np.array(data)

    # One single-element list per colour so it can be added with +=
    # NOTE(review): 'p' does not look like a matplotlib one-letter colour
    # code - confirm against the matplotlib colour documentation
    colors = [['b'], ['r'], ['g'], ['y'], ['p']]
    color_array = []
    area = []

    if len(dataSet[0]) >= 3:
        # Three or more values per point: marker size grows with the third
        for i in range(len(dataSet)):
            color_array += colors[clusters[i]]
            area.append(((dataSet[i][2]) + 2) ** 5)
    else:
        # Fewer than three values per point: every marker gets size 10
        # NOTE(review): this loop runs over len(clusters), the branch above
        # over len(dataSet) - presumably both have the same length
        for i in range(len(clusters)):
            color_array += colors[clusters[i]]
            area.append(10)

    # NOTE(review): this indexes the original data argument, not dataSet;
    # data[:, 0] needs an object that accepts tuple indexing (a plain list
    # of lists does not) - confirm that callers pass an array
    plt.scatter(data[:, 0], data[:, 1], s=area, c=color_array, alpha=0.5)
    plt.show()


def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**(-x))`` of *x*.

    x: a number, or a NumPy array (evaluated element-wise).

    Returns a plain float for a plain Python number, otherwise whatever
    ``scipy.special.expit`` returns for the input.

    ``expit`` is used instead of evaluating ``math.e ** (-x)`` directly,
    because that power raises OverflowError for large negative x
    (e.g. x = -1000) although the correct result is simply 0.0.
    """
    result = expit(x)

    # Plain Python numbers keep returning a plain Python float
    if isinstance(x, (int, float)):
        return float(result)
    return result


def getZ(a, b, c, n):
    """Return the points of the plane z = a + b*x + c*y on an n-by-n grid.

    a, b, c: constant term, x coefficient and y coefficient.
    n: grid size; x and y both run over 0 .. n-1.

    Returns a list of ``[x, y, z]`` lists, ordered by x first and then y.
    """
    return [
        [x, y, a + (x*b) + (y*c)]
        for x in range(n)
        for y in range(n)
    ]


#Sample data built when the module is loaded: the [x, y, z] points of the
#plane z = 123.47 - 4.49*x + 16.26*y for x and y in 0..9 (100 points)
testArray = getZ(123.47, -4.49, 16.26, 10)


def _split_into_folds(rows, folds):
    """Cut *rows* into *folds* consecutive chunks of equal size."""
    if folds < 1:
        raise ValueError("folds must be at least 1")

    fold_size = len(rows) // folds
    if fold_size == 0:
        raise ValueError(
            "cannot split {} rows into {} non-empty folds".format(
                len(rows), folds))

    # Rows left over after the last full fold are not used, so that all
    # folds keep the same size
    return [
        list(rows[start:start + fold_size])
        for start in range(0, fold_size * folds, fold_size)
    ]


def prepare_data(data, prepare_for_cross_validation, prepare_for_KNN,
                 folds=10):
    """Split *data* into features and labels and/or cross-validation folds.

    data: a sequence of rows; the last value of a row is its label and
        the values before it are its features.
    prepare_for_cross_validation: when true, the result is cut into
        *folds* consecutive, equally sized folds.
    prepare_for_KNN: when true, rows are kept whole instead of being
        separated into features and labels.
    folds: number of cross-validation folds (default 10).

    Returns
        - ``(features, labels)`` when neither flag is set,
        - ``(feature_folds, label_folds)`` for cross validation only,
        - a list of folds of whole rows when both flags are set,
        - ``([], [])`` when only prepare_for_KNN is set.
          NOTE(review): this empty result is what the code has always
          returned for that combination - confirm it is intended.

    The fold size is ``len(data) // folds``. The earlier implementation
    always took 10 folds of 50 rows, which only fits 500 rows exactly; for
    500 rows the result is unchanged.

    Raises ValueError if folds < 1 or if there are fewer rows than folds.
    """
    if prepare_for_KNN:
        if prepare_for_cross_validation:
            return _split_into_folds(data, folds)
        return [], []

    # The last column is the label, everything before it the features
    features = [row[:-1] for row in data]
    labels = [row[-1] for row in data]

    if prepare_for_cross_validation:
        features = _split_into_folds(features, folds)
        labels = _split_into_folds(labels, folds)

    return features, labels