# Untitled

# Compute accuracy and related metrics (precision, recall, F1) over a range of cut-off values.

def test_accuracy_metrics_plot(test_mtx, block_size=10, num_margins=100):
    """Evaluate accuracy, precision, recall and F1 over a sweep of cut-off values.

    ``test_mtx`` is treated as a square grid of ``block_size`` x ``block_size``
    blocks.  Entries inside the diagonal blocks are the actual positives
    (pairs belonging to the same individual); entries in every other block
    are the actual negatives.  An entry is *predicted* positive when its
    value is strictly below the cut-off.

    The cut-offs are ``num_margins`` evenly spaced values that start at the
    smallest diagonal-block mean and stop just short of the largest
    diagonal-block value.

    Args:
        test_mtx: 2-D array-like of pairwise scores.  Only the leading
            ``k * block_size`` rows and columns are used, where
            ``k = rows // block_size``; a trailing partial block is ignored
            and is not counted in any total.
        block_size: Side length of one block (number of scores per
            individual).
        num_margins: Number of cut-off values to evaluate.

    Returns:
        A list with one ``[margin, accuracy, precision, recall, f1]`` entry
        per cut-off, in increasing margin order.  Precision and F1 are
        reported as ``0.0`` when they are mathematically undefined (nothing
        predicted positive, or no true positives).

    Raises:
        ValueError: If ``block_size`` or ``num_margins`` is not positive, if
            ``test_mtx`` is not 2-D, if it does not contain at least one full
            square block, or if the diagonal blocks do not span a usable
            cut-off range (for example all values equal, or NaN present).
    """
    if block_size < 1 or num_margins < 1:
        raise ValueError("block_size and num_margins must be positive")

    scores = np.asarray(test_mtx)
    if scores.ndim != 2:
        raise ValueError("test_mtx must be two-dimensional")

    counts = scores.shape[0] // block_size
    usable = counts * block_size
    if counts == 0 or scores.shape[1] < usable:
        raise ValueError(
            "test_mtx must contain at least one full "
            f"{block_size}x{block_size} block on its diagonal"
        )
    # Work only on whole blocks so that every total below refers to exactly
    # the entries that are actually compared against the cut-offs.
    scores = scores[:usable, :usable]

    # The diagonal blocks alone decide the range of cut-offs to sweep.
    diag_blocks = [
        scores[start:start + block_size, start:start + block_size]
        for start in range(0, usable, block_size)
    ]
    lower_bound = np.min([np.mean(block) for block in diag_blocks])
    upper_bound = np.max([np.max(block) for block in diag_blocks])
    # Written as "not >" so that NaN bounds are rejected as well.
    if not upper_bound > lower_bound:
        raise ValueError(
            "diagonal blocks do not span a range of cut-off values "
            f"(lower={lower_bound}, upper={upper_bound})"
        )

    # Split the entries into actual positives / actual negatives by block.
    block_of = np.arange(usable) // block_size
    same_mask = block_of[:, None] == block_of[None, :]
    same_vals = scores[same_mask]
    diff_vals = scores[~same_mask]
    total_same = same_vals.size
    total_diff = diff_vals.size
    samples = total_same + total_diff

    # linspace (rather than arange with a float step) guarantees exactly
    # num_margins cut-offs regardless of floating-point rounding.
    margins = np.linspace(lower_bound, upper_bound, num_margins, endpoint=False)

    metrics = []
    for margin in margins:
        true_pos = int(np.count_nonzero(same_vals < margin))
        false_pos = int(np.count_nonzero(diff_vals < margin))
        true_neg = total_diff - false_pos
        predicted_pos = true_pos + false_pos

        accuracy = (true_pos + true_neg) / samples
        # Guard the 0/0 cases so the sweep never contains NaN.
        precision = true_pos / predicted_pos if predicted_pos else 0.0
        recall = true_pos / total_same
        if true_pos:
            f1_score = 2 * precision * recall / (precision + recall)
        else:
            f1_score = 0.0
        metrics.append([margin, accuracy, precision, recall, f1_score])

    return metrics


# The name starts with "test_" but this is a helper that requires an argument;
# opt out of pytest/nose collection so test runners do not try to execute it.
test_accuracy_metrics_plot.__test__ = False