# Untitled

#!/usr/bin/env python
import operator
import math
import urllib2
import sys
import scipy
import numpy
import matplotlib
import pandas
import sklearn
import matplotlib.pyplot as plt
from decimal import Decimal
from pandas.plotting import scatter_matrix
from functools import reduce
from scipy.stats import kurtosis as kurtosis_scipy, skew as skewness_scipy
from scipy.stats import moment
from statistics import mode

#########
# Location and column layout of the Iris data set.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
# The same CSV is also loaded into a DataFrame using the column names above.
dataset = pandas.read_csv(url, names=names)

# Plain-list copies of the columns: A-D receive the four numeric fields in
# file order (matching the first four entries of `names`), Description the
# class label.
A = []
B = []
C = []
D = []
Description = []

data = urllib2.urlopen(url)
try:
    for line in data:
        # Strip the line terminator first so the class label does not end
        # with "\n"; blank lines yield a single field and are skipped below.
        x = line.strip().split(',')
        if len(x) == 5:
            A.append(float(x[0]))
            B.append(float(x[1]))
            C.append(float(x[2]))
            D.append(float(x[3]))
            Description.append(x[4])
finally:
    # Release the HTTP connection even if a row fails to parse.
    data.close()

def bubble_sort(X):
    """Sort the list X in place in ascending order and return it.

    The returned object is the very list that was passed in, so the
    caller's data is reordered.  Each pass stops before the tail that
    earlier passes already settled, and the loop ends as soon as a pass
    makes no swap, so already-sorted input costs a single pass.
    """
    n = len(X)
    for settled in range(n - 1):
        swapped = False
        # The last `settled` positions already hold their final values.
        for j in range(n - 1 - settled):
            if X[j] > X[j + 1]:
                X[j], X[j + 1] = X[j + 1], X[j]
                swapped = True
        if not swapped:
            break
    return X

######### Miary srednie klasyczne

#srednia arytmetyczna
def mean(X):
    """Return the arithmetic mean of the numbers in X as a float.

    The length is converted to float so that integer inputs are not
    truncated by integer division on interpreters where ``/`` floors ints.

    Raises ZeroDivisionError when X is empty.
    """
    return sum(X) / float(len(X))

#srednia harmoniczna
def harmonic_mean(X):
    """Return the harmonic mean of X: len(X) divided by the sum of reciprocals.

    Raises ZeroDivisionError when X is empty or contains a zero.
    """
    # Named so that the builtin sum() is not shadowed inside the function.
    reciprocal_total = sum(1.0 / value for value in X)
    return len(X) / reciprocal_total

# Report the harmonic mean of column A.
print("Srednia harmoniczna: {0}".format(harmonic_mean(A)))

#srednia geometryczna
def geomean(X):
    """Return the geometric mean of X (the len(X)-th root of the product).

    For strictly positive data the result is computed as
    exp(mean(log(x))), which avoids the overflow to infinity (or underflow
    to zero) that the running product suffers on long or large inputs.
    Data containing zeros or negative values falls back to the direct
    product formula.

    Raises ZeroDivisionError when X is empty.
    """
    power = 1.0 / len(X)
    if all(value > 0 for value in X):
        return math.exp(math.fsum(math.log(value) for value in X) * power)
    return reduce(operator.mul, X) ** power

# Report the arithmetic and geometric means of column A.
print("Srednia: {0}".format(mean(A)))
print("Srednia geometryczna: {0}".format(geomean(A)))

######### Miary srednie pozycyjne

#mediana
def median(X):
    """Return the median of X without modifying X.

    For an even number of values the two middle values are averaged (the
    result is a float); for an odd number the middle value itself is
    returned.

    Raises IndexError when X is empty.
    """
    # sorted() works on a copy, so the caller's list keeps its order.
    ordered = sorted(X)
    mid = len(ordered) // 2  # floor division: an integer index is needed
    if len(ordered) % 2 == 0:
        return (ordered[mid - 1] + ordered[mid]) / 2.0
    return ordered[mid]

# Small hand-written sample used to exercise median().
X = [
    10.3, 4.1, 12, 15.5,
    20.2, 5.5, 15.5, 4.1,
]
print("Mediana: {0}".format(median(X)))

#modalna (dominanta)
def dominant(X):
    """Return the mode of X, i.e. the value that occurs most often.

    Occurrences are counted explicitly, so the result is correct even when
    no value makes up more than half of the data.  When several values
    share the highest count, the one that appears first in X wins.
    Returns None for empty input.  Values must be hashable.
    """
    counts = {}
    first_seen = []  # distinct values in order of first appearance
    for item in X:
        if item not in counts:
            counts[item] = 0
            first_seen.append(item)
        counts[item] += 1

    value = None
    best = 0
    for item in first_seen:
        # Strict comparison keeps the earliest value on ties.
        if counts[item] > best:
            value = item
            best = counts[item]
    return value

# Report the mode of column A.
print("Dominanta: {0}".format(dominant(A)))

#kwartyle
def _median_of_sorted(values):
    """Return the median of an already sorted list."""
    mid = len(values) // 2
    if len(values) % 2 == 0:
        return (values[mid - 1] + values[mid]) / 2.0
    return values[mid]


def quartiles(X):
    """Return the tuple (Q1, Q3) of X without modifying X.

    The data is sorted once and split into a lower and an upper half; for
    an odd number of values the middle value belongs to neither half.
    Q1 is the median of the lower half and Q3 the median of the upper half.

    Raises IndexError when X has fewer than two values.
    """
    ordered = sorted(X)  # copy, so the caller's list keeps its order
    mid = len(ordered) // 2  # floor division: an integer index is needed
    lower = ordered[:mid]
    # Skip the middle element when the length is odd (len % 2 == 1).
    upper = ordered[mid + len(ordered) % 2:]
    return (_median_of_sorted(lower), _median_of_sorted(upper))

# Report both quartiles of column A; quartiles() is evaluated only once and
# its (Q1, Q3) tuple is unpacked into the two placeholders.
print("Kwartyl Q1: {0} oraz kwartyl Q3: {1}".format(*quartiles(A)))
#decyle

#percentyle

######### Miary rozproszenia

# rozstep
# odchylenie standardowe
def stdev(X):
    """Return the population standard deviation of X.

    This is the square root of the mean squared deviation from mean(X),
    dividing by len(X) rather than len(X) - 1.  The divisor is converted
    to float so integer data is not truncated by integer division.

    Raises ZeroDivisionError when X is empty.
    """
    m = mean(X)
    squared_deviations = sum((x - m) ** 2 for x in X)
    return math.sqrt(squared_deviations / float(len(X)))

# wariancje
# rozstep
# odchylenie standardowe
# wariancja
# wspolczynnik zmiennosci
# gestosc prawdopodobienstwa
def normal(x):
    """Return the standard normal probability density at x.

    Computes exp(-x**2 / 2) / sqrt(2 * pi), the density of a normal
    distribution with mean 0 and standard deviation 1.
    """
    # 2.0 keeps the exponent a float even when x is an integer.
    return math.exp(-x * x / 2.0) / math.sqrt(2 * math.pi)

######### Miary asymetrii

# moment centralny
def CentralMoment(X, level):
    """Return the central moment of order `level` of X as a Decimal.

    The moment is the average of (x - mean(X)) ** level over all x in X.
    The mean is computed once up front instead of once per element.

    Raises a ZeroDivisionError (or subclass) when X is empty.
    """
    m = mean(X)
    total = sum((value - m) ** level for value in X)
    return Decimal(total) / Decimal(len(X))

# Wspolczynnik asymetrii
def asymmetryCoefficient(X):
    """Return the asymmetry coefficient of X as a Decimal.

    This is the third central moment divided by the cube of the standard
    deviation.  The moment order (3) is passed explicitly because
    CentralMoment requires it.
    """
    return CentralMoment(X, 3) / Decimal(stdev(X) ** 3)

# Wspolczynnik skosnosci
def skewness(X):
    """Return the skewness of X as a float.

    This is the third central moment divided by the cube of the standard
    deviation stdev(X).
    """
    m = mean(X)  # computed once rather than for every element
    n = float(len(X))
    third_moment = sum((value - m) ** 3 for value in X) / n
    # stdev must be called on X; the bare function cannot be raised to a power.
    return third_moment / (stdev(X) ** 3)

######### Miary koncentracji

# Kurtoza
def kurtosis(X):
    """Return the kurtosis of X minus 3, as a Decimal.

    The value is the fourth central moment divided by the fourth power of
    the standard deviation, with 3 subtracted from the quotient.
    """
    fourth_moment = CentralMoment(X, 4)
    spread_to_fourth = Decimal(stdev(X) ** 4)
    return fourth_moment / spread_to_fourth - 3
# Report the kurtosis of column A.
print('Kurtoza: {0}'.format(kurtosis(A)))

# Wspolczynnik Giniego
def gini(list_of_values):
    """Return the Gini coefficient of the given values.

    The values are processed in ascending order while a running total and
    the area under the resulting cumulative curve are accumulated.  That
    area is compared with the area obtained if every value contributed
    equally; the relative shortfall is the coefficient.

    Raises ZeroDivisionError when the values are empty or sum to zero.
    """
    ordered = list(list_of_values)
    ordered.sort()
    running_total = 0
    curve_area = 0
    for item in ordered:
        running_total = running_total + item
        # Trapezoid slice: the total so far minus half of the newest step.
        curve_area = curve_area + (running_total - item / 2.)
    equal_area = running_total * len(list_of_values) / 2.
    return (equal_area - curve_area) / equal_area

######### Normalizacja zmiennej losowej. Dopasowanie rozkladu i analiza danych w jego kontekscie.

######### Formulowanie i weryfikacja hipotez statystycznych.

######### Obliczanie histogramu.