Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import math
- import matplotlib.patches as mpatches
- from mpl_toolkits.mplot3d import Axes3D
- from matplotlib.pyplot import figure, xticks, plot, title, xlabel, ylabel, show, xlim, ylim, legend, scatter, subplot, \
- tight_layout, grid, annotate
def readFile(fileString, numberOfAttributes, numberOfObservations):
    """Parse comma-separated text into a 2-D array of strings.

    The text is cut at every ',' and the resulting fields are written
    row by row into an array of shape
    (numberOfObservations, numberOfAttributes).

    Only ',' is treated as a separator: a line break is kept as part of
    the field it occurs in, so rows are delimited purely by counting
    fields.

    Parameters:
        fileString: the complete text to parse.
        numberOfAttributes: number of columns of the returned array.
        numberOfObservations: number of rows of the returned array.

    Returns:
        Object-dtype array of str. Cells for which the text holds no
        field stay '' and fields beyond the last cell are ignored.
    """
    returnArray = np.full((numberOfObservations, numberOfAttributes), '', dtype='object')
    fields = fileString.split(',')
    # Never write past the array; surplus fields are dropped.
    cellCount = min(len(fields), returnArray.size)
    for position, field in enumerate(fields[:cellCount]):
        row, column = divmod(position, numberOfAttributes)
        returnArray[row, column] = field
    return returnArray
# Load the raw data file and parse it into an
# (observations x attributes) array of strings.
observations = 194
attributes = 30
# The with-block closes the file as soon as its text has been read;
# the name 'test' stays bound (to the closed file) afterwards.
with open('../Data/flag.txt') as test:
    text = test.read()
dataInArray = readFile(text, attributes, observations)
def outOfK(inputArray, numberOfAttributes, numberOfObservations):
    """Build the numeric data matrix with 1-out-of-K encoded categories.

    The 30 attributes are laid out side by side in a 71-column matrix:
    attributes 1, 2, 5 and 6 (integer category codes) and attributes 17,
    28 and 29 (colour names) are expanded into one 0/1 column per
    category, every other attribute is copied as a number into a single
    column. Afterwards the columns of attribute 0 (country name),
    attribute 4 (population) and attribute 6 (religion) are removed,
    which leaves 62 columns.

    The columns to remove are derived from the layout table. Fixed
    column numbers do not survive a change of the encoding widths and
    would silently remove parts of other attributes.

    Parameters:
        inputArray: 2-D array indexed as [observation, attribute] whose
            cells are numbers, numeric strings or colour names.
        numberOfAttributes: number of leading attributes to encode
            (at most 30); the columns of the remaining ones stay 0.
        numberOfObservations: number of rows to encode.

    Returns:
        Float array of shape (numberOfObservations, 62).
    """
    # Number of output columns per attribute (1 = plain numeric copy).
    lengthOfOutOfK = np.ones(30, dtype=int)
    lengthOfOutOfK[1] = 6
    lengthOfOutOfK[2] = 4
    lengthOfOutOfK[5] = 10
    lengthOfOutOfK[6] = 7
    lengthOfOutOfK[17] = 7
    lengthOfOutOfK[28] = 7
    lengthOfOutOfK[29] = 7
    # First output column of every attribute in the expanded layout.
    firstColumn = np.cumsum(lengthOfOutOfK) - lengthOfOutOfK

    # Lowest category code of the integer-coded attributes: attribute 6
    # is indexed from 0, the others from 1.
    lowestCode = {1: 1, 2: 1, 5: 1, 6: 0}
    colourAttributes = (17, 28, 29)
    # Column offset of each colour name; 'orange' and 'brown' share one.
    colourOffset = {'red': 0, 'green': 1, 'blue': 2, 'gold': 3,
                    'white': 4, 'black': 5, 'orange': 6, 'brown': 6}
    # Country name, population and religion are not part of the result.
    droppedAttributes = (0, 4, 6)

    returnArray = np.zeros((numberOfObservations, int(lengthOfOutOfK.sum())))
    for i in range(numberOfAttributes):
        if i == 0:
            continue  # country names are text; their column is dropped below
        start = int(firstColumn[i])
        width = int(lengthOfOutOfK[i])
        column = [inputArray[h, i] for h in range(numberOfObservations)]
        if i in lowestCode:
            offsets = [int(value) - lowestCode[i] for value in column]
        elif i in colourAttributes:
            # Unknown colour names get no column (row stays all zero).
            offsets = [colourOffset.get(value, -1) for value in column]
        else:
            returnArray[:, start] = [float(value) for value in column]
            continue
        for h, offset in enumerate(offsets):
            # Codes outside the attribute's own block are ignored so
            # they cannot set a flag in a neighbouring attribute.
            if 0 <= offset < width:
                returnArray[h, start + offset] = 1

    droppedColumns = [
        column
        for attribute in droppedAttributes
        for column in range(int(firstColumn[attribute]),
                            int(firstColumn[attribute] + lengthOfOutOfK[attribute]))
    ]
    return np.delete(returnArray, droppedColumns, axis=1)
# Encode the parsed data set (categorical attributes become 0/1 columns).
dataOutOfK_ALL = outOfK(dataInArray, attributes, observations)

# Attribute names, one per attribute after the country name.
outOfKAttributes = np.array((
    'Landmass',
    'Geographic quadrant',
    'Area[km2]',
    'Population[millions]',
    'Language',
    'Religion',
    'Bars',
    'Stripes',
    'colours',
    'red',
    'green',
    'blue',
    'gold',
    'white',
    'black',
    'orange/brown',
    'predominant colour',
    'Circles',
    'Crosses',
    'Saltires',
    'Quarters',
    'Sunstars',
    'Crecent moon',
    'Triangle',
    'Icon',
    'Animate',
    'text',
    'Topleft-colour',
    'Bottomright-colour',
))

# Category labels of the coded attributes.
# NOTE(review): the order presumably follows the numeric codes used in the
# data file - confirm against the data set description.
definingLandmass = np.array((
    'North America',
    'South America',
    'Europe',
    'Africa',
    'Asia',
    'Oceania',
))
definingZone = np.array(('NE', 'SE', 'SW', 'NW'))
# definingArea
definingPopulation = np.array((
    '<1m',
    '1-5m',
    '6-10m',
    '11-20m',
    '21-50m',
    '>50m',
))
definingLanguage = np.array((
    'English',
    'Spanish',
    'French',
    'German',
    'Slavic',
    'Other European',
    'Chineses',
    'Arabic',
    'Japenese, \nTurkish, \nMagyar',
    'Others',
))
definingReligion = np.array((
    'Catholic',
    'Other Christian',
    'Muslim',
    'Buddhist',
    'Hindu',
    'Ethnic',
    'Marxist',
))
### SETUP FOR STATISTICS ###
# Set 'columnnumber' to the column of dataInArray to investigate.
# Set 'tal' to:
# 1: if the column holds numbers
# 0: if the column holds text
columnnumber = 1
tal = 1
# Extract the selected column
if tal == 1:
    # The builtin int replaces the np.int alias, which newer NumPy
    # releases no longer provide.
    vektorTest = dataInArray[:, columnnumber].astype(int)
else:
    vektorTest = dataInArray[:, columnnumber]
# print means/variance etc.
### SETUP FOR PCA FOR ALL ###
# Scaling of the encoded data before PCA:
# every column is divided by a scale factor (no mean is subtracted here).
def normalizeSet(data, M):
    """Scale the first ``M - 1`` columns of ``data`` in place.

    Columns 0-5, 6-9, 11-20 and 30-36 are divided by sqrt(K) with
    K = 6, 4, 10 and 7 respectively, columns from 59 on by sqrt(7).
    Every other column is divided by its own standard deviation; a
    column whose standard deviation is 0 is left unchanged, because the
    division would fill it with NaN/inf.

    NOTE(review): the loop stops at column M - 2, so if M is the number
    of columns the last one is never scaled - confirm what callers pass.

    Parameters:
        data: 2-D float array indexed as [observation, column]; modified
            in place.
        M: one more than the number of leading columns to scale.

    Returns:
        The same array object as ``data``.
    """
    # (first column, last column, K) of the blocks scaled by sqrt(K).
    outOfKBlocks = (
        (0, 5, 6),     # Landmass
        (6, 9, 4),     # zone
        (11, 20, 10),  # language
        (30, 36, 7),   # Mainhue
    )
    for i in range(M - 1):
        scale = None
        for first, last, k in outOfKBlocks:
            if first <= i <= last:
                scale = math.sqrt(k)
                break
        if scale is None:
            if i >= 59:
                scale = math.sqrt(7)
            else:
                scale = np.std(data[:, i])
        if scale == 0:
            continue  # constant column: nothing to scale
        data[:, i] = data[:, i] / scale
    return data
- #dataOutOfK_ALL[:,10] = dataOutOfK_ALL[:,10] / np.std(dataOutOfK_ALL[:, 10])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement