Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import math
- import matplotlib.patches as mpatches
- from mpl_toolkits.mplot3d import Axes3D
- from matplotlib.pyplot import figure, xticks,plot, title, xlabel, ylabel, show, xlim, ylim, legend, scatter, subplot, tight_layout, grid, annotate
def readFile(fileString, numberOfAttributes, numberOfObservations):
    """Split comma-separated text into a 2-D array of string fields.

    The text is cut at every ',' and the resulting fields are stored in
    row-major order: the first ``numberOfAttributes`` fields form row 0,
    the next ``numberOfAttributes`` fields form row 1, and so on.

    Only the comma acts as a separator. Every other character, line breaks
    included, stays part of the field it occurs in.

    Parameters
    ----------
    fileString : str
        Complete text to parse.
    numberOfAttributes : int
        Number of columns of the returned array.
    numberOfObservations : int
        Number of rows of the returned array.

    Returns
    -------
    numpy.ndarray
        Object array of shape (numberOfObservations, numberOfAttributes).
        Cells for which the text holds no field keep the empty string;
        fields beyond the capacity of the array are ignored.
    """
    returnArray = np.full((numberOfObservations, numberOfAttributes), '', dtype='object')
    capacity = numberOfObservations * numberOfAttributes
    # One split replaces the per-character scan and the repeated string
    # concatenation it needed to build each field.
    fields = fileString.split(',')[:capacity]
    for position, field in enumerate(fields):
        # divmod turns the running field number into (row, column).
        returnArray[divmod(position, numberOfAttributes)] = field
    return returnArray
# Loading and reading file
observations = 194
attributes = 30
# The with-block closes the file handle as soon as its text has been read;
# the name 'test' stays bound afterwards (to the closed file object).
with open('flag.txt') as test:
    text = test.read()
dataInArray = readFile(text, attributes, observations)
def outOfK(inputArray, numberOfAttributes, numberOfObservations):
    """Build a numeric matrix with selected attributes 1-out-of-K encoded.

    Attribute 0 is given one column that is left at zero. Attributes 1, 2
    and 5 are expanded into 6, 4 and 10 indicator columns, selected by
    ``int(value) - 1``. Attribute 6 is expanded into 7 indicator columns,
    selected by ``int(value)``. Attributes 17, 28 and 29 hold colour names
    and are expanded into 7 indicator columns each, in the order red,
    green, blue, gold, white, black, orange/brown; any other value leaves
    all 7 columns at zero. Every remaining attribute is copied into a
    single column as a number.

    The 71 columns built this way are then reduced to 62 by three fixed
    column deletions (see the end of the function).

    Parameters
    ----------
    inputArray : numpy.ndarray
        2-D array indexed as [observation, attribute]; the values are read
        with ``int(...)``, compared against colour names, or converted to
        float, so they are presumably the string fields produced by
        readFile - TODO confirm against caller.
    numberOfAttributes : int
        Number of attributes (columns of inputArray) to process; the fixed
        71-column layout corresponds to 30 attributes.
    numberOfObservations : int
        Number of observations (rows of inputArray) to encode.

    Returns
    -------
    numpy.ndarray
        Float array of shape (numberOfObservations, 62).
    """
    # attribute index -> (number of indicator columns, lowest category code)
    numericCategories = {1: (6, 1), 2: (4, 1), 5: (10, 1), 6: (7, 0)}
    colourAttributes = (17, 28, 29)
    # colour name -> offset inside the 7 indicator columns of the attribute
    colourOffset = {
        'red': 0,
        'green': 1,
        'blue': 2,
        'gold': 3,
        'white': 4,
        'black': 5,
        'orange': 6,
        'brown': 6,
    }

    returnArray = np.zeros((numberOfObservations, 71))  # creates returnArray
    returnCount = 0  # first output column of the attribute being processed
    for i in range(numberOfAttributes):
        if i == 0:
            # Column stays zero; it is deleted again below.
            width = 1
        elif i in numericCategories:
            width, lowestCode = numericCategories[i]
            for h in range(numberOfObservations):
                returnArray[h, returnCount + int(inputArray[h, i]) - lowestCode] = 1
        elif i in colourAttributes:
            width = 7
            for h in range(numberOfObservations):
                offset = colourOffset.get(inputArray[h, i])
                if offset is not None:
                    returnArray[h, returnCount + offset] = 1
        else:
            width = 1
            returnArray[:, returnCount] = inputArray[:numberOfObservations, i]
        returnCount += width

    returnArray = np.delete(returnArray, 0, 1)  # removes attribute of countries
    # NOTE(review): the original comments labelled the next two deletions
    # "removes attribute of religion" and "removes attribute of population".
    # With the layout built above, indices 5..11 (after column 0 is gone)
    # are the last indicator of attribute 1, the four indicators of
    # attribute 2 and the columns of attributes 3 and 4; index 3 afterwards
    # is the fourth indicator of attribute 1. The indices are kept exactly
    # as they were - confirm which columns were meant to be dropped.
    returnArray = np.delete(returnArray, np.array([5, 6, 7, 8, 9, 10, 11]), 1)
    returnArray = np.delete(returnArray, 3, 1)
    return returnArray
# Encoded version of the complete data set.
dataOutOfK_ALL = outOfK(dataInArray, attributes, observations)

# Display names of the attributes.
outOfKAttributes = np.array([
    'Landmass',
    'Geographic quadrant',
    'Area[km2]',
    'Population[millions]',
    'Language',
    'Religion',
    'Bars',
    'Stripes',
    'colours',
    'red',
    'green',
    'blue',
    'gold',
    'white',
    'black',
    'orange/brown',
    'predominant colour',
    'Circles',
    'Crosses',
    'Saltires',
    'Quarters',
    'Sunstars',
    'Crecent moon',
    'Triangle',
    'Icon',
    'Animate',
    'text',
    'Topleft-colour',
    'Bottomright-colour',
])

# Display names of the categories of the categorical attributes.
definingLandmass = np.array([
    'North America',
    'South America',
    'Europe',
    'Africa',
    'Asia',
    'Oceania',
])
definingZone = np.array(['NE', 'SE', 'SW', 'NW'])
#definingArea
definingPopulation = np.array([
    '<1m',
    '1-5m',
    '6-10m',
    '11-20m',
    '21-50m',
    '>50m',
])
definingLanguage = np.array([
    'English',
    'Spanish',
    'French',
    'German',
    'Slavic',
    'Other European',
    'Chineses',
    'Arabic',
    'Japenese, \nTurkish, \nMagyar',
    'Others',
])
definingReligion = np.array([
    'Catholic',
    'Other Christian',
    'Muslim',
    'Buddhist',
    'Hindu',
    'Ethnic',
    'Marxist',
])
### SETUP FOR STATISTICS ###
# Set 'columnnumber' to the desired column to investigate
# Set 'tal' to:
# 1: if number
# 0: if text
columnnumber = 1
tal = 1
# Extracts the desired data
if tal == 1:
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy (1.24); the builtin gives the same conversion on every version.
    vektorTest = dataInArray[:, columnnumber].astype(int)
else:
    vektorTest = dataInArray[:, columnnumber]
# print means/variance etc.
### SETUP FOR PCA FOR ALL ###
# Doing PCA with normalising for 'Flag'
# 1) Centre every column on its mean, then divide it either by the square
#    root of a fixed count (columns treated as 1-out-of-K groups) or by the
#    column's own standard deviation (all other columns).
for i in range(dataOutOfK_ALL.shape[1]):
    centred = dataOutOfK_ALL[:, i] - np.mean(dataOutOfK_ALL[:, i])
    if 0 <= i <= 5:  # Landmass
        divisor = math.sqrt(6)
    elif 6 <= i <= 9:  # zone
        divisor = math.sqrt(4)
    elif 11 <= i <= 20:  # language
        divisor = math.sqrt(10)
    elif 30 <= i <= 36:  # Mainhue
        divisor = math.sqrt(7)
    elif i >= 59:
        divisor = math.sqrt(7)
    else:
        # Taken from the column before it is overwritten below.
        divisor = np.std(dataOutOfK_ALL[:, i])
    dataOutOfK_ALL[:, i] = centred / divisor
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement