Advertisement
Guest User

Untitled

a guest
Mar 18th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.17 KB | None | 0 0
  1. import numpy as np
  2. import math
  3. import matplotlib.patches as mpatches
  4. from mpl_toolkits.mplot3d import Axes3D
  5. from matplotlib.pyplot import figure, xticks,plot, title, xlabel, ylabel, show, xlim, ylim, legend, scatter, subplot, tight_layout, grid, annotate
  6.  
  7. def readFile (fileString, numberOfAttributes, numberOfObservations):
  8. returnArray = np.full((numberOfObservations, numberOfAttributes), '',dtype = 'object')
  9. counter = 0 # Is a incrementing counter, keepin track of position in current row
  10. for i in range (numberOfObservations): # Goes through all observations of the data set
  11. for j in range (numberOfAttributes): # Goes through all attributes of the data set
  12. while (True):
  13. if counter == len(fileString):
  14. return returnArray
  15. if fileString[counter] == ',': # checks, if next attribute is present
  16. counter += 1
  17. break # skips to next attribute
  18. else:
  19. returnArray[i,j] = str(returnArray[i,j]) + str(fileString[counter])
  20. counter += 1
  21. return returnArray
  22.  
  23. #Loading and reading file
  24. observations = 194
  25. attributes = 30
  26. test = open('flag.txt')
  27. text = test.read()
  28. dataInArray = readFile(text, attributes, observations)
  29.  
  30. def outOfK (inputArray, numberOfAttributes, numberOfObservations):
  31. lengthOfOutOfK = np.ones(30)
  32. lengthOfOutOfK[1]= 6
  33. lengthOfOutOfK[2] = 4
  34. lengthOfOutOfK[5] = 10
  35. lengthOfOutOfK[6] = 7
  36. lengthOfOutOfK[17] = 7
  37. lengthOfOutOfK[28] = 7
  38. lengthOfOutOfK[29] = 7
  39. returnArray = np.zeros((194,71)) # creates returnArray
  40. returnCount = 0
  41. for i in range (numberOfAttributes):
  42. if i == 0:
  43. returnCount = returnCount
  44. elif i in (1,2,5,6,17,28,29): # defines the positions to do 1-out-of-K
  45. for j in range (int(lengthOfOutOfK[i])):
  46. if i in (1,2,5,6): # already defined
  47. if i == 6:
  48. for h in range (194):
  49. returnArray[h,returnCount + int((inputArray[h,i]))] = 1
  50. else:
  51. for h in range (194):
  52. returnArray[h,returnCount + int((inputArray[h,i])) - 1] = 1
  53. elif i in (17,28,29): # colors
  54. for h in range (194): # goes through all observations
  55. if inputArray[h,i] == 'red':
  56. returnArray[h,returnCount] = 1
  57. if inputArray[h,i] == 'green':
  58. returnArray[h,returnCount+1] = 1
  59. if inputArray[h,i] == 'blue':
  60. returnArray[h,returnCount+2] = 1
  61. if inputArray[h,i] == 'gold':
  62. returnArray[h,returnCount+3] = 1
  63. if inputArray[h,i] == 'white':
  64. returnArray[h,returnCount+4] = 1
  65. if inputArray[h,i] == 'black':
  66. returnArray[h,returnCount+5] = 1
  67. if inputArray[h,i] in ('orange', 'brown'):
  68. returnArray[h,returnCount+6] = 1
  69. else:
  70. returnArray[:,returnCount] = inputArray[:,i]
  71. if i == 1:
  72. returnCount += 5
  73. elif i == 2:
  74. returnCount += 3
  75. elif i == 5:
  76. returnCount += 9
  77. elif i in (6,17,28,29):
  78. returnCount += 6
  79. returnCount += 1
  80. returnArray = np.delete(returnArray, 0, 1) # removes attribute of countries
  81. returnArray = np.delete(returnArray, np.array([5,6,7,8,9,10,11]), 1) # removes attribute of religion
  82. returnArray = np.delete(returnArray, 3, 1) # removes attribute of population
  83.  
  84. return returnArray
  85.  
  86. dataOutOfK_ALL = outOfK(dataInArray, attributes, observations)
  87.  
  88. outOfKAttributes = np.array(['Landmass','Geographic quadrant','Area[km2]','Population[millions]','Language','Religion','Bars'
  89. , 'Stripes','colours','red','green','blue','gold','white','black','orange/brown','predominant colour',
  90. 'Circles','Crosses','Saltires','Quarters','Sunstars','Crecent moon','Triangle','Icon','Animate','text',
  91. 'Topleft-colour','Bottomright-colour'])
  92.  
  93. definingLandmass = np.array(['North America','South America','Europe','Africa','Asia','Oceania'])
  94. definingZone = np.array(['NE','SE','SW','NW'])
  95. #definingArea
  96. definingPopulation = np.array(['<1m', '1-5m','6-10m','11-20m','21-50m','>50m'])
  97. definingLanguage = np.array(['English','Spanish','French','German','Slavic','Other European','Chineses','Arabic','Japenese, \nTurkish, \nMagyar','Others'])
  98. definingReligion = np.array(['Catholic','Other Christian','Muslim','Buddhist','Hindu','Ethnic','Marxist'])
  99.  
  100.  
  101.  
  102.  
  103. ### SETUP FOR STATISTICS ###
  104. # Set 'columnnumber' to the desired column to investigate
  105. # Set 'tal' to:
  106. # 1: if number
  107. # 0: if text
  108.  
  109. columnnumber = 1
  110. tal = 1
  111.  
  112. # Ejects the desired data
  113. if (tal == 1):
  114. vektorTest = dataInArray[:, columnnumber].astype(np.int)
  115. else:
  116. vektorTest = dataInArray[:, columnnumber]
  117.  
  118. # print means/variance etc.
  119.  
  120.  
  121.  
  122.  
  123.  
  124. ### SETUP FOR PCA FOR ALL ###
  125.  
  126. # Doing PCA with normalising for 'Flag'
  127. #1) Subtracts mean from each and devides med standard deviation
  128. for i in range(len(dataOutOfK_ALL[0, :])):
  129. if (i >= 0 and i <= 5): # Landmass
  130. dataOutOfK_ALL[:, i] = (dataOutOfK_ALL[:, i] - np.mean(dataOutOfK_ALL[:, i])) / math.sqrt(6)
  131. elif (i >= 6 and i <= 9): # zone
  132. dataOutOfK_ALL[:, i] = (dataOutOfK_ALL[:, i] - np.mean(dataOutOfK_ALL[:, i])) / math.sqrt(4)
  133. elif (i >= 11 and i <= 20): # language
  134. dataOutOfK_ALL[:, i] = (dataOutOfK_ALL[:, i] - np.mean(dataOutOfK_ALL[:, i])) / math.sqrt(10)
  135. elif (i >= 30 and i <= 36): # Mainhue
  136. dataOutOfK_ALL[:, i] = (dataOutOfK_ALL[:, i] - np.mean(dataOutOfK_ALL[:, i])) / math.sqrt(7)
  137. elif (i >= 59):
  138. dataOutOfK_ALL[:, i] = (dataOutOfK_ALL[:, i] - np.mean(dataOutOfK_ALL[:, i])) / math.sqrt(7)
  139. else:
  140. dataOutOfK_ALL[:, i] = (dataOutOfK_ALL[:, i] - np.mean(dataOutOfK_ALL[:, i])) / np.std(dataOutOfK_ALL[:, i])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement