Advertisement
Guest User

Untitled

a guest
Mar 20th, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.12 KB | None | 0 0
  1. import numpy as np
  2. import math
  3. import matplotlib.patches as mpatches
  4. from mpl_toolkits.mplot3d import Axes3D
  5. from matplotlib.pyplot import figure, xticks, plot, title, xlabel, ylabel, show, xlim, ylim, legend, scatter, subplot, \
  6. tight_layout, grid, annotate
  7.  
  8.  
  9. def readFile(fileString, numberOfAttributes, numberOfObservations):
  10. returnArray = np.full((numberOfObservations, numberOfAttributes), '', dtype='object')
  11. counter = 0 # Is a incrementing counter, keepin track of position in current row
  12. for i in range(numberOfObservations): # Goes through all observations of the data set
  13. for j in range(numberOfAttributes): # Goes through all attributes of the data set
  14. while (True):
  15. if counter == len(fileString):
  16. return returnArray
  17. if fileString[counter] == ',': # checks, if next attribute is present
  18. counter += 1
  19. break # skips to next attribute
  20. else:
  21. returnArray[i, j] = str(returnArray[i, j]) + str(fileString[counter])
  22. counter += 1
  23. return returnArray
  24.  
  25.  
  26. # Loading and reading file
  27. observations = 194
  28. attributes = 30
  29. test = open('../Data/flag.txt')
  30. text = test.read()
  31. dataInArray = readFile(text, attributes, observations)
  32.  
  33.  
  34. def outOfK(inputArray, numberOfAttributes, numberOfObservations):
  35. lengthOfOutOfK = np.ones(30)
  36. lengthOfOutOfK[1] = 6
  37. lengthOfOutOfK[2] = 4
  38. lengthOfOutOfK[5] = 10
  39. lengthOfOutOfK[6] = 7
  40. lengthOfOutOfK[17] = 7
  41. lengthOfOutOfK[28] = 7
  42. lengthOfOutOfK[29] = 7
  43. returnArray = np.zeros((194, 71)) # creates returnArray
  44. returnCount = 0
  45. for i in range(numberOfAttributes):
  46. if i == 0:
  47. returnCount = returnCount
  48. elif i in (1, 2, 5, 6, 17, 28, 29): # defines the positions to do 1-out-of-K
  49. for j in range(int(lengthOfOutOfK[i])):
  50. if i in (1, 2, 5, 6): # already defined
  51. if i == 6:
  52. for h in range(194):
  53. returnArray[h, returnCount + int((inputArray[h, i]))] = 1
  54. else:
  55. for h in range(194):
  56. returnArray[h, returnCount + int((inputArray[h, i])) - 1] = 1
  57. elif i in (17, 28, 29): # colors
  58. for h in range(194): # goes through all observations
  59. if inputArray[h, i] == 'red':
  60. returnArray[h, returnCount] = 1
  61. if inputArray[h, i] == 'green':
  62. returnArray[h, returnCount + 1] = 1
  63. if inputArray[h, i] == 'blue':
  64. returnArray[h, returnCount + 2] = 1
  65. if inputArray[h, i] == 'gold':
  66. returnArray[h, returnCount + 3] = 1
  67. if inputArray[h, i] == 'white':
  68. returnArray[h, returnCount + 4] = 1
  69. if inputArray[h, i] == 'black':
  70. returnArray[h, returnCount + 5] = 1
  71. if inputArray[h, i] in ('orange', 'brown'):
  72. returnArray[h, returnCount + 6] = 1
  73. else:
  74. returnArray[:, returnCount] = inputArray[:, i]
  75. if i == 1:
  76. returnCount += 5
  77. elif i == 2:
  78. returnCount += 3
  79. elif i == 5:
  80. returnCount += 9
  81. elif i in (6, 17, 28, 29):
  82. returnCount += 6
  83. returnCount += 1
  84. returnArray = np.delete(returnArray, 0, 1) # removes attribute of countries
  85. returnArray = np.delete(returnArray, np.array([5, 6, 7, 8, 9, 10, 11]), 1) # removes attribute of religion
  86. returnArray = np.delete(returnArray, 3, 1) # removes attribute of population
  87.  
  88. return returnArray
  89.  
  90.  
  91. dataOutOfK_ALL = outOfK(dataInArray, attributes, observations)
  92.  
  93. outOfKAttributes = np.array(
  94. ['Landmass', 'Geographic quadrant', 'Area[km2]', 'Population[millions]', 'Language', 'Religion', 'Bars'
  95. , 'Stripes', 'colours', 'red', 'green', 'blue', 'gold', 'white', 'black', 'orange/brown', 'predominant colour',
  96. 'Circles', 'Crosses', 'Saltires', 'Quarters', 'Sunstars', 'Crecent moon', 'Triangle', 'Icon', 'Animate', 'text',
  97. 'Topleft-colour', 'Bottomright-colour'])
  98.  
  99. definingLandmass = np.array(['North America', 'South America', 'Europe', 'Africa', 'Asia', 'Oceania'])
  100. definingZone = np.array(['NE', 'SE', 'SW', 'NW'])
  101. # definingArea
  102. definingPopulation = np.array(['<1m', '1-5m', '6-10m', '11-20m', '21-50m', '>50m'])
  103. definingLanguage = np.array(['English', 'Spanish', 'French', 'German', 'Slavic', 'Other European', 'Chineses', 'Arabic',
  104. 'Japenese, \nTurkish, \nMagyar', 'Others'])
  105. definingReligion = np.array(['Catholic', 'Other Christian', 'Muslim', 'Buddhist', 'Hindu', 'Ethnic', 'Marxist'])
  106.  
  107. ### SETUP FOR STATISTICS ###
  108. # Set 'columnnumber' to the desired column to investigate
  109. # Set 'tal' to:
  110. # 1: if number
  111. # 0: if text
  112.  
  113. columnnumber = 1
  114. tal = 1
  115.  
  116. # Ejects the desired data
  117. if (tal == 1):
  118. vektorTest = dataInArray[:, columnnumber].astype(np.int)
  119. else:
  120. vektorTest = dataInArray[:, columnnumber]
  121.  
  122. # print means/variance etc.
  123.  
  124.  
  125. ### SETUP FOR PCA FOR ALL ###
  126.  
  127. # Doing PCA with normalising for 'Flag'
  128. # 1) Subtracts the mean from each attribute and divides by the standard deviation
  129. def normalizeSet(data, M):
  130. temp = M -1
  131. for i in range(temp):
  132. if (i >= 0 and i <= 5): # Landmass
  133. data[:, i] = (data[:, i]) / math.sqrt(6)
  134. elif (i >= 6 and i <= 9): # zone
  135. data[:, i] = (data[:, i]) / math.sqrt(4)
  136. elif (i >= 11 and i <= 20): # language
  137. data[:, i] = (data[:, i]) / math.sqrt(10)
  138. elif (i >= 30 and i <= 36): # Mainhue
  139. data[:, i] = (data[:, i]) / math.sqrt(7)
  140. elif (i >= 59):
  141. data[:, i] = (data[:, i]) / math.sqrt(7)
  142. else:
  143. data[:, i] = (data[:, i]) / np.std(data[:, i])
  144. return data
  145.  
  146. #dataOutOfK_ALL[:,10] = dataOutOfK_ALL[:,10] / np.std(dataOutOfK_ALL[:, 10])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement