Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# One-hot layout of the integer-coded attributes:
# attribute index -> (number of indicator columns, lowest code value).
_CODED_ATTRIBUTES = {1: (6, 1), 2: (4, 1), 5: (10, 1), 6: (7, 0)}

# Attributes that hold colour names; each expands to seven indicator columns.
_COLOUR_ATTRIBUTES = (17, 28, 29)
_COLOUR_WIDTH = 7
_COLOUR_SLOTS = {
    'red': 0,
    'green': 1,
    'blue': 2,
    'gold': 3,
    'white': 4,
    'black': 5,
    'orange': 6,  # orange and brown share the last indicator column
    'brown': 6,
}

# Width of the encoded matrix before any column is dropped.  The value
# matches a 30-attribute input: 1 + 6 + 4 + 2 + 10 + 7 + 10 + 7 + 10 + 7 + 7.
_ENCODED_WIDTH = 71


def outOfK(inputArray, numberOfAttributes, numberOfObservations):
    """Return a 1-out-of-K (one-hot) encoded copy of ``inputArray``.

    Attributes 1, 2, 5 and 6 hold integer category codes and attributes 17,
    28 and 29 hold colour names; each of them is expanded into a group of
    0/1 indicator columns.  Attribute 0 is skipped (its column stays zero)
    and every other attribute is copied through as a single numeric column.
    Colour names outside the known set leave the whole group at zero.

    Args:
        inputArray: 2-D array indexed as ``[observation, attribute]``.
            Coded attributes must be convertible with ``int()``; copied
            attributes must be convertible to float.
        numberOfAttributes: Number of leading attributes to encode.
        numberOfObservations: Number of rows to encode.  This used to be
            hard-coded to 194; passing 194 gives the same result as before.

    Returns:
        A float array with ``numberOfObservations`` rows and 62 columns
        (71 encoded columns minus the 9 removed at the end).
    """
    returnArray = np.zeros((numberOfObservations, _ENCODED_WIDTH))
    returnCount = 0  # first output column of the attribute being encoded

    for i in range(numberOfAttributes):
        if i == 0:
            # Column reserved for attribute 0; it stays zero and is dropped below.
            width = 1
        elif i in _CODED_ATTRIBUTES:
            width, lowestCode = _CODED_ATTRIBUTES[i]
            for h in range(numberOfObservations):
                slot = int(inputArray[h, i]) - lowestCode
                returnArray[h, returnCount + slot] = 1
        elif i in _COLOUR_ATTRIBUTES:
            width = _COLOUR_WIDTH
            for h in range(numberOfObservations):
                slot = _COLOUR_SLOTS.get(inputArray[h, i])
                if slot is not None:
                    returnArray[h, returnCount + slot] = 1
        else:
            width = 1
            returnArray[:, returnCount] = inputArray[:, i]
        returnCount += width

    returnArray = np.delete(returnArray, 0, 1)  # removes attribute of countries
    # NOTE(review): with column 0 gone, indices 5..11 are the last indicator
    # of attribute 1, the four indicators of attribute 2 and the columns
    # copied from attributes 3 and 4; the attribute-6 group sits at 22..28 at
    # this point.  The indices are left untouched so that downstream column
    # positions do not move -- confirm which columns were meant.
    returnArray = np.delete(returnArray, np.array([5, 6, 7, 8, 9, 10, 11]), 1)  # removes attribute of religion
    # NOTE(review): index 3 is now the fourth indicator of attribute 1, not
    # a population column -- confirm.
    returnArray = np.delete(returnArray, 3, 1)  # removes attribute of population
    return returnArray
dataOutOfK_ALL = outOfK(dataInArray, attributes, observations)

# 1) Subtract the mean from each column and divide it by a scale factor.
# Columns inside the hard-coded 1-out-of-K ranges are divided by sqrt(K);
# every other column is divided by its own standard deviation.
# NOTE(review): the index ranges below are fixed numbers and have to agree
# with the column layout produced by outOfK -- confirm they still line up.
for i in range(dataOutOfK_ALL.shape[1]):
    column = dataOutOfK_ALL[:, i]
    if 0 <= i <= 5:  # Landmass
        scale = math.sqrt(6)
    elif 6 <= i <= 9:  # zone
        scale = math.sqrt(4)
    elif 11 <= i <= 20:  # language
        scale = math.sqrt(10)
    elif 30 <= i <= 36 or i >= 59:  # Mainhue, and every column from 59 on
        scale = math.sqrt(7)
    else:
        scale = np.std(column)
        if scale == 0:
            # A constant column is already all zeros once centred; dividing
            # by 1 keeps it that way instead of filling it with NaN, which
            # would break the SVD below.
            scale = 1.0
    dataOutOfK_ALL[:, i] = (column - np.mean(column)) / scale

# 2) Doing SVD
# np.linalg.svd returns the right singular vectors as rows, so V here is the
# transposed matrix (V^T), not V itself.
U, S, V = np.linalg.svd(dataOutOfK_ALL, full_matrices=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement