import glob
import statistics
from operator import itemgetter
from random import randint

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, pairwise_distances_argmin_min
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


# Counts how many rows of the original data frame belong to a given class,
# given the data frame, the name of the class column and the class value.
def quantidade_por_classe(dados, nome_classe, classe):
    cont = 0
    for x in range(len(dados.index)):
        if dados[nome_classe].iloc[x] == classe:
            cont += 1
    return cont


# Initializes the KNN algorithm, given k; Euclidean distance is used
# as the reference metric.
def inicializacao_KNN(k):
    knn = KNeighborsClassifier(n_neighbors=k, metric='euclidean')
    return knn


# Normalizes the data using min-max (rescaling) normalization.
def normalizar(dados):
    x = dados.values
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)
    dados_norm = pd.DataFrame(x_scaled)
    return dados_norm


# Prints mean, mode, median and standard deviation for an algorithm's
# accuracy and runtime vectors, given both vectors and the algorithm's name.
def tendencia_central(nomeAlgoritmo, vetorAcerto, vetorTempo):
    print('________________________________________________\n')
    print(nomeAlgoritmo)
    print('Tempo Média = ', np.mean(vetorTempo))
    print('Tempo Desvio padrão = ', statistics.pstdev(vetorTempo))
    print('Tempo Moda = ', stats.mode(vetorTempo, axis=None))
    print('Tempo Mediana =', np.median(vetorTempo))
    print('----------------------------------------------')
    print('Acurácia Média = ', np.mean(vetorAcerto))
    print('Acurácia Desvio padrão = ', statistics.pstdev(vetorAcerto))
    print('Acurácia Moda = ', stats.mode(vetorAcerto, axis=None))
    print('Acurácia Mediana = ', np.median(vetorAcerto))
    print('________________________________________________\n')


# Builds a stratified sample from a data frame, given the desired
# sample size and the name of the class column.
def amostra_estrat(dados, tamanho_amostra, classe):
    classes = dados[classe].unique()
    qtde_por_classe = round(tamanho_amostra / len(classes))
    amostras_por_classe = []
    for c in classes:
        indices_c = dados[classe] == c
        obs_c = dados[indices_c]
        amostra_c = obs_c.sample(qtde_por_classe)
        amostras_por_classe.append(amostra_c)
    amostra_estratificada = pd.concat(amostras_por_classe)
    return amostra_estratificada


# Fits every classifier used on the data set to the training split.
def treinaralgoritmos(noclass_train, class_train, tree, knnp1, knnp2, knnp3,
                      knn1, knn2, knn3, naive, svmlinear, svmrbf):
    knn1.fit(noclass_train, class_train)
    knn2.fit(noclass_train, class_train)
    knn3.fit(noclass_train, class_train)
    naive.fit(noclass_train, class_train)
    tree.fit(noclass_train, class_train)
    knnp1.fit(noclass_train, class_train)
    knnp2.fit(noclass_train, class_train)
    knnp3.fit(noclass_train, class_train)
    svmlinear.fit(noclass_train, class_train)
    svmrbf.fit(noclass_train, class_train)


# Initializes the distance-weighted KNN algorithm, given k.
def inicializando_KNNW(k):
    knnp = KNeighborsClassifier(n_neighbors=k, weights='distance',
                                metric='euclidean')
    return knnp
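
# --- Usage sketch (not part of the original script) --------------------------
# A minimal example of how the helpers above fit together: sample, normalize,
# train and score a single KNN. The file name 'dados.csv' and the label column
# 'classe' are assumptions for illustration only.
def demo_classification():
    from sklearn.model_selection import train_test_split

    dados = pd.read_csv('dados.csv')                 # hypothetical data set
    amostra = amostra_estrat(dados, 300, 'classe')   # stratified sample
    atributos = normalizar(amostra.drop(columns=['classe']))
    rotulos = amostra['classe'].values

    x_tr, x_te, y_tr, y_te = train_test_split(atributos, rotulos,
                                              test_size=0.3)
    knn = inicializacao_KNN(3)
    knn.fit(x_tr, y_tr)
    print('KNN accuracy:', accuracy_score(y_te, knn.predict(x_te)))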
# Returns, for each row of the data frame, the index of (and the distance to)
# the nearest center, using the Minkowski metric.
def geneticlabels(dataframe, centers):
    return pairwise_distances_argmin_min(dataframe, centers,
                                         metric='minkowski')


# Naive k-means-style clustering: picks n_clusters random rows of X as the
# initial centers, then alternates assignment and center updates until the
# centers stop moving or max_it iterations are exceeded.
def find_clusters(X, n_clusters, rng, max_it):
    i = rng.permutation(X.shape[0])[:n_clusters]
    centers = X[i]
    max_iterator = 0
    distances = []
    while True:
        labels, distance = pairwise_distances_argmin_min(X, centers,
                                                         metric='minkowski')
        distances.append(distance)
        new_centers = np.array([X[labels == i].mean(0)
                                for i in range(n_clusters)])
        if np.all(centers == new_centers) or max_iterator > max_it:
            break
        centers = new_centers
        max_iterator += 1
    return centers, labels, distances


# Same loop as find_clusters, but starts from the centers given in `array`
# (e.g. produced by the genetic search) instead of a random sample.
def find_clustersGENETIC(X, n_clusters, max_it, array):
    centers = array
    max_iterator = 0
    distances = []
    while True:
        labels, distance = pairwise_distances_argmin_min(X, centers,
                                                         metric='minkowski')
        distances.append(distance)
        new_centers = np.array([X[labels == i].mean(0)
                                for i in range(n_clusters)])
        if np.all(centers == new_centers) or max_iterator > max_it:
            break
        centers = new_centers
        max_iterator += 1
    return centers, labels, distances


# Loads every image matching the glob pattern into the given list.
def loadFiles(path, array):
    for i in glob.glob(path):
        img = cv2.imread(i)
        array.append(img)
    return array


# Applies a Gaussian blur filter to every photo in the list.
def blurConversion(arrayphotos, val1, val2):
    for x in range(len(arrayphotos)):
        arrayphotos[x] = cv2.GaussianBlur(arrayphotos[x], (val1, val1), val2)
    return arrayphotos


# Binarizes the photos with mean adaptive thresholding; `threshold` is the
# value assigned to pixels that pass the test and `val1` is the block size.
def binaryConversion(arrayphotos, threshold, val1):
    for x in range(len(arrayphotos)):
        arrayphotos[x] = cv2.adaptiveThreshold(arrayphotos[x], threshold,
                                               cv2.ADAPTIVE_THRESH_MEAN_C,
                                               cv2.THRESH_BINARY, val1, 10)
    return arrayphotos


# Inverts the binary photos.
def invertConversion(arrayphotos):
    for x in range(len(arrayphotos)):
        arrayphotos[x] = cv2.bitwise_not(arrayphotos[x])
    return arrayphotos


# Converts the photos to grayscale.
def grayConversion(arrayphotos):
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = cv2.cvtColor(arrayphotos[x], cv2.COLOR_BGR2GRAY)
    return arrayphotos


# Feature extraction: for each binary image, collects the perimeter and the
# vertex count of the first contour plus the seven Hu moments.
def extractCarac(imagensVector):
    shapesTotal = []
    for i in imagensVector:
        shape1 = []
        # cv2.findContours returns 3 values on OpenCV 3.x and 2 on 4.x;
        # taking the last two keeps the call working on both.
        contours, hierarchy = cv2.findContours(i, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)[-2:]
        perimeter = cv2.arcLength(contours[0], True)
        approx = cv2.approxPolyDP(contours[0], 0.04 * perimeter, True)
        #area = cv2.contourArea(contours[0])
        #shape1.append(area)
        shape1.append(perimeter)
        shape1.append(len(approx))
        m = cv2.moments(i)
        #cX = int((m['m10']/m['m00']))
        #cY = int((m['m01']/m['m00']))
        #shape1.append(cX)
        #shape1.append(cY)
        moments = cv2.HuMoments(m, True).flatten()
        for x in moments:
            shape1.append(x)
        shapesTotal.append(shape1)
    return shapesTotal


# Within-cluster sum of squares used as the fitness of a genetic individual:
# decodes the individual into center indices and sums the squared distances.
def WCSSgenetic(x, population):
    arrayint = []
    for a in x:
        arrayint.append(int(a))
    print(arrayint)
    soma = 0
    for b in arrayint:
        labels, distances = pairwise_distances_argmin_min(
            population[b], population, metric='minkowski')
        for x in distances:
            soma += x ** 2
    return soma


# Generates the initial population: each individual is the concatenation of
# K randomly chosen rows of X (candidate cluster centers).
def generatepopulation(X, numberpopu, K, rng):
    population = []
    for x in range(numberpopu):
        first = rng.permutation(X.shape[0])[:K]
        print(first)
        population.append(np.concatenate(X[first]))
    return population
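
# --- Usage sketch (not part of the original script) --------------------------
# How the image pipeline and the clustering loop above chain together. The
# glob pattern 'fotos/*.png', the filter parameters and the cluster count are
# assumptions for illustration only.
def demo_clustering():
    fotos = loadFiles('fotos/*.png', [])      # hypothetical image folder
    fotos = grayConversion(fotos)
    fotos = blurConversion(fotos, 5, 0)       # 5x5 kernel, sigma from kernel
    fotos = binaryConversion(fotos, 255, 21)  # maxValue 255, block size 21
    fotos = invertConversion(fotos)

    X = np.array(extractCarac(fotos))         # one feature row per image
    rng = np.random.RandomState(42)
    centers, labels, distances = find_clusters(X, n_clusters=3, rng=rng,
                                               max_it=100)
    print(centers, labels)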
# Identical to find_clustersGENETIC above; kept as an alias so existing
# lowercase call sites keep working.
find_clustersgenetic = find_clustersGENETIC


# NORMALIZATION: min-max rescaling of a data frame.
def normalize(df1):
    x = df1.values.astype(float)
    min_max_scaler = preprocessing.MinMaxScaler()
    scaled = min_max_scaler.fit_transform(x)
    df_normalized = pd.DataFrame(scaled)
    return df_normalized


# Min-max rescaling of a 2-D array.
def normalizearray(array):
    scaled = preprocessing.MinMaxScaler().fit_transform(array)
    return scaled


# Min-max rescaling applied to each photo individually.
def normalizeArrayofArrays(arrayphotos):
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = preprocessing.MinMaxScaler().fit_transform(arrayphotos[x])
    return arrayphotos


# Applies PCA with pcanumber components to each photo individually.
def PCAarray(pcanumber, arrayphotos):
    pca = PCA(n_components=pcanumber)
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = pca.fit_transform(arrayphotos[x])
    return arrayphotos


# Applies PCA with pcanumber components to the whole array at once.
def PCAarrayONLY(pcanumber, arrayphotos):
    pca = PCA(n_components=pcanumber)
    arrayphotos = pca.fit_transform(arrayphotos)
    return arrayphotos


# Applies PCA with pcanumber components to a data frame.
def PCAdataframe(pcanumber, dataframe):
    pca = PCA(n_components=pcanumber)
    dataframe = pca.fit_transform(dataframe)
    return dataframe


# Fits PCA on a 1-D-sample array and returns the transformed array.
def pca1darray(pcanumber, array):
    pca = PCA(n_components=pcanumber)
    pca.fit(array)
    array = pca.transform(array)
    return array


# Converts the photos to grayscale.
def Turntogray(arrayphotos):
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = cv2.cvtColor(arrayphotos[x], cv2.COLOR_BGR2GRAY)
    return arrayphotos


# Flattens each 2-D photo into a 1-D vector.
def reshape2dto1d(arrayphotos):
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = arrayphotos[x].ravel()
    return arrayphotos


# Resizes every photo to size1 x size2.
def resizephotos(arrayphotos, size1, size2):
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = cv2.resize(arrayphotos[x], (size1, size2))
    return arrayphotos


# Applies a Gaussian blur to every photo (same as blurConversion).
def gaussianblurArray(arrayphotos, val1, val2):
    for x in range(len(arrayphotos)):
        arrayphotos[x] = cv2.GaussianBlur(arrayphotos[x], (val1, val1), val2)
    return arrayphotos


# Mean adaptive thresholding (same as binaryConversion).
def binaryadaptive(arrayphotos, threshold, val1):
    for x in range(len(arrayphotos)):
        arrayphotos[x] = cv2.adaptiveThreshold(arrayphotos[x], threshold,
                                               cv2.ADAPTIVE_THRESH_MEAN_C,
                                               cv2.THRESH_BINARY, val1, 10)
    return arrayphotos


# Inverts the binary photos (same as invertConversion).
def invertbinaryphotos(arrayphotos):
    for x in range(len(arrayphotos)):
        arrayphotos[x] = cv2.bitwise_not(arrayphotos[x])
    return arrayphotos


# Loads every image matching the glob pattern into a new list.
def loadfolderimgs(path):
    arrayphotos = []
    for img in glob.glob(path):
        n = cv2.imread(img)
        arrayphotos.append(n)
    return arrayphotos


# Reshapes each 3-D photo (H x W x C) into a 2-D array (H x W*C).
def reshape3dto2d(arrayphotos):
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = np.reshape(arrayphotos[x],
                                    (arrayphotos[x].shape[0],
                                     arrayphotos[x].shape[1] * arrayphotos[x].shape[2]))
    return arrayphotos


# Converts each photo to a float numpy array.
def imgtoarray(arrayphotos):
    size = len(arrayphotos)
    for x in range(0, size):
        arrayphotos[x] = np.array(arrayphotos[x], dtype=float)
    return arrayphotos


# Slides a window of windowSize over the image in steps of stepSize,
# yielding the top-left corner and the window contents.
def sliding_window(image, stepSize, windowSize):
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])


# Plots the cumulative explained variance per PCA component.
def graphicPCA(pca):
    plt.figure()
    plt.plot(np.cumsum(pca.explained_variance_ratio_))
    plt.xlabel('Number of Components')
    plt.ylabel('Variance (%)')  # cumulative, per component
    plt.title('Dataset Explained Variance')
    plt.show()
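
# --- Usage sketch (not part of the original script) --------------------------
# Reducing flattened photos with PCA and inspecting how much variance the
# chosen number of components keeps. The folder path, image size and
# component count are hypothetical.
def demo_pca():
    fotos = loadfolderimgs('fotos/*.png')     # hypothetical image folder
    fotos = Turntogray(fotos)
    fotos = resizephotos(fotos, 64, 64)
    fotos = reshape2dto1d(fotos)              # one 4096-d vector per photo

    X = normalizearray(np.array(fotos, dtype=float))
    pca = PCA(n_components=10)
    reduzido = pca.fit_transform(X)
    graphicPCA(pca)                           # cumulative variance curve
    print(reduzido.shape)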
# Replaces each photo with its seven Hu moments.
def gethumoments(arrayphotos):
    for x in range(len(arrayphotos)):
        arrayphotos[x] = cv2.HuMoments(cv2.moments(arrayphotos[x]),
                                       True).flatten()
    return arrayphotos


# Replaces each photo with its HOG descriptor.
def getHOG(arrayphotos):
    hog = cv2.HOGDescriptor()
    for x in range(len(arrayphotos)):
        arrayphotos[x] = hog.compute(arrayphotos[x]).flatten()
    return arrayphotos


# Replaces each photo with the concatenation of its HOG descriptor and
# its Hu moments.
def getHOGplusHU(arrayphotos):
    hog = cv2.HOGDescriptor()
    for x in range(len(arrayphotos)):
        aux = []
        h = hog.compute(arrayphotos[x]).flatten()
        for ho in h:
            aux.append(ho)
        hu = cv2.HuMoments(cv2.moments(arrayphotos[x]), True).flatten()
        for huu in hu:
            aux.append(huu)
        arrayphotos[x] = aux
    return arrayphotos


# Same combination, but first reduces the HOG descriptors to 50 dimensions
# with PCA before concatenating them with the Hu moments.
def getHOGplusHU2(arrayphotos):
    hogarray = []
    huarray = []
    hog = cv2.HOGDescriptor()
    for x in range(len(arrayphotos)):
        hogarray.append(hog.compute(arrayphotos[x]).flatten())
    hogarray = pd.DataFrame(hogarray)
    hogarray = PCAdataframe(50, hogarray)
    for y in range(len(arrayphotos)):
        huarray.append(cv2.HuMoments(cv2.moments(arrayphotos[y]),
                                     True).flatten())
    for h in range(len(arrayphotos)):
        arrayphotos[h] = np.concatenate((hogarray[h], huarray[h]))
    return arrayphotos


# Extracts geometric features (perimeter, vertex count, area and the seven
# Hu moments) from each binary image of a geometric figure.
def extratorcaracteristicafiggeometrica(arrayimgs):
    squarescarac = []
    for x in arrayimgs:
        aux = []
        # cv2.findContours returns 3 values on OpenCV 3.x and 2 on 4.x;
        # taking the last two keeps the call working on both.
        countours, hierachy = cv2.findContours(x, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)[-2:]
        peri = cv2.arcLength(countours[0], True)  # perimeter
        aux.append(peri)
        aproxx = cv2.approxPolyDP(countours[0], 0.04 * peri, True)  # vertices
        vertc = len(aproxx)
        aux.append(vertc)
        area = cv2.contourArea(countours[0])  # area
        aux.append(area)
        momentum = cv2.moments(x)
        #cX = int(momentum["m10"] / momentum["m00"])
        #cY = int(momentum["m01"] / momentum["m00"])
        #aux.append(cX)
        #aux.append(cY)
        moments = cv2.HuMoments(momentum, True).flatten()
        for i in moments:
            aux.append(i)
        squarescarac.append(aux)
    return squarescarac


# Initializes a vote counter with zero for every class.
def initialize_dic_majority(classes):
    majority = {}
    for c in classes:
        majority[c] = 0
    return majority


# Majority-vote accuracy for a clustering: each cluster is labeled with the
# most frequent real label among its members, and the accuracy of that
# labeling against the real labels is returned.
def accuracy_majority_vote(base, predict_labels, real_labels, n_clusters):
    classes = real_labels.unique()
    majority = []
    groups = []
    k = 0
    for i in range(n_clusters):
        group = []
        for a in range(len(base)):
            if predict_labels[a] == i:
                group.append(real_labels[a])
        groups.append(group)
        majority.append(initialize_dic_majority(classes))
        for real_label in group:
            majority[k][real_label] += 1
        k += 1
    label_groups = []
    for m in majority:
        label_groups.append(max(m.items(), key=itemgetter(1))[0])
    pred_labels = []
    true_labels = []
    for g in range(len(groups)):
        pred_labels = pred_labels + [label_groups[g]] * len(groups[g])
        true_labels = true_labels + [a for a in groups[g]]
    return accuracy_score(pred_labels, true_labels)
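
# --- Usage sketch (not part of the original script) --------------------------
# Scoring a clustering against known labels with majority voting. The toy
# points and the class names below are hypothetical.
def demo_majority_vote():
    X = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 4.9]])
    reais = pd.Series(['quadrado', 'quadrado', 'circulo', 'circulo'])
    rng = np.random.RandomState(0)
    centers, labels, _ = find_clusters(X, n_clusters=2, rng=rng, max_it=100)
    print('Majority-vote accuracy:',
          accuracy_majority_vote(X, labels, reais, 2))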