import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm


def calc_scores(y_pred, y_true):
    """Return the row-wise cosine similarity between two 2-D arrays.

    Row ``i`` of ``y_pred`` is compared against row ``i`` of ``y_true``.

    Args:
        y_pred: 2-D array; its first dimension determines how many rows
            are scored.
        y_true: 2-D array indexed with the same row numbers as ``y_pred``.

    Returns:
        A list with one cosine-similarity value per row of ``y_pred``.
    """
    return [
        # Each row is sliced as a (1, d) matrix because cosine_similarity
        # works on 2-D inputs; [0][0] unwraps the resulting 1x1 matrix.
        cosine_similarity(y_pred[np.newaxis, i, :], y_true[np.newaxis, i, :])[0][0]
        for i in range(y_pred.shape[0])
    ]


def seek_ratio(embeddings1, embeddings2, embeddings3, embeddings4,
               name1, name2, name3, name4):
    """Grid-search the blend weights of four embedding sets.

    Every combination of four non-negative weights on a 0.02 grid that
    sums to 1 is tried. A combination's score is the mean of
    ``calc_scores`` between the weighted sum of the embeddings and the
    module-level ``y_true``.

    NOTE(review): ``y_true`` is read from module scope and is not a
    parameter; it must be defined before this function is called.

    Args:
        embeddings1: First embedding array; all four must be combinable
            element-wise.
        embeddings2: Second embedding array.
        embeddings3: Third embedding array.
        embeddings4: Fourth embedding array.
        name1: Label printed next to the first weight.
        name2: Label printed next to the second weight.
        name3: Label printed next to the third weight.
        name4: Label printed next to the fourth weight.

    Returns:
        Tuple ``(best_ratio1, best_ratio2, best_ratio3, best_ratio4, best_CV)``
        for the highest-scoring combination (the first one on ties).
    """
    # Number of 0.02-wide steps between 0 and 1. Enumerating integer step
    # counts that sum to `steps` avoids floating-point round-off in
    # `1 - r1 - r2 - r3`, which can turn an exact 0 into a tiny negative
    # value and wrongly discard a valid combination.
    steps = 50

    # Weights are stored next to their score: only valid combinations are
    # evaluated, so a position in `CVs` cannot be decoded arithmetically
    # back into grid coordinates.
    candidates = []
    CVs = []
    for i in tqdm(range(steps + 1)):
        for j in range(steps + 1 - i):
            for k in range(steps + 1 - i - j):
                r1 = i / steps
                r2 = j / steps
                r3 = k / steps
                r4 = (steps - i - j - k) / steps
                embeddings = (
                    embeddings1 * r1
                    + embeddings2 * r2
                    + embeddings3 * r3
                    + embeddings4 * r4
                )
                candidates.append((r1, r2, r3, r4))
                CVs.append(np.array(calc_scores(embeddings, y_true)).mean())

    best_index = int(np.argmax(np.array(CVs)))
    best_CV = CVs[best_index]
    best_ratio1, best_ratio2, best_ratio3, best_ratio4 = candidates[best_index]

    print(f"Best CV: {best_CV:0.5f} [emb1[{name1}]={best_ratio1:.2f}, emb2[{name2}]={best_ratio2:.2f}, "
          f"emb3[{name3}]={best_ratio3:.2f}, emb4[{name4}]={best_ratio4:.2f}]")

    return best_ratio1, best_ratio2, best_ratio3, best_ratio4, best_CV