Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn.metrics.pairwise import cosine_similarity
- from tqdm import tqdm
def calc_scores(y_pred, y_true):
    """Return the cosine similarity between matching rows of two 2-D arrays.

    Row ``i`` of ``y_pred`` is compared with row ``i`` of ``y_true``. All
    rows are processed in one vectorised NumPy computation rather than one
    pairwise-similarity call per row.

    Args:
        y_pred: 2-D array-like of shape ``(n_rows, n_features)``.
        y_true: 2-D array-like with at least ``n_rows`` rows and the same
            number of features; rows beyond ``n_rows`` are ignored.

    Returns:
        A list of ``n_rows`` float scores. A row pair in which either
        vector has zero norm scores 0.0 instead of NaN.
    """
    pred = np.asarray(y_pred, dtype=float)
    # Pair only the first len(y_pred) rows, matching the per-row loop this
    # replaces, which iterated over y_pred's rows only.
    truth = np.asarray(y_true, dtype=float)[: pred.shape[0]]

    # Row-wise dot products without building the full (n_rows x n_rows)
    # similarity matrix.
    dots = np.einsum("ij,ij->i", pred, truth)
    norms = np.linalg.norm(pred, axis=1) * np.linalg.norm(truth, axis=1)
    # A zero vector has no direction: divide by 1 so its score is 0.0
    # (the dot product is already 0) rather than 0/0 = NaN.
    norms[norms == 0.0] = 1.0
    return list(dots / norms)
def seek_ratio(embeddings1, embeddings2, embeddings3, embeddings4, name1, name2, name3, name4):
    """Grid-search the blend weights of four embeddings that maximise the mean score.

    Every weight combination ``(r1, r2, r3, r4)`` on a 0.02 grid with
    ``r1 + r2 + r3 + r4 == 1`` is tried. For each one the weighted sum of
    the four embeddings is scored with ``calc_scores`` against the
    module-level ``y_true``, and the combination with the highest mean
    score is kept (the first one encountered wins ties).

    Args:
        embeddings1, embeddings2, embeddings3, embeddings4: Embeddings to
            blend. They must support multiplication by a scalar and
            element-wise addition with each other (presumably NumPy arrays
            of identical shape -- confirm against the caller).
        name1, name2, name3, name4: Labels used only in the printed summary.

    Returns:
        A tuple ``(best_ratio1, best_ratio2, best_ratio3, best_ratio4,
        best_CV)``: the four winning weights and their mean score.
    """
    # Walk the grid in integer steps of 1/50 (= 0.02). With float
    # subtraction, "1 - r1 - r2 - r3" can come out as a tiny negative number
    # where it should be exactly 0, which would silently drop valid
    # combinations on the boundary of the simplex.
    n_steps = 50

    best_cv = None
    best_ratios = None
    for i1 in tqdm(range(n_steps + 1)):
        r1 = i1 / n_steps
        # Partial sums are hoisted out of the inner loops; the left-to-right
        # order of the additions is the same as in a single expression.
        blend1 = embeddings1 * r1
        for i2 in range(n_steps + 1 - i1):
            r2 = i2 / n_steps
            blend12 = blend1 + embeddings2 * r2
            for i3 in range(n_steps + 1 - i1 - i2):
                i4 = n_steps - i1 - i2 - i3  # remaining share, always >= 0
                r3 = i3 / n_steps
                r4 = i4 / n_steps
                embeddings = blend12 + embeddings3 * r3 + embeddings4 * r4
                cv = np.mean(calc_scores(embeddings, y_true))
                # Keep the weights next to the score they produced. Decoding
                # them afterwards from a flat list index is wrong, because
                # the list only holds the combinations that were evaluated.
                if best_ratios is None or cv > best_cv:
                    best_cv = cv
                    best_ratios = (r1, r2, r3, r4)

    best_ratio1, best_ratio2, best_ratio3, best_ratio4 = best_ratios
    best_CV = best_cv
    print(f"Best CV: {best_CV:0.5f} [emb1[{name1}]={best_ratio1:.2f}, emb2[{name2}]={best_ratio2:.2f}, "
          f"emb3[{name3}]={best_ratio3:.2f}, emb4[{name4}]={best_ratio4:.2f}]")
    return best_ratio1, best_ratio2, best_ratio3, best_ratio4, best_CV
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement