Advertisement
Guest User

Untitled

a guest
Sep 18th, 2024
27
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.76 KB | None | 0 0
  1. #!/bin/python3
  2.  
  3. # Example code for searching text embeddings stored in a CSV file
  4. import sys
  5. import csv
  6. import openai
  7. import pandas as pd
  8. import numpy as np
  9. from sklearn.metrics.pairwise import cosine_similarity
  10.  
  11. csv_filename = sys.argv[1]
  12. search_phrase = sys.argv[2]
  13. top_n = sys.argv[3]
  14. top_n = int(top_n)
  15.  
  16. search_phrase = search_phrase.lower()
  17. client = openai.OpenAI(base_url="http://localhost:9090/v1", api_key="lm-studio")
  18.  
  19.  
  20. def load_embeddings_from_csv(csv_filename):
  21.     df = pd.read_csv(csv_filename, header=None)
  22.     texts = df[0].tolist()
  23.     embeddings = np.array(df[1].apply(eval).tolist())
  24.     return texts, embeddings
  25.  
  26. def find_relevant_texts(new_text, texts, embeddings, client, top_n):
  27.     # Generate embedding for new text
  28.     response = client.embeddings.create(
  29.         model="nomic-embed-text-v1.5.q8_0",
  30.         input=[new_text]
  31.     )
  32.     new_embedding = np.array(response.data[0].embedding).reshape(1, -1)
  33.    
  34.     # Compute cosine similarities
  35.     similarities = cosine_similarity(new_embedding, embeddings).flatten()
  36.    
  37.     # Find the most similar text
  38. #    best_match_idx = np.argmax(similarities)
  39. #    return texts[best_match_idx], similarities[0, best_match_idx]
  40.     top_indices = similarities.argsort()[-top_n:][::-1]
  41.    
  42.     # Collect the top N results
  43.     top_results = [(texts[idx], similarities[idx]) for idx in top_indices]
  44.    
  45.     return top_results
  46.  
  47. texts, embeddings = load_embeddings_from_csv(csv_filename)
  48. #relevant_text, similarity = find_relevant_texts(search_phrase, texts, embeddings, client, top_n)
  49. #print(f"{relevant_text} & {similarity}")
  50. top_results = find_relevant_texts(search_phrase, texts, embeddings, client, top_n)
  51. for i, (text, similarity) in enumerate(top_results):
  52.     print(f"{i + 1}:\"{text}\":{similarity:.2f}")
  53.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement