Advertisement
Guest User

Untitled

a guest
Oct 18th, 2019
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.27 KB | None | 0 0
  1. def semantic_search_sif(search_string, sif_model, data, em_model, indexed_docs, n_top=10):
  2. process_search_str = search_string.split()
  3. matching_idx = []
  4. matched_data = sif_model.sv.similar_by_sentence(sentence=process_search_str,
  5. model=sif_model, indexable=indexed_docs.items,
  6. topn=n_top)
  7. for match in matched_data:
  8. matching_idx.append((match[1], match[2]))
  9.  
  10. result_df = pd.DataFrame(columns=['headlines_matched', 'relevant_words', 'similarity'], index=range(n_top))
  11. for i in range(n_top):
  12. result_df['headlines_matched'][i] = data.headline_text[matching_idx[i][0]]
  13. result_df['relevant_words'][i] = get_relevant_words(search_tok=process_search_str,
  14. doc_tok=data.processed_headlines[matching_idx[i][0]],
  15. model=em_model)
  16. result_df['similarity'][i] = matching_idx[i][1]
  17.  
  18. return result_df
  19.  
  20. search_headline = input("Search headline: ")
  21. results = semantic_search_sif(search_string=search_headline, sif_model=sif_model, data=data,
  22. em_model=em_model, indexed_docs=headlines_idx)
  23. print(results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement