Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def semantic_search_sif(search_string, sif_model, data, em_model, indexed_docs, n_top=10):
    """Return the headlines most similar to a free-text query.

    The query is split on whitespace and passed to
    ``sif_model.sv.similar_by_sentence`` together with ``indexed_docs.items``.
    Each returned match is read positionally: element 1 is used as the
    document's label into ``data`` and element 2 as the similarity score.

    Args:
        search_string: Raw query text.
        sif_model: Model object exposing ``sv.similar_by_sentence``.
        data: Object exposing ``headline_text`` and ``processed_headlines``,
            both indexable by the label carried in each match.
        em_model: Model forwarded unchanged to ``get_relevant_words``.
        indexed_docs: Object whose ``items`` attribute is passed as the
            ``indexable`` argument of the similarity query.
        n_top: Forwarded as ``topn``; the number of matches requested.

    Returns:
        A ``pandas.DataFrame`` with the columns ``headlines_matched``,
        ``relevant_words`` and ``similarity``, holding one row per match in
        the order the model returned them (index 0, 1, 2, ...). The frame is
        empty, but still has these columns, when there are no matches.
    """
    query_tokens = search_string.split()
    matches = sif_model.sv.similar_by_sentence(
        sentence=query_tokens,
        model=sif_model,
        indexable=indexed_docs.items,
        topn=n_top,
    )

    # Collect complete rows first and build the frame once. Filling a
    # pre-allocated frame through ``df[col][i] = value`` is chained assignment,
    # which pandas does not guarantee to write through (and which is a no-op
    # under Copy-on-Write). Iterating over the matches themselves, rather than
    # range(n_top), also copes with the model returning fewer hits than asked.
    rows = []
    for match in matches:
        doc_idx, similarity = match[1], match[2]
        rows.append({
            'headlines_matched': data.headline_text[doc_idx],
            'relevant_words': get_relevant_words(
                search_tok=query_tokens,
                doc_tok=data.processed_headlines[doc_idx],
                model=em_model,
            ),
            'similarity': similarity,
        })

    # Passing ``columns`` keeps the column set and order stable even when
    # ``rows`` is empty.
    return pd.DataFrame(
        rows,
        columns=['headlines_matched', 'relevant_words', 'similarity'],
    )
# Interactive driver: read one query from stdin, run the semantic search
# against the already-loaded models and data, and print the result table.
search_headline = input("Search headline: ")

# n_top is left at the function's default.
results = semantic_search_sif(
    search_string=search_headline,
    sif_model=sif_model,
    data=data,
    em_model=em_model,
    indexed_docs=headlines_idx,
)

print(results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement