Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Demo script: score three sample documents against a query with lsirina's
# LSI similarity helper and print the matching documents, best match first.
from lsirina.lsi.similarity import calc_similarity

d1 = "Shipment of gold damaged in a fire"
d2 = "Delivery of silver arrived in a silver truck"
d3 = "Shipment of gold arrived in a truck"
query = "gold silver truck"

docs = [d1, d2, d3]
# Whitespace tokenization only; case and punctuation are left untouched.
tokenized_doc = [d.split() for d in docs]

# NOTE(review): the query is passed as a raw string while the documents are
# passed as token lists -- presumably calc_similarity tokenizes the query
# itself; confirm against the lsirina API.
# The result is consumed below as one score per document, in `docs` order.
sim = calc_similarity(query, tokenized_doc)

# Keep only documents with a positive score, highest score first.
# Materialized as a list so it can be traversed more than once on Python 3
# (a bare filter() there is a one-shot iterator). sorted() is stable even
# with reverse=True, so tied scores keep their original document order.
sort_by_most_valid = sorted(
    [(index, score) for index, score in enumerate(sim) if score > 0],
    key=lambda pair: pair[1],
    reverse=True,
)

# The loop variable is deliberately not named `sim`, so the score list above
# is not overwritten while iterating.
for index, score in sort_by_most_valid:
    # Single parenthesized argument: behaves the same as a Python 2 print
    # statement and as the Python 3 print() function.
    print("dokumen %s: %s, nilai similaritas: %s" % (index + 1, docs[index], score))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement