Advertisement
Guest User

Untitled

a guest
Feb 20th, 2019
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.50 KB | None | 0 0
  1. bigram_measures = collocations.BigramAssocMeasures()
  2.  
  3. finder = nltk.BigramCollocationFinder.from_words(full_text)
  4. finder.apply_word_filter(lambda x: x in stopwords)
  5. scored = finder.score_ngrams(bigram_measures.pmi)
  6. for bscore in scored[:30]:
  7. print (bscore)
  8.  
  9. (('x02tñx7f¼éx1aaùõx8d¶rwìiìñó', 'x10œø'), 22.60745494022481)
  10. (('x10Ͽ', 'x17'), 22.60745494022481)
  11. (('x17', 'y.¾ƒe'), 22.60745494022481)
  12. (("'07", "'08"), 22.60745494022481)
  13. (("'20s", "'30s"), 22.60745494022481)
  14. (("'24-jan-2018", "'24/01/2018"), 22.60745494022481)
  15. (("'42", 'salko'), 22.60745494022481)
  16. (("'acclaimed", 'musician/'), 22.60745494022481)
  17. (("'adiye", 'manam'), 22.60745494022481)
  18. (("'afflict", "'inflict"), 22.60745494022481)
  19. (("'allegretto", 'tranquillo'), 22.60745494022481)
  20. (("'amar", 'maruf'), 22.60745494022481)
  21. (("'anekantwad", "'syadvada"), 22.60745494022481)
  22. (("'anger", "'anticipation"), 22.60745494022481)
  23. (("'annum", "'year"), 22.60745494022481)
  24. (("'anti-fracking", 'anti-pipeline'), 22.60745494022481)
  25. (("'anyway", "'anyways"), 22.60745494022481)
  26. (("'apoapsis", "'periapsis"), 22.60745494022481)
  27. (("'association", "'sponsors"), 22.60745494022481)
  28. (("'audacious", "'audacity"), 22.60745494022481)
  29. (("'babu", "'shona"), 22.60745494022481)
  30. (("'baklava", "'balaclava"), 22.60745494022481)
  31. (("'baniya", "'ambani"), 22.60745494022481)
  32. (("'bet", "'cast"), 22.60745494022481)
  33. (("'bhakt", "'chamcha"), 22.60745494022481)
  34. (("'bheege", 'honth'), 22.60745494022481)
  35. (("'blinded", 'beleif'), 22.60745494022481)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement