Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- bigram_measures = collocations.BigramAssocMeasures()
- finder = nltk.BigramCollocationFinder.from_words(full_text)
- finder.apply_word_filter(lambda x: x in stopwords)
- scored = finder.score_ngrams(bigram_measures.pmi)
- for bscore in scored[:30]:
- print (bscore)
- (('x02tñx7f¼éx1aaùõx8d¶rwìiìñó', 'x10œø'), 22.60745494022481)
- (('x10Ͽ', 'x17'), 22.60745494022481)
- (('x17', 'y.¾ƒe'), 22.60745494022481)
- (("'07", "'08"), 22.60745494022481)
- (("'20s", "'30s"), 22.60745494022481)
- (("'24-jan-2018", "'24/01/2018"), 22.60745494022481)
- (("'42", 'salko'), 22.60745494022481)
- (("'acclaimed", 'musician/'), 22.60745494022481)
- (("'adiye", 'manam'), 22.60745494022481)
- (("'afflict", "'inflict"), 22.60745494022481)
- (("'allegretto", 'tranquillo'), 22.60745494022481)
- (("'amar", 'maruf'), 22.60745494022481)
- (("'anekantwad", "'syadvada"), 22.60745494022481)
- (("'anger", "'anticipation"), 22.60745494022481)
- (("'annum", "'year"), 22.60745494022481)
- (("'anti-fracking", 'anti-pipeline'), 22.60745494022481)
- (("'anyway", "'anyways"), 22.60745494022481)
- (("'apoapsis", "'periapsis"), 22.60745494022481)
- (("'association", "'sponsors"), 22.60745494022481)
- (("'audacious", "'audacity"), 22.60745494022481)
- (("'babu", "'shona"), 22.60745494022481)
- (("'baklava", "'balaclava"), 22.60745494022481)
- (("'baniya", "'ambani"), 22.60745494022481)
- (("'bet", "'cast"), 22.60745494022481)
- (("'bhakt", "'chamcha"), 22.60745494022481)
- (("'bheege", 'honth'), 22.60745494022481)
- (("'blinded", 'beleif'), 22.60745494022481)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement