Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import MeCab, re
def getMorph(sentence):
    """Split *sentence* into morpheme surface strings using MeCab.

    Args:
        sentence: Text to analyse; passed unchanged to
            ``MeCab.Tagger.parseToNode``.

    Returns:
        A list holding the surface form of each node in the parsed
        chain, in order. Nodes with an empty surface are omitted.
    """
    tagger = MeCab.Tagger()
    # Start from the second node of the chain: MeCab's first node is its
    # beginning-of-sentence marker rather than a real token.
    node = tagger.parseToNode(sentence).next
    morph = []
    while node:
        surface = node.surface
        # Some bindings return UTF-8 byte strings and others return text.
        # Decode only the former so this runs on Python 2 and Python 3
        # (the previous ``unicode(...)`` call exists on Python 2 only).
        if isinstance(surface, bytes):
            surface = surface.decode("utf8")
        # The closing end-of-sentence marker carries an empty surface;
        # keeping it would add a bogus "" token to the result.
        if surface:
            morph.append(surface)
        node = node.next
    return morph
def cntword(words):
    """Count how often each distinct item occurs in *words*.

    Args:
        words: Iterable of hashable items (typically token strings).

    Returns:
        A list of ``[item, count]`` pairs (each pair is a list), sorted
        by count from highest to lowest. An empty input gives ``[]``.
    """
    # Imported locally so the function does not rely on file-level imports.
    from collections import Counter

    # A single counting pass replaces calling ``words.count`` once per
    # distinct item, which rescanned the whole sequence every time.
    # On Python 3.7+ items with equal counts keep first-seen order.
    return [[word, count] for word, count in Counter(words).most_common()]
# Read neko.txt into a list of its lines.
with open("neko.txt", "r") as rfp:
    data = list(rfp)

# Re-join the lines into one string, tokenise it, then tally the tokens.
morph = getMorph("".join(data))
cnt = cntword(morph)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement