Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf8 -*-
- #post: PoS Tagger
- '''Based on Overture2112's Sentence Gloss plugin, which was ported to the new Anki version by Kenishi.
- Makes use of Masato Hagiwara's Rakuten MA, which was ported to Python by Yukino Ikegami (https://pypi.python.org/pypi/rakutenma).
- For both versions: Apache License version 2.0
- Rakuten MA Python (c) 2015- Yukino Ikegami. All Rights Reserved.
- Rakuten MA (original) (c) 2014 Rakuten NLP Project. All Rights Reserved.
- '''
- import subprocess, re, os
- from rakutenma import RakutenMA
- from PyQt4.QtCore import *
- from PyQt4.QtGui import *
- from anki.hooks import addHook
- from anki.notes import Note
- from anki.utils import stripHTML
- from aqt import mw
- from aqt.utils import showText
_posTagger = None  # RakutenMA instance shared by all calcPOS calls; created on first use


def _getTagger():
    '''Return the shared RakutenMA tagger, loading the bundled model on first use.'''
    global _posTagger
    if _posTagger is None:
        rma = RakutenMA(phi=1024, c=0.007812)
        # "model_ja.json" is available at https://github.com/rakuten-nlp/rakutenma
        # "model_ja.min.json" (provided in this add-on) is a minified version
        # (feature quantization applied). It is looked up next to this file.
        rma.load( os.path.join( os.path.dirname(__file__), 'model_ja.min.json' ) )
        _posTagger = rma
    return _posTagger


def calcPOS( expr ):
    '''Tokenize expr with Rakuten MA and return the tokens annotated with their tags.

    Each item returned by tokenize() is rendered as "<item[0]> [<item[1]>]"
    and the rendered items are joined with U+3000 (ideographic space).
    An empty tokenization yields an empty string.

    The model is loaded once and reused, because calcGet calls this function
    once per selected note and re-parsing the model file each time is wasted work.
    '''
    tokens = _getTagger().tokenize( expr )
    return u'\u3000'.join( tok[0] + ' [' + tok[1] + ']' for tok in tokens )
def calcNote( f ):
    '''Fill the 'PoS' field of note f from its 'Expression' field.

    Does nothing when 'PoS' already holds a value, so existing tags are
    never overwritten. Otherwise the expression is passed through stripHTML
    before tagging, so that markup stored in the field (presumably tags such
    as <br> or <div> added by the editor) is not tokenized as if it were
    part of the sentence.
    '''
    if f['PoS']:
        return
    f['PoS'] = calcPOS( stripHTML( f['Expression'] ) )
def setupMenu( ed ):
    '''Add a 'Get PoS' action to the Edit menu of ed.

    ed is the object handed to the 'browser.setupMenus' hook (presumably the
    card browser window). Triggering the action runs onCalc on that same object.
    '''
    editMenu = ed.form.menuEdit
    posAction = QAction( 'Get PoS', ed )
    editMenu.addSeparator()
    # Bind ed as a default argument so the slot keeps a reference to this window.
    ed.connect( posAction, SIGNAL( 'triggered()' ), lambda e=ed: onCalc( e ) )
    editMenu.addSeparator()
    editMenu.addAction( posAction )
def onCalc( ed ):
    '''Handle the 'Get PoS' menu action for ed.

    Calls ed.editor.saveNow() first, then runs calcGet over the notes
    returned by ed.selectedNotes(), and finally calls mw.requireReset().
    '''
    ed.editor.saveNow()
    calcGet( ed, ed.selectedNotes() )
    mw.requireReset()
def refreshSession():
    '''Flush the object found at mw.col.s.

    NOTE(review): nothing visible in this file calls this function.
    '''
    session = mw.col.s
    session.flush()
def calcGet( ed, fids ):
    '''Compute the PoS field for every note id in fids, showing progress.

    For each id the note is fetched from mw.col, passed to calcNote, flushed,
    and ed.onRowChanged is called with it. A failure inside calcNote is
    reported on stdout with its traceback and processing continues with the
    note still being flushed. A failure while flushing propagates to the
    caller with its original exception, so the real cause is not lost.

    The progress dialog is always closed, even when an exception escapes.
    '''
    import traceback
    mw.progress.start( max=len( fids ), immediate=True )
    try:
        for (i, fid) in enumerate( fids ):
            mw.progress.update( label='Processing... ', value=i )
            f = mw.col.getNote( id=fid )
            try:
                calcNote( f )
            except Exception:
                # Best effort: one bad note must not abort the whole batch.
                print('Processing failed: ')
                traceback.print_exc()
            f.flush()
            ed.onRowChanged( f, f )
    finally:
        # Without this the progress window would stay open after an error.
        mw.progress.finish()
# Register setupMenu on the 'browser.setupMenus' hook so that the
# 'Get PoS' action is added to the Edit menu.
- addHook( 'browser.setupMenus', setupMenu )
- '''
- PoS tag list in Japanese and correspondence to BCCWJ tags
- Tag Original JA name English
- A-c 形容詞-一般 Adjective-Common
- A-dp 形容詞-非自立可能 Adjective-Dependent
- C 接続詞 Conjunction
- D 代名詞 Pronoun
- E 英単語 English word
- F 副詞 Adverb
- I-c 感動詞-一般 Interjection-Common
- J-c 形状詞-一般 Adjectival Noun-Common
- J-tari 形状詞-タリ Adjectival Noun-Tari
- J-xs 形状詞-助動詞語幹 Adjectival Noun-AuxVerb stem
- M-aa 補助記号-AA Auxiliary sign-AA
- M-c 補助記号-一般 Auxiliary sign-Common
- M-cp 補助記号-括弧閉 Auxiliary sign-Close Parenthesis
- M-op 補助記号-括弧開 Auxiliary sign-Open Parenthesis
- M-p 補助記号-句点 Auxiliary sign-Period
- N-n 名詞-名詞的 Noun-Noun
- N-nc 名詞-普通名詞 Noun-Common Noun
- N-pn 名詞-固有名詞 Noun-Proper Noun
- N-xs 名詞-助動詞語幹 Noun-AuxVerb stem
- O その他 Others
- P 接頭辞 Prefix
- P-fj 助詞-副助詞 Particle-Adverbial
- P-jj 助詞-準体助詞 Particle-Phrasal
- P-k 助詞-格助詞 Particle-Case Marking
- P-rj 助詞-係助詞 Particle-Binding
- P-sj 助詞-接続助詞 Particle-Conjunctive
- Q-a 接尾辞-形容詞的 Suffix-Adjective
- Q-j 接尾辞-形状詞的 Suffix-Adjectival Noun
- Q-n 接尾辞-名詞的 Suffix-Noun
- Q-v 接尾辞-動詞的 Suffix-Verb
- R 連体詞 Adnominal adjective
- S-c 記号-一般 Sign-Common
- S-l 記号-文字 Sign-Letter
- U URL URL
- V-c 動詞-一般 Verb-Common
- V-dp 動詞-非自立可能 Verb-Dependent
- W 空白 Whitespace
- X 助動詞 AuxVerb
- '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement