Advertisement
Guest User

PoS-only vers. of postLexical

a guest
Feb 18th, 2016
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.87 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf8 -*-
  3. #post: PoS Tagger
  4.  
  5. '''Based on Overture2112's Sentence Gloss plugin, which was ported to the new Anki version by Kenishi.
  6.   Makes use of Masato Hagiwara's Rakuten MA, which was ported to Python by Yukino Ikegami (https://pypi.python.org/pypi/rakutenma).
  7.   For both versions: Apache License version 2.0
  8.   Rakuten MA Python (c) 2015- Yukino Ikegami. All Rights Reserved.
  9.   Rakuten MA (original) (c) 2014 Rakuten NLP Project. All Rights Reserved.
  10. '''
  11.  
  12. import subprocess, re, os
  13. from rakutenma import RakutenMA
  14. from PyQt4.QtCore import *
  15. from PyQt4.QtGui import *
  16. from anki.hooks import addHook
  17. from anki.notes import Note
  18. from anki.utils import stripHTML
  19. from aqt import mw
  20. from aqt.utils import showText
  21.  
  22. def calcPOS( expr  ):
  23.         rma = RakutenMA()
  24.         rma = RakutenMA(phi=1024, c=0.007812)
  25.         tD, tF = os.path.split(__file__)
  26.         # "model_ja.json" is available at https://github.com/rakuten-nlp/rakutenma
  27.         # "model_ja.min.jason" (provided in this add-on) is minified version (feature quantization applied)
  28.         jSon = os.path.join(tD, 'model_ja.min.json')
  29.         rma.load(jSon)
  30.     result = rma.tokenize( expr )
  31.         newString = ''
  32.         for i in result:
  33.             wPoS = i[0] + ' [' + i[1] + ']'
  34.             newString += u'\u3000' + wPoS
  35.         resultPoS = newString[1:]
  36.     return resultPoS
  37.  
  38. def calcNote( f ):
  39.    if f['PoS']: return
  40.    f['PoS'] = calcPOS( f['Expression'] )
  41.  
  42. def setupMenu( ed ):
  43.     ed.form.menuEdit.addSeparator()
  44.     a = QAction( 'Get PoS', ed )
  45.     ed.connect( a, SIGNAL( 'triggered()' ), lambda e=ed: onCalc( e ) )
  46.     ed.form.menuEdit.addSeparator()
  47.     ed.form.menuEdit.addAction( a )
  48.  
  49. def onCalc( ed ):
  50.     n = 'Get PoS'
  51.     ed.editor.saveNow()
  52.     calcGet(ed, ed.selectedNotes() )  
  53.     mw.requireReset()
  54.  
  55. def refreshSession():
  56.     mw.col.s.flush()
  57.    
  58. def calcGet( ed, fids ):
  59.     mw.progress.start( max=len( fids ) , immediate=True)
  60.     for (i,fid) in enumerate( fids ):
  61.         mw.progress.update( label='Processing... ', value=i )
  62.         f = mw.col.getNote(id=fid)
  63.         try: calcNote( f )
  64.         except:
  65.             import traceback
  66.             print 'Processing failed: '
  67.             traceback.print_exc()
  68.         try: f.flush()
  69.         except:
  70.             raise Exception()
  71.         ed.onRowChanged(f,f)
  72.     mw.progress.finish()
  73.  
# Register setupMenu on the 'browser.setupMenus' hook so the 'Get PoS'
# action is added when that hook runs.
addHook( 'browser.setupMenus', setupMenu )
  75.  
  76. '''
  77. PoS tag list in Japanese and correspondence to BCCWJ tags
  78. Tag     Original JA name    English
  79. A-c     形容詞-一般    Adjective-Common
  80. A-dp    形容詞-非自立可能   Adjective-Dependent
  81. C   接続詞   Conjunction
  82. D   代名詞   Pronoun
  83. E   英単語   English word
  84. F   副詞  Adverb
  85. I-c     感動詞-一般    Interjection-Common
  86. J-c     形状詞-一般    Adjectival Noun-Common
  87. J-tari  形状詞-タリ    Adjectival Noun-Tari
  88. J-xs    形状詞-助動詞語幹   Adjectival Noun-AuxVerb stem
  89. M-aa    補助記号-AA     Auxiliary sign-AA
  90. M-c     補助記号-一般     Auxiliary sign-Common
  91. M-cp    補助記号-括弧閉  Auxiliary sign-Close Parenthesis
  92. M-op    補助記号-括弧開  Auxiliary sign-Open Parenthesis
  93. M-p     補助記号-句点     Auxiliary sign-Period
  94. N-n     名詞-名詞的    Noun-Noun
  95. N-nc    名詞-普通名詞     Noun-Common Noun
  96. N-pn    名詞-固有名詞     Noun-Proper Noun
  97. N-xs    名詞-助動詞語幹  Noun-AuxVerb stem
  98. O   その他   Others
  99. P   接頭辞   Prefix
  100. P-fj    助詞-副助詞    Particle-Adverbial
  101. P-jj    助詞-準体助詞     Particle-Phrasal
  102. P-k     助詞-格助詞    Particle-Case Marking
  103. P-rj    助詞-係助詞    Particle-Binding
  104. P-sj    助詞-接続助詞     Particle-Conjunctive
  105. Q-a     接尾辞-形容詞的  Suffix-Adjective
  106. Q-j     接尾辞-形状詞的  Suffix-Adjectival Noun
  107. Q-n     接尾辞-名詞的     Suffix-Noun
  108. Q-v     接尾辞-動詞的     Suffix-Verb
  109. R   連体詞   Adnominal adjective
  110. S-c     記号-一般   Sign-Common
  111. S-l     記号-文字   Sign-Letter
  112. U   URL     URL
  113. V-c     動詞-一般   Verb-Common
  114. V-dp    動詞-非自立可能  Verb-Dependent
  115. W   空白  Whitespace
  116. X   助動詞   AuxVerb
  117. '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement