Advertisement
Lethay

Updated meanings.py

Jul 25th, 2017
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.34 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. # Copyright: Henning Sperr
  3. # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
  4. #
  5. # Automatic reading and meaning generation using Yomichan dictionary
  6. # Inspired by Japanese Support Plugin
  7. #
  8.  
  9. import re
  10. import os
  11.  
  12. from PyQt4.QtCore import *
  13. from PyQt4.QtGui import *
  14. from anki.hooks import addHook
  15. from aqt import mw
  16.  
  17. import yomi_dict
  18.  
  19.  
  20. source_fields = ['Expression','Vocab-Expression']
  21. reading_fields = ['Reading','Vocab-Furigana']
  22. meaning_fields = ['Meaning','Vocab-Meaning']
  23. type_fields = ['Type','Vocab-Type']
  24. NOTE_TYPE_NAME = 'japanese'
  25. separate_types=True
  26.  
  27. try:
  28.     import japanese.reading
  29.     DO_READING = False
  30.     MENU_NAME = 'Bulk-add Meanings'
  31. except:
  32.     DO_READING = True
  33.     MENU_NAME = 'Bulk-add Readings/Meanings'
  34.  
  35.  
  36. class YomichanDictionary(object):
  37.     def __init__(self):
  38.         self.lang = yomi_dict.initLanguage()
  39.  
  40.     def lookup(self, expr):
  41.         expr = expr.strip()
  42.         meaning_expr = expr
  43.         final_meanings = []
  44.         done = set()
  45.         while meaning_expr:
  46.             #returns [list_of_entries,num_entries]
  47.             meanings = self.lang.findTerm(meaning_expr)
  48.             if meanings and meanings[1]>0:
  49.                 meaning = meanings[0][0]
  50.  
  51.                 src = meaning['source'] or 'None'
  52.                 #if we process a sentence, the same vocab might appear twice
  53.                 if not src in done:
  54.                     done.add(src)
  55.                     read = meaning['reading'] or src
  56.                     mn = meaning['glossary'] or 'No Meaning Found'
  57.  
  58.                     final_meanings.append({'Expression': src, 'Reading': read, 'Meaning': mn})
  59.  
  60.                     #see if the same source (kanji) has different meanings/readings
  61.                     for meaning in meanings[0][1:]:
  62.                         if not meaning['source'] == src:
  63.                             break
  64.  
  65.                         read = meaning['reading'] or src
  66.                         mn = meaning['glossary'] or 'No Meaning Found'
  67.                         final_meanings.append({'Expression': src, 'Reading': read, 'Meaning': mn})
  68.  
  69.  
  70.                 #remove current vocab from expression
  71.                 meaning_expr = meaning_expr[len(src):]
  72.             else:
  73.                 #we didn't find a vocab so just move one character forward
  74.                 meaning_expr = meaning_expr[1:]
  75.  
  76.         expression_string = expr
  77.         meaning_string = []
  78.         type_string = []
  79.         #move through the final meanings list and sort them by length of the expression
  80.         #we do this because we replace a kanji in the original sentence with kanji[reading]
  81.         #some kanji could be substring of another kanji
  82.         for entries in sorted(final_meanings, key=lambda x: -len(x['Expression'])):
  83.             if entries['Reading']:
  84.                 #search if the expression actually is a kanji
  85.                 if re.search(ur'[\u4e00-\u9faf]', entries['Expression']):
  86.                     expression_string = expression_string.replace(entries['Expression'], entries['Expression']+'['+entries['Reading']+']')
  87.             if entries['Meaning']:
  88.                 #if we have only one vocab and reading then we do not want to
  89.                 #output reading - meaning but just the meaning
  90.                 if len(final_meanings) == 1:
  91.                     meaning_string.append(entries['Meaning'])
  92.                 else:
  93.                     #only output meanings for kanji or hiragana/katakana longer than 2 letters
  94.                     if expr == entries['Expression'] or len(entries['Expression']) > 2 or re.search(ur'[\u4e00-\u9faf]', entries['Expression']):
  95.                         meaning_string.append(entries['Reading']+' - '+entries['Meaning'])
  96.  
  97.         #If desired, remove word-type strings from the meaning (e.g. "(n,vs)")
  98.         if separate_types:
  99.             for m,meaning in enumerate(meaning_string):
  100.                 if meaning[0]=="(":
  101.                     if ") " in meaning:  end=") "
  102.                     elif ")" in meaning: end=")"
  103.                     else: continue
  104.                     type_string.append( meaning[1 : meaning.find(end)] ) #regardless of length of "end", truncate type at the closing bracket
  105.                     meaning_string[m]=meaning[meaning.find(end)+len(end):] #remove the type from the start of the string, taking care to remove the space if present, too
  106.                
  107.                 if meaning.endswith(" (P)"): meaning_string[m]=meaning_string[m][:-4] #remove trailing "(P)". Note not using "meaning here", to prevent overwriting our first change.
  108.                 elif meaning.endswith("(P)"): meaning_string[m]=meaning_string[m][:-3]
  109.  
  110.         return expression_string, '<br>'.join(meaning_string), '<br>'.join(type_string)
  111.  
  112. def get_fields(note,candidate_fnames): #shamelessly stolen from the downloadaudio plugin, Roland Sieker, <ospalh@gmail.com>
  113.     field_names = [item[0] for item in note.items()]
  114.     f_names = [fn.lower() for fn in field_names]
  115.     for cn in candidate_fnames:
  116.         if cn.lower() in f_names: return cn
  117.     return None
  118.  
  119. def update_note(note):
  120.     """
  121.    :param note: note to be checked whether meaning and reading needs an update
  122.    :return: True if updated, False if not updated
  123.    """
  124.  
  125.     #Get source and target field names in the current note
  126.     source_field=get_fields(note,source_fields)
  127.     reading_field=get_fields(note,reading_fields)
  128.     meaning_field=get_fields(note,meaning_fields)
  129.     type_field=get_fields(note,type_fields)
  130.     if source_field==None or reading_field==None or meaning_field==None: return False
  131.  
  132.     if NOTE_TYPE_NAME not in note.model()['name'].lower():
  133.         return False
  134.  
  135.     if not source_field in note:
  136.         return False
  137.  
  138.     if not meaning_field in note or not reading_field in note:
  139.         return False
  140.  
  141.     if note[meaning_field].strip() and note[reading_field].strip():
  142.         return False
  143.  
  144.     text = mw.col.media.strip(note[source_field])
  145.     if not text.strip():
  146.         return False
  147.  
  148.     try:
  149.         reading, meaning, wordtype = yomidict.lookup(text)
  150.         if not note[reading_field].strip() and DO_READING:
  151.             note[reading_field] = reading
  152.         if not note[meaning_field].strip():
  153.             note[meaning_field] = meaning
  154.         if not note[type_field].strip() and separate_types and wordtype!="":
  155.             note[type_field] = wordtype
  156.  
  157.     except Exception, e:
  158.         raise e
  159.  
  160.     return True
  161.  
  162.  
  163. def on_focus_lost(flag, note, fidx):
  164.  
  165.     if not yomidict:
  166.         return flag
  167.  
  168.     if NOTE_TYPE_NAME not in note.model()['name'].lower():
  169.         return flag
  170.  
  171.     #check whether the event comes from the source field
  172.     source_field=get_fields(note,source_fields)
  173.     if fidx != mw.col.models.fieldNames(note.model()).index(source_field):
  174.         return flag
  175.  
  176.     if update_note(note):
  177.         return True
  178.     return flag
  179.  
  180.  
  181. def regenerate_bulk_readings(note_ids):
  182.     if not yomidict:
  183.         raise Exception('Yomidict not working.')
  184.  
  185.     mw.checkpoint(MENU_NAME)
  186.     mw.progress.start()
  187.  
  188.     for nid in note_ids:
  189.         note = mw.col.getNote(nid)
  190.         update_note(note)
  191.         note.flush()
  192.  
  193.     mw.progress.finish()
  194.     mw.reset()
  195.  
  196. def setup_menu_item(browser):
  197.     a = QAction(MENU_NAME, browser)
  198.     browser.connect(a, SIGNAL("triggered()"), lambda e=browser: on_regenerate(e))
  199.     browser.form.menuEdit.addSeparator()
  200.     browser.form.menuEdit.addAction(a)
  201.  
  202. def on_regenerate(browser):
  203.     regenerate_bulk_readings(browser.selectedNotes())
  204.  
  205. yomidict = YomichanDictionary()
  206. addHook('editFocusLost', on_focus_lost)
  207. addHook("browser.setupMenus", setup_menu_item)
  208.  
  209. if __name__ == "__main__":
  210.     #examples are shamelessly taken from Japanese Support Anki Plugin
  211.     expr = u"カリン、自分でまいた種は自分で刈り取れ"
  212.     print yomidict.lookup(expr).encode("utf-8")
  213.     expr = u"昨日、林檎を2個買った。"
  214.     print yomidict.lookup(expr).encode("utf-8")
  215.     expr = u"真莉、大好きだよん^^"
  216.     print yomidict.lookup(expr).encode("utf-8")
  217.     expr = u"彼2000万も使った。"
  218.     print yomidict.lookup(expr).encode("utf-8")
  219.     expr = u"彼二千三百六十円も使った。"
  220.     print yomidict.lookup(expr).encode("utf-8")
  221.     expr = u"千葉"
  222.     print yomidict.lookup(expr).encode("utf-8")
  223.     expr = u"滅"
  224.     print yomidict.lookup(expr).encode("utf-8")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement