Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- # Copyright: Henning Sperr
- # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
- #
- # Automatic reading and meaning generation using Yomichan dictionary
- # Inspired by Japanese Support Plugin
- #
- import re
- import os
- from PyQt4.QtCore import *
- from PyQt4.QtGui import *
- from anki.hooks import addHook
- from aqt import mw
- import yomi_dict
# Candidate field names for each role. They are tried in order and the first
# one that exists on a note is used.
source_fields = ['Expression', 'Vocab-Expression']
reading_fields = ['Reading', 'Vocab-Furigana']
meaning_fields = ['Meaning', 'Vocab-Meaning']
type_fields = ['Type', 'Vocab-Type']

# Only note types whose lower-cased name contains this string are processed.
NOTE_TYPE_NAME = 'japanese'

# When True, a leading "(...)" word-type tag is moved out of the meaning and
# returned separately so it can be stored in its own field.
separate_types = True

# If the `japanese.reading` module can be imported, this add-on only fills in
# meanings (presumably because that add-on already generates readings --
# confirm). Any import-time failure falls back to generating readings here;
# `except Exception` keeps that best-effort behaviour without swallowing
# SystemExit / KeyboardInterrupt the way a bare `except:` does.
try:
    import japanese.reading
    DO_READING = False
    MENU_NAME = 'Bulk-add Meanings'
except Exception:
    DO_READING = True
    MENU_NAME = 'Bulk-add Readings/Meanings'
class YomichanDictionary(object):
    """Looks up Japanese text through the language object from `yomi_dict`."""

    # Matches one character in the U+4E00..U+9FAF range; used to decide
    # whether a term contains kanji and therefore needs a reading annotation.
    _KANJI_RE = re.compile(u'[\u4e00-\u9faf]')

    def __init__(self):
        self.lang = yomi_dict.initLanguage()

    def lookup(self, expr):
        """Look up every term found in *expr*.

        :param expr: a word or sentence; surrounding whitespace is ignored
        :return: a 3-tuple ``(expression, meanings, types)`` where
            ``expression`` is *expr* with ``[reading]`` appended to each
            kanji term, ``meanings`` is the meanings joined with ``<br>``
            and ``types`` is the extracted word-type tags joined with
            ``<br>`` (empty when `separate_types` is off or none was found)
        """
        expr = expr.strip()
        entries = self._collect_entries(expr)

        annotated = expr
        annotated_words = set()
        meaning_lines = []
        # Longest expressions first, because a short term can be a substring
        # of a longer one.
        for entry in sorted(entries, key=lambda item: -len(item['Expression'])):
            word = entry['Expression']
            has_kanji = self._KANJI_RE.search(word) is not None
            # Annotate each distinct word only once: a word with several
            # readings would otherwise be replaced again inside its own
            # annotation, giving "word[r2][r1]".
            if entry['Reading'] and has_kanji and word not in annotated_words:
                annotated_words.add(word)
                annotated = annotated.replace(
                    word, word + '[' + entry['Reading'] + ']')
            if entry['Meaning']:
                if len(entries) == 1:
                    # A single result needs no "reading - " prefix.
                    meaning_lines.append(entry['Meaning'])
                elif expr == word or len(word) > 2 or has_kanji:
                    # Only kanji terms, or kana terms longer than two
                    # characters, get a meaning line.
                    meaning_lines.append(
                        entry['Reading'] + ' - ' + entry['Meaning'])

        word_types = []
        if separate_types:
            for index, meaning in enumerate(meaning_lines):
                word_type, meaning_lines[index] = self._split_type(meaning)
                if word_type is not None:
                    word_types.append(word_type)

        return annotated, '<br>'.join(meaning_lines), '<br>'.join(word_types)

    def _collect_entries(self, expr):
        """Scan *expr* left to right and return one dict per dictionary hit."""
        entries = []
        seen = set()
        remaining = expr
        while remaining:
            # findTerm returns [list_of_entries, num_entries]
            result = self.lang.findTerm(remaining)
            if not (result and result[1] > 0):
                # Nothing starts here, so move one character forward.
                remaining = remaining[1:]
                continue
            matches = result[0]
            # NOTE(review): an entry with an empty source is recorded under
            # the literal 'None', which also advances the scan by four
            # characters -- confirm whether findTerm can return that.
            src = matches[0]['source'] or 'None'
            # In a sentence the same vocab might appear twice.
            if src not in seen:
                seen.add(src)
                for position, match in enumerate(matches):
                    # The first match is always kept; later ones only while
                    # they describe the same source (other readings/meanings).
                    if position and match['source'] != src:
                        break
                    entries.append({
                        'Expression': src,
                        'Reading': match['reading'] or src,
                        'Meaning': match['glossary'] or 'No Meaning Found',
                    })
            # Remove the current vocab from the text still to be scanned.
            remaining = remaining[len(src):]
        return entries

    @staticmethod
    def _split_type(meaning):
        """Strip a leading "(type)" tag and a trailing "(P)" from *meaning*.

        :return: ``(word_type, stripped_meaning)``; ``word_type`` is None
            when *meaning* does not start with a parenthesised tag
        """
        word_type = None
        if meaning.startswith("("):
            # Cut at the FIRST closing bracket. Searching for ") " first
            # would skip past a tag that is not followed by a space.
            close = meaning.find(")")
            if close == -1:
                return None, meaning
            word_type = meaning[1:close]
            meaning = meaning[close + 1:]
            if meaning.startswith(" "):
                meaning = meaning[1:]
        if meaning.endswith(" (P)"):
            meaning = meaning[:-4]
        elif meaning.endswith("(P)"):
            meaning = meaning[:-3]
        return word_type, meaning
# shamelessly stolen from the downloadaudio plugin, Roland Sieker, <ospalh@gmail.com>
def get_fields(note, candidate_fnames):
    """Return the note's field name matching the first usable candidate.

    Candidates are compared case-insensitively, in the order given.

    :param note: object whose ``items()`` yields ``(field_name, value)`` pairs
    :param candidate_fnames: field names to look for, most preferred first
    :return: the field name exactly as it is spelled on the note, so it can
        be used to index the note, or None when no candidate matches
    """
    # Map lower-cased name -> real name; the first field wins on a clash.
    actual_by_lower = {}
    for item in note.items():
        actual_by_lower.setdefault(item[0].lower(), item[0])
    for candidate in candidate_fnames:
        actual = actual_by_lower.get(candidate.lower())
        if actual is not None:
            return actual
    return None
def update_note(note):
    """
    Fill the empty reading, meaning and type fields of a note from the
    dictionary lookup of its source field.

    :param note: note to be checked whether meaning and reading needs an update
    :return: True if the dictionary lookup was performed, False if the note
        was skipped (wrong note type, missing fields, empty source, or
        meaning and reading both already filled)
    """
    # Get source and target field names in the current note
    source_field = get_fields(note, source_fields)
    reading_field = get_fields(note, reading_fields)
    meaning_field = get_fields(note, meaning_fields)
    type_field = get_fields(note, type_fields)
    if source_field is None or reading_field is None or meaning_field is None:
        return False
    if NOTE_TYPE_NAME not in note.model()['name'].lower():
        return False
    if source_field not in note:
        return False
    if meaning_field not in note or reading_field not in note:
        return False
    if note[meaning_field].strip() and note[reading_field].strip():
        return False
    text = mw.col.media.strip(note[source_field])
    if not text.strip():
        return False

    # Errors from the lookup propagate to the caller unchanged.
    reading, meaning, wordtype = yomidict.lookup(text)
    if DO_READING and not note[reading_field].strip():
        note[reading_field] = reading
    if not note[meaning_field].strip():
        note[meaning_field] = meaning
    # The type field is optional: a note without one must not raise here.
    if (separate_types and wordtype != ""
            and type_field is not None and type_field in note
            and not note[type_field].strip()):
        note[type_field] = wordtype
    return True
def on_focus_lost(flag, note, fidx):
    """Handler for the 'editFocusLost' hook: update the note when its source
    field loses focus.

    :param flag: value passed in by the hook; returned unchanged when this
        handler does nothing
    :param note: the note being edited
    :param fidx: index of the field that lost focus
    :return: True if `update_note` reported an update, otherwise *flag*
    """
    if not yomidict:
        return flag
    if NOTE_TYPE_NAME not in note.model()['name'].lower():
        return flag
    # check whether the event comes from the source field
    source_field = get_fields(note, source_fields)
    field_names = mw.col.models.fieldNames(note.model())
    # Covers both "no source field found" (None) and a name that is not
    # spelled exactly like a model field; .index() would raise ValueError.
    if source_field not in field_names:
        return flag
    if fidx != field_names.index(source_field):
        return flag
    return True if update_note(note) else flag
def regenerate_bulk_readings(note_ids):
    """Run `update_note` on every note in *note_ids* and flush each one.

    :param note_ids: iterable of note ids to process
    :raises Exception: if the dictionary object is not available
    """
    if not yomidict:
        raise Exception('Yomidict not working.')
    mw.checkpoint(MENU_NAME)
    mw.progress.start()
    try:
        for nid in note_ids:
            note = mw.col.getNote(nid)
            update_note(note)
            note.flush()
    finally:
        # Always close the progress dialog and refresh the UI, even when a
        # note fails; notes processed before the failure are already flushed.
        mw.progress.finish()
        mw.reset()
def setup_menu_item(browser):
    """Add a separator and the bulk-generation action to the browser's
    `form.menuEdit` menu."""
    def run_for_browser(e=browser):
        # The browser is bound as a default, exactly like the lambda it replaces.
        return on_regenerate(e)

    action = QAction(MENU_NAME, browser)
    edit_menu = browser.form.menuEdit
    browser.connect(action, SIGNAL("triggered()"), run_for_browser)
    edit_menu.addSeparator()
    edit_menu.addAction(action)
def on_regenerate(browser):
    """Run the bulk update over the notes currently selected in *browser*."""
    selected_ids = browser.selectedNotes()
    regenerate_bulk_readings(selected_ids)
# Build the shared dictionary first; if this fails no hook gets registered.
yomidict = YomichanDictionary()

# Register the add-on: browser menu entry and per-field edit handler.
addHook("browser.setupMenus", setup_menu_item)
addHook('editFocusLost', on_focus_lost)
if __name__ == "__main__":
    # examples are shamelessly taken from Japanese Support Anki Plugin
    examples = [
        u"カリン、自分でまいた種は自分で刈り取れ",
        u"昨日、林檎を2個買った。",
        u"真莉、大好きだよん^^",
        u"彼2000万も使った。",
        u"彼二千三百六十円も使った。",
        u"千葉",
        u"滅",
    ]
    for expr in examples:
        # lookup() returns a tuple (expression, meanings, types); a tuple has
        # no .encode(), so each part is encoded and printed separately.
        # print(...) with one argument parses on both Python 2 and 3.
        for part in yomidict.lookup(expr):
            print(part.encode("utf-8"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement