Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import json
- import sys
- import getopt
- import argparse
- import os
- import re
- from DictionaryServices import *
# The API key for your WK account. Found here:
# https://www.wanikani.com/account
# get_wkcontent() inserts this value into every request URL it builds.
APIKey = ''
def get_filename(type, level):
    """
    Build the output file name for one content type and level.

    Levels below 10 get a leading zero after the "L", which yields names
    such as "L04 Kanji.txt".

    Args:
        type: label for the kind of content (Radicals, Kanji etc)
        level: the WK level of the content the file will hold.

    Returns:
        The file name as a string.
    """
    level_tag = ('L0' if level < 10 else 'L') + str(level)
    return ' '.join((level_tag, type)) + '.txt'
def get_wkcontent(type, level):
    """
    Request the specified content from WK then decode and deserialize
    it.

    Args:
        type: the type of content (radicals, kanji or vocabulary);
            matched case-insensitively.
        level: the WK level of content to retrieve.

    Returns:
        A Python object containing the WK data

    Raises:
        ValueError: if type is not a supported content type, or if the
            response body is not valid UTF-8 encoded JSON.
        requests.ConnectionError, requests.Timeout: if WaniKani cannot
            be reached or does not answer in time.
        Exception: if the decoded response contains an 'error' entry.
    """
    type = type.lower()
    if type not in ('radicals', 'kanji', 'vocabulary'):
        print("type must be one of: radicals, kanji, vocabulary\n")
        raise ValueError(type)

    url = 'https://www.wanikani.com/api/user/{0}/{1}/{2}'.format(
        APIKey, type, level)
    try:
        # requests never times out by default; without an explicit
        # timeout a stalled connection would hang the script forever.
        request = requests.get(url, timeout=30)
    except (requests.ConnectionError, requests.Timeout):
        print("ERROR: Can't connect to WaniKani.\n")
        raise

    try:
        data = json.loads(request.content.decode('utf-8'))
    except ValueError:
        # Covers both undecodable bytes (UnicodeDecodeError is a
        # ValueError) and malformed JSON.
        print("ERROR: Couldn't load JSON content. Check the request URL. \n")
        raise

    if 'error' in data:
        print("ERROR: There was a problem with the request.\n")
        error = data['error']
        # format() instead of '+' so a non-string code cannot raise a
        # TypeError that would hide the actual API error.
        raise Exception('{0} {1}'.format(error['message'], error['code']))
    return data
def create_radicals_file(level):
    """
    Create a file containing the radicals of the specified level and
    their meanings in a format compatible with Flashcards Deluxe.

    Each line holds the radical character and its meaning separated by
    a tab. The file is named by get_filename('Radicals', level).

    Args:
        level: the WK level of content to retrieve

    Returns:
        None
    """
    radicals = get_wkcontent('radicals', level)
    # Write UTF-8 explicitly: the locale's default encoding may be
    # unable to represent the Japanese characters.
    with open(get_filename('Radicals', level), 'w',
              encoding='utf-8') as radicals_file:
        for item in radicals['requested_information']:
            # Entries whose 'character' is None have nothing to print
            # on the front of the card, so they are left out.
            if item['character'] is None:
                continue
            print(item['character'], item['meaning'], sep='\t',
                  file=radicals_file)
def create_kanji_file(level):
    """
    Create a file containing the kanji of the specified level along
    with their important readings and their meanings in a format
    compatible with Flashcards Deluxe.

    Each line holds the kanji, its important reading and its meaning
    separated by tabs. The file is named by get_filename('Kanji', level).

    Args:
        level: the WK level of content to retrieve

    Returns:
        None
    """
    kanji = get_wkcontent('kanji', level)
    # Write UTF-8 explicitly: the locale's default encoding may be
    # unable to represent the Japanese characters.
    with open(get_filename('Kanji', level), 'w',
              encoding='utf-8') as kanji_file:
        for item in kanji['requested_information']:
            # 'important_reading' holds the name of the key under which
            # the reading to study is stored on the same item.
            reading = item[item['important_reading']]
            print(item['character'], reading, item['meaning'], sep='\t',
                  file=kanji_file)
def create_vocabulary_file(level):
    """
    Create a file containing the vocabulary of the specified level
    along with their readings and meanings in a format compatible with
    Flashcards Deluxe.

    Each line holds the word, its kana reading and its meaning
    separated by tabs. The file is named by
    get_filename('Vocabulary', level).

    Args:
        level: the WK level of content to retrieve

    Returns:
        None
    """
    vocabulary = get_wkcontent('vocabulary', level)
    # Write UTF-8 explicitly: the locale's default encoding may be
    # unable to represent the Japanese characters.
    with open(get_filename('Vocabulary', level), 'w',
              encoding='utf-8') as vocabulary_file:
        for item in vocabulary['requested_information']:
            print(item['character'], item['kana'], item['meaning'],
                  sep='\t', file=vocabulary_file)
def create_MCD_file(level):
    """
    Create a file containing something similar to MCDs (see:
    http://learnanylanguage.wikia.com/wiki/Massive-Context_Cloze_Deletion)
    composed of example sentences and their translations sourced from
    Dictionary.app's ウィズダム英和辞典 (The Wisdom English-Japanese
    Dictionary) using the vocabulary words of the specified level. The
    various splitting and formatting done in this function are specific
    to this dictionary, and will likely fail for others.

    Args:
        level: the WK level of content to retrieve

    Returns:
        None
    """
    def loose_pattern(word):
        """
        Build a regex that also finds conjugated forms of the word.

        Every hiragana character (okurigana) in the word is replaced by
        the class [あ-ん]; every other character is escaped so it is
        always matched literally.

        Args:
            word: the vocabulary word being studied
        Returns:
            The compiled pattern
        """
        return re.compile(''.join(
            '[あ-ん]' if 'あ' <= char <= 'ん' else re.escape(char)
            for char in word))

    def make_card(example, word, kana, definition, word_pattern):
        """
        Massage the example into a format that is presentable in FCD.

        Args:
            example: the example sentence (and its translation)
            word: the vocabulary word being studied
            kana: the reading of the word
            definition: the meaning of the word
            word_pattern: compiled loose-match pattern for the word
        Returns:
            The formatted card, or None if the example has no
            translation, is a meaning header, or does not contain the
            word.
        """
        # Remove pronouncers for place names.
        example = re.sub(r'/[^ ].*[^ ]/ ', '', example)
        # Find where words begin bunched up next to some other
        # character (kanji, kana, periods et al. Not parenthesis,
        # quotes etc) and insert a space there.
        example = re.sub(
            r"""([^a-zA-Z0-9 “╳…〘【'"!\-([{\n])([a-zA-Z])""",
            r'\1 \2', example)
        example = re.sub(r'(\.)([^ \n])', r'\1 \2', example)
        # An example's translation begins immediately after the example
        # sentence, at the first “ or Latin letter. Split the two apart.
        # NOTE(review): '%' is used as the split marker, so an example
        # that itself contains '%' is split there as well.
        sentences = re.sub(r'“|[a-zA-Z]', r'%\g<0>', example,
                           count=1).split('%')
        # Some sentences don't include translations. Let's skip those
        # since we want bilingual cards.
        if len(sentences) < 2:
            return None
        sentence, translation = sentences[0], sentences[1]
        # Skip the various meanings, and just print the examples. If (a
        # loose match of) the word isn't found (the example may use a
        # different kanji), skip it.
        if re.match(r'[1-9] 【', sentence) or not word_pattern.search(sentence):
            return None
        # Replace the word we are studying with _____
        mcd = word_pattern.sub('_' * len(word), sentence)
        mcd_highlighted = word_pattern.sub(r'<color red>\g<0></color>',
                                           sentence)
        # Return the formatted string
        return ('{0}<br><size .5><color gray>{1}</color></size>\t{2}<size .5>'
                '<br><br><b>{3} ({4})</b><br>{5}</size>').format(
                    mcd, translation, mcd_highlighted, word, kana, definition)

    vocabulary = get_wkcontent('vocabulary', level)
    # Write UTF-8 explicitly: the locale's default encoding may be
    # unable to represent the Japanese characters.
    with open(get_filename('MCDs', level), 'w',
              encoding='utf-8') as mcd_file:
        # Setup the text alignment (left) and deck type (non-SRS)
        print('* text-align L,L,L,A,A', file=mcd_file)
        print('* card-order 1', file=mcd_file)
        # Cycle through the words.
        for item in vocabulary['requested_information']:
            word = item['character']
            kana = item['kana']
            definition = item['meaning']
            # Get the dictionary entry for the word. Move on to the
            # next iteration if there is no entry.
            entry = DCSCopyTextDefinition(None, word, (0, len(word)))
            if not entry:
                continue
            # Built once per word rather than once per example.
            word_pattern = loose_pattern(word)
            # Split the dictionary entry into its separate meanings.
            # NOTE(review): '%' is the split marker here too; a literal
            # '%' in the entry text adds an extra split point.
            meanings = re.sub(r'[1-9]?[1-9] 【', r'%\g<0>', entry).split('%')
            for meaning in meanings:
                # Split each meaning at the example sentence marker,
                # removing the conversation marker if needed.
                examples = re.sub(r'(会話)?(▸ )', '%', meaning).split('%')
                # The first 'example' is the definition and not an
                # example sentence so let's skip it.
                for example in examples[1:]:
                    # Format each example then print it to the file.
                    card = make_card(example, word, kana, definition,
                                     word_pattern)
                    if card is not None:
                        print(card, file=mcd_file)
def main():
    """
    Parse the command line and create the requested file(s).

    Two forms are accepted: ``-l LEVEL`` creates the radicals, kanji,
    vocabulary and MCD files for that level; a subcommand (radicals,
    kanji, vocabulary or mcd) followed by a level creates just that
    file. If both are given, ``-l`` takes precedence. With no
    arguments, or when nothing actionable was parsed, the help message
    is printed.

    Returns:
        None
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Grabs data from WaniKani'
                    ' and outputs files formatted for the Flashcards Deluxe'
                    ' iOS app.')
    subparsers = parser.add_subparsers(
        title='Subcommands',
        description='Commands to create individual files.')

    # Global arguments
    parser.add_argument(
        '-l', '--level', metavar='LEVEL', type=int,
        dest='level_all', required=False,
        help='Create files for all content types of the specified level')

    # One subcommand per content type; each takes the level as its only
    # positional argument and records the function that creates its file.
    subcommands = (
        ('radicals', 'Create a radicals file of this level',
         create_radicals_file),
        ('kanji', 'Create a kanji file of this level',
         create_kanji_file),
        ('vocabulary', 'Create a vocabulary file of this level',
         create_vocabulary_file),
        ('mcd', 'Create an MCD file of this level',
         create_MCD_file),
    )
    for name, help_text, handler in subcommands:
        subparser = subparsers.add_parser(name, help=help_text)
        subparser.set_defaults(func=handler)
        subparser.add_argument('level', type=int)

    # If no arguments were passed, print the help message.
    if len(sys.argv) < 2:
        parser.print_help()
        return

    argv = parser.parse_args()

    # If the user specified the -l option, create all the files for
    # that level. Compare against None so that an explicit "-l 0" is
    # not mistaken for "option absent".
    if argv.level_all is not None:
        create_radicals_file(argv.level_all)
        create_kanji_file(argv.level_all)
        create_vocabulary_file(argv.level_all)
        create_MCD_file(argv.level_all)
    elif hasattr(argv, 'func'):
        # Otherwise, create the file specified by the given subcommand.
        argv.func(argv.level)
    else:
        # 'func' is only set by a subcommand; without one there is
        # nothing to do, so show the usage instead of crashing.
        parser.print_help()
# Run main() only when this file is executed as a script. main() returns
# None, which sys.exit() turns into a successful (zero) exit status.
if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement