Advertisement
Guest User

WK to FCD

a guest
Jul 9th, 2014
306
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.54 KB | None | 0 0
  1. import requests
  2. import json
  3. import sys
  4. import getopt
  5. import argparse
  6. import os
  7. import re
  8. from DictionaryServices import *
  9.  
  10.  
  11. # The API key for your WK account. Found here:
  12. # https://www.wanikani.com/account
  13. APIKey = ''
  14.  
  15.  
  16. def get_filename(type, level):
  17.     """
  18.    Create a file name like "L04 Kanji.txt"
  19.  
  20.    Args:
  21.        type: the type of content (radicals, kanji etc)
  22.        level: the WK level of content the file will contain.
  23.    Returns:
  24.        The file name.
  25.    """
  26.     if level < 10:
  27.         prefix = 'L0'
  28.     else:
  29.         prefix = 'L'
  30.  
  31.     fileName = prefix + str(level) + ' ' + type + '.txt'
  32.  
  33.     return fileName
  34.  
  35.  
  36. def get_wkcontent(type, level):
  37.     """
  38.    Request the specified content from WK then decode and deserialize
  39.    it.
  40.  
  41.    Args:
  42.        type: the type of content (radicals, kanji etc)
  43.        level: the WK level of content to retrieve.
  44.  
  45.    Returns:
  46.        A Python object containing the WK data
  47.    """
  48.     type = type.lower()
  49.  
  50.     if type not in ['radicals', 'kanji', 'vocabulary']:
  51.         print("type must be one of: radicals, kanji, vocabulary\n")
  52.         raise ValueError(type)
  53.  
  54.     try:
  55.         request = requests.get('https://www.wanikani.com/api/user/'
  56.             + APIKey + '/' + type + '/' + str(level))
  57.     except requests.ConnectionError:
  58.         print("ERROR: Can't connect to WaniKani.\n")
  59.         raise
  60.     try:
  61.         data = json.loads(request.content.decode('utf-8'))
  62.     except ValueError:
  63.         print("ERROR: Couldn't load JSON content. Check the request URL. \n")
  64.         raise
  65.     else:
  66.         if 'error' in data:
  67.             print("ERROR: There was a problem with the request.\n")
  68.             raise Exception(data['error']['message'] + ' ' +
  69.                 data['error']['code'])
  70.  
  71.     return data
  72.  
  73.  
  74. def create_radicals_file(level):
  75.     """
  76.    Create a file containing the radicals of the specified level and
  77.    their meanings in a format compatible with Flashcards Deluxe.
  78.  
  79.    Args:
  80.        level: the WK level of content to retrieve
  81.  
  82.    Returns:
  83.        None
  84.    """
  85.     radicals = get_wkcontent('radicals', level)
  86.     with open(get_filename('Radicals', level), 'w') as radicals_file:
  87.         for item in radicals['requested_information']:
  88.             if item['character'] is not None:
  89.                 print(item['character'], item['meaning'], sep='\t',
  90.                     file=radicals_file)
  91.  
  92.  
  93. def create_kanji_file(level):
  94.     """
  95.    Create a file containing the kanji of the specified level along
  96.    with their important readings and their meanings in a format
  97.    compatible with Flashcards Deluxe.
  98.  
  99.    Args:
  100.        level: the WK level of content to retrieve
  101.  
  102.    Returns:
  103.        None
  104.    """
  105.     kanji = get_wkcontent('kanji', level)
  106.     with open(get_filename('Kanji', level), 'w') as kanji_file:
  107.         for item in kanji['requested_information']:
  108.             print(item['character'], item[item['important_reading']],
  109.                 item['meaning'], sep='\t', file=kanji_file)
  110.  
  111.  
  112. def create_vocabulary_file(level):
  113.     """
  114.    Create a file containing the vocabulary of the specified level
  115.    along with their readings and meanings in a format compatible with
  116.    Flashcards Deluxe.
  117.  
  118.    Args:
  119.        level: the WK level of content to retrieve
  120.  
  121.    Returns:
  122.        None
  123.    """
  124.     vocabulary = get_wkcontent('vocabulary', level)
  125.     with open(get_filename('Vocabulary', level), 'w') as vocabulary_file:
  126.         for item in vocabulary['requested_information']:
  127.             print(item['character'], item['kana'], item['meaning'], sep='\t',
  128.                 file=vocabulary_file)
  129.  
  130.  
  131. def create_MCD_file(level):
  132.     """
  133.    Create a file containing something similar to MCDs (see:
  134.    http://learnanylanguage.wikia.com/wiki/Massive-Context_Cloze_Deletion)
  135.    composed of examples sentences and their translations sourced from
  136.    Dictionary.app's ウィズダム英和辞典 (The Wisdom English-Japanese
  137.    Dictionary) using the vocabulary words of the specified level. The
  138.    various splitting and formatting done in this function are specific
  139.    to this dictionary, and will likely fail for others.
  140.  
  141.    Args:
  142.        level: the WK level of content to retrieve
  143.  
  144.    Returns:
  145.        None
  146.    """
  147.     def make_card(example):
  148.         """
  149.        Massage the example into a format that is presentable in FCD.
  150.  
  151.        Args:
  152.            example: the example sentence (and its translation)
  153.        Returns:
  154.            The formatted card
  155.        """
  156.         # Remove pronouncers for place names.
  157.         example = re.sub('/[^ ].*[^ ]/ ', '', example)
  158.  
  159.         # Find where words begin bunched up next to some other
  160.         # character (kanji, kana, periods et al. Not parenthesis,
  161.         # quotes etc) and insert a space there.
  162.         example = re.sub('([^a-zA-Z0-9   “╳…〘【\'"!\-([{\n])([a-zA-Z])',
  163.             r'\1 \2', example)
  164.         example = re.sub('(\.)([^ \n])', r'\1 \2', example)
  165.  
  166.         # An example's translation begins immediately after the example
  167.         # sentence. Split the two apart and store them in a list.
  168.         sentences = re.sub('“|[a-zA-Z]', r'%\g<0>', example, count=1).split('%')
  169.  
  170.         # Some sentences don't include translations. Let's skip those
  171.         # since we want bilingual cards.
  172.         try:
  173.             sentences[1]
  174.         except IndexError:
  175.             return None
  176.  
  177.         # word_looseMatch lets us find conjugated forms of the word for
  178.         # highlighting and removal. It replaces okurigana with
  179.         # [あ-ん] and is used as a regex.
  180.         word_looseMatch = re.sub('[あ-ん]', '[あ-ん]', word)
  181.  
  182.         # Skip the various meanings, and just print the examples. [0]
  183.         # is the example, [1] is the translation. If (a loose match of)
  184.         # the word isn't found (the example may use a different kanji),
  185.         # skip it.
  186.         if re.match('[1-9] 【', sentences[0]) or not re.search(word_looseMatch,
  187.             sentences[0]):
  188.             return None
  189.  
  190.         # Replace the word we are studying with _____
  191.         mcd = re.sub(word_looseMatch, '_' * len(word), sentences[0])
  192.         mcd_highlighted = re.sub(word_looseMatch, r'<color red>\g<0></color>',
  193.             sentences[0])
  194.  
  195.         # Return the formatted string
  196.         card = '{0}<br><size .5><color gray>{1}</color></size>\t{2}<size .5>' \
  197.             '<br><br><b>{3} ({4})</b><br>{5}</size>'.format(
  198.             mcd, sentences[1], mcd_highlighted, word, kana, definition)
  199.  
  200.         return card
  201.  
  202.    
  203.     # Check to see if we already have the vocabulary data. If not,
  204.     # retrieve it.
  205.     try:
  206.         vocabulary
  207.     except NameError:
  208.         vocabulary = get_wkcontent('vocabulary', level)
  209.  
  210.     with open(get_filename('MCDs', level), 'w') as mcd_file:
  211.         # Setup the text alignment (left) and deck type (non-SRS)
  212.         print('*    text-align  L,L,L,A,A', file=mcd_file)
  213.         print('*    card-order  1', file=mcd_file)
  214.        
  215.         # Cycle through the words.
  216.         for item in vocabulary['requested_information']:
  217.             word = item['character']
  218.             kana = item['kana']
  219.             definition = item['meaning']
  220.            
  221.             # Get the dictionary entry for the word. Move on to the
  222.             # next iteration if there is no entry.
  223.             wordrange = (0, len(word))
  224.             entry = DCSCopyTextDefinition(None, word, wordrange)
  225.             if not entry:
  226.                 continue
  227.            
  228.             # Split the dictionary entry into its separate meanings.
  229.             meanings = re.sub('[1-9]?[1-9] 【', r'%\g<0>', entry).split('%')
  230.            
  231.             # Split each meaning into its individual examples
  232.             for meaning in meanings:
  233.                 # Split each meaning at the example sentence marker,
  234.                 # removing the conversation marker if needed.
  235.                 examples = re.sub('(会話)?(▸ )', r'%', meaning).split('%')
  236.                
  237.                 for example in examples:
  238.                     # The first 'example' is the definition and not an
  239.                     # example sentence so let's skip it.
  240.                     if example is examples[0]:
  241.                         # definition = example
  242.                         continue
  243.                     # Format each example then print it to the file.
  244.                     card = make_card(example)
  245.                     if card is None:
  246.                         continue
  247.                     print(card, file=mcd_file)
  248.  
  249.  
  250. def main():
  251.     # Parse command line arguments
  252.     parser = argparse.ArgumentParser(description='Grabs data from WaniKani' \
  253.         ' and outputs files formatted for the Flashcards Deluxe iOS app.')
  254.     subparsers = parser.add_subparsers(title='Subcommands',
  255.         description='Commands to create individual files.')
  256.    
  257.     # Global arguments
  258.     parser.add_argument('-l', '--level', metavar='LEVEL', type=int,
  259.         dest='level_all', required=False,
  260.         help='Create files for all content types of the specified level')
  261.    
  262.     # Subcommand: radicals
  263.     parser_radicals = subparsers.add_parser('radicals',
  264.         help='Create a radicals file of this level')
  265.     parser_radicals.set_defaults(func=create_radicals_file)
  266.     parser_radicals.add_argument('level', type=int)
  267.    
  268.     # Subcommand: kanji
  269.     parser_kanji = subparsers.add_parser('kanji',
  270.         help='Create a kanji file of this level')
  271.     parser_kanji.set_defaults(func=create_kanji_file)
  272.     parser_kanji.add_argument('level', type=int)
  273.    
  274.     # Subcommand: vocabulary
  275.     parser_vocabulary = subparsers.add_parser('vocabulary',
  276.         help='Create a vocabulary file of this level')
  277.     parser_vocabulary.set_defaults(func=create_vocabulary_file)
  278.     parser_vocabulary.add_argument('level', type=int)
  279.    
  280.     # Subcommand: mcd
  281.     parser_mcd = subparsers.add_parser('mcd',
  282.         help='Create an MCD file of this level')
  283.     parser_mcd.set_defaults(func=create_MCD_file)
  284.     parser_mcd.add_argument('level', type=int)
  285.    
  286.     # If no arguments were passed, print the help message.
  287.     if len(sys.argv) < 2:
  288.         parser.print_help()
  289.         return
  290.  
  291.     argv = parser.parse_args()
  292.  
  293.     # If the user specificed the -a option and gave a level, create all
  294.     # the files. Otherwise, create the file specified by the given
  295.     # subcommand.
  296.     if argv.level_all:
  297.         create_radicals_file(argv.level_all)
  298.         create_kanji_file(argv.level_all)
  299.         create_vocabulary_file(argv.level_all)
  300.         create_MCD_file(argv.level_all)
  301.     else:
  302.         argv.func(argv.level)
  303.  
  304.  
  305. if __name__ == "__main__":
  306.     sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement