Advertisement
Guest User

Untitled

a guest
Oct 15th, 2019
294
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.23 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import json
  4. import urllib.request
  5.  
  6. KANJI_MINIMUM = 0 #INCLUDED, In case you just want to have a slice of the kanjis
  7. KANJI_MAXIMUM = 50000#INCLUDED, In case you want a slice. 50000 basically means I want them all
  8. WIKI_URL = "https://en.wikipedia.org/wiki/Ky%C5%8Diku_kanji"
  9.  
  10. def listKanjis():#Extract kanjis from the tables
  11.     req = requests.get(WIKI_URL)
  12.     soup = BeautifulSoup(req.content, 'lxml')#scraper module, need install bs4 via pip3
  13.     table_classes = {"class": ["sortable", "plainrowheaders"]}
  14.     wikitables = soup.findAll("table", table_classes)#Get the tables in page
  15.     kanjis = []
  16.     for row in soup.findAll("tr", wikitables[1]):
  17.         cells = row.findAll(["th", "td"])
  18.         fl = cells[0].text
  19.         try :
  20.             id = int(fl)
  21.             if id >= KANJI_MINIMUM :
  22.                 kanjis.append(cells[1].text)
  23.         except ValueError :
  24.             #Not a number
  25.             continue
  26.     return kanjis
  27.  
  28. def writeKanjisInFile(kanjis): #Maybe you want a nice file ? Helped me for debug
  29.     with open('kyouiku_kanjis.txt', 'w+') as file :
  30.         for kanji in kanjis :
  31.             file.write(kanji)
  32.  
  33.  
  34. def request(action, **params):
  35.     return {'action': action, 'params': params, 'version': 6}
  36.  
  37. def invoke(action, **params):
  38.     requestJson = json.dumps(request(action, **params)).encode('utf-8')
  39.     response = json.load(urllib.request.urlopen(urllib.request.Request('http://localhost:8765', requestJson)))#!!!!! YOU NEEED THE ANKICONNECT PLUGIN AND ANKI UP AND RUNNING
  40.     if len(response) != 2:
  41.         raise Exception('response has an unexpected number of fields')
  42.     if 'error' not in response:
  43.         raise Exception('response is missing required error field')
  44.     if 'result' not in response:
  45.         raise Exception('response is missing required result field')
  46.     if response['error'] is not None:
  47.         raise Exception(response['error'])
  48.     return response['result']
  49.  
  50.  
  51.  
  52.  
  53. if __name__ == "__main__" :
  54.     kanjs = listKanjis()
  55.     writeKanjisInFile(kanjs)
  56.     for kanji in kanjs :
  57.         sFN = invoke("findNotes", query="kanji:"+kanji)[0]
  58.         sAT = invoke("addTags", notes = [sFN], tags = 'Kyouiku')#Or whatever tag seems important to you
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement