Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json
import urllib.request

import requests
from bs4 import BeautifulSoup
# Inclusive lower bound on the row number of the kanji to keep, in case you
# only want a slice of the list.
KANJI_MINIMUM = 0
# Inclusive upper bound on the row number of the kanji to keep. 50000 is
# deliberately huge and basically means "I want them all".
KANJI_MAXIMUM = 50000
# Wikipedia article whose tables list the kyouiku kanji.
WIKI_URL = "https://en.wikipedia.org/wiki/Ky%C5%8Diku_kanji"
def listKanjis():
    """Extract the kanji from the tables of the Wikipedia page at WIKI_URL.

    A table row is kept when the text of its first cell parses as an integer
    lying within [KANJI_MINIMUM, KANJI_MAXIMUM] (both bounds inclusive); the
    text of the row's second cell is then taken as the kanji.

    Returns:
        A list with the text of the second cell of every kept row, in
        document order.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        IndexError: if fewer than two matching tables are found on the page.
    """
    req = requests.get(WIKI_URL)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    req.raise_for_status()
    # Scraper module; needs bs4 (and the lxml parser) installed via pip3.
    soup = BeautifulSoup(req.content, 'lxml')
    table_classes = {"class": ["sortable", "plainrowheaders"]}
    wikitables = soup.find_all("table", table_classes)  # tables in the page

    kanjis = []
    # NOTE(review): the second positional argument of find_all() is an
    # attribute filter, not a search scope, so this call still searches the
    # whole document rather than only wikitables[1]. The intent was presumably
    # wikitables[1].find_all("tr"); it is left unchanged because which table
    # index holds the list cannot be confirmed from here.
    for row in soup.find_all("tr", wikitables[1]):
        cells = row.find_all(["th", "td"])
        if len(cells) < 2:
            # No number/kanji pair in this row.
            continue
        try:
            number = int(cells[0].text)
        except ValueError:
            # First cell is not a number (header or unrelated row).
            continue
        if KANJI_MINIMUM <= number <= KANJI_MAXIMUM:
            kanjis.append(cells[1].text)
    return kanjis
def writeKanjisInFile(kanjis):
    """Write every kanji to 'kyouiku_kanjis.txt' (handy for debugging).

    The strings are written back to back, exactly as given, with no separator
    added between them. An existing file is overwritten.

    Args:
        kanjis: iterable of strings to write.
    """
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent kanji, which would make the write fail.
    with open('kyouiku_kanjis.txt', 'w', encoding='utf-8') as file:
        file.writelines(kanjis)
def request(action, **params):
    """Build the payload dictionary for one API call.

    Args:
        action: name of the action to perform.
        **params: keyword arguments forwarded as the action's parameters.

    Returns:
        A dict with the keys 'action', 'params' and 'version' (always 6).
    """
    return {'action': action, 'params': params, 'version': 6}
def invoke(action, **params):
    """Send one action to the server at http://localhost:8765 and return its result.

    YOU NEED THE ANKICONNECT PLUGIN AND ANKI UP AND RUNNING for the request
    to succeed.

    Args:
        action: name of the action to perform.
        **params: keyword arguments forwarded as the action's parameters.

    Returns:
        The value of the 'result' field of the JSON response.

    Raises:
        Exception: if the response does not consist of exactly the 'error'
            and 'result' fields, or if its 'error' field is not None (the
            error value is then used as the exception message).
    """
    requestJson = json.dumps(request(action, **params)).encode('utf-8')
    http_request = urllib.request.Request('http://localhost:8765', requestJson)
    # Context manager so the HTTP response is closed even if decoding fails.
    with urllib.request.urlopen(http_request) as http_response:
        response = json.load(http_response)

    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']
if __name__ == "__main__":
    # Scrape the kanji, dump them to a file, then tag the first matching note
    # of each kanji.
    kanjis = listKanjis()
    writeKanjisInFile(kanjis)
    for kanji in kanjis:
        note_ids = invoke("findNotes", query="kanji:" + kanji)
        if not note_ids:
            # No note matches this kanji; skip it instead of crashing with
            # an IndexError and leaving the remaining kanji untagged.
            print("No note found for kanji " + repr(kanji) + ", skipping")
            continue
        # Or whatever tag seems important to you.
        invoke("addTags", notes=[note_ids[0]], tags='Kyouiku')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement