Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape the Harvest seed table from poedb.tw and print it to stdout as JSON.
#
# The output is a JSON list with one object per data row of the first
# <table> on the page:
#     {
#         "tier": <text of column 0>,
#         "monster_name": <text of column 1>,
#         "description": <text of column 2, excluding direct <li> children>,
#         "options": [{"keywords": [...], "description": <li text>}, ...]
#     }
import json
import os

import requests
from bs4 import BeautifulSoup, NavigableString


def _parse_option(option):
    """Return the keywords and full text of one ``<li>`` craft option."""
    return {
        # Keywords are the non-empty <span> texts inside the option.
        'keywords': [span.text for span in option.select('span') if span.text != ""],
        'description': option.text,
    }


def _parse_row(row):
    """Convert one ``<tr>`` into a craft record.

    Returns ``None`` for rows that do not have the three expected ``<td>``
    cells (e.g. header rows made of ``<th>``), so callers can skip them
    instead of failing with an ``IndexError``.
    """
    cols = row.find_all('td')
    if len(cols) < 3:
        return None

    crafts = cols[2]
    # Iterating a tag yields its direct children only.
    # NOTE(review): <li> elements nested inside a <ul>/<ol> are therefore not
    # excluded here and their text ends up in the description as well --
    # confirm against the page markup whether that is intended.
    description = "".join(e.text for e in crafts if e.name != 'li')

    return {
        'tier': cols[0].text,
        'monster_name': cols[1].text,
        'description': description,
        'options': [_parse_option(option) for option in crafts.select('li')],
    }


url = "https://poedb.tw/us/HarvestSeed"
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as data.
r = requests.get(url, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, 'html.parser')
table = soup.find('table')
if table is None:
    raise RuntimeError("no <table> found at %s" % url)

# html.parser does not insert a <tbody> when the markup omits it, so fall
# back to searching the table itself; non-data rows are skipped below.
body = table.find('tbody')
if body is None:
    body = table
rows = body.find_all('tr')

out = []
for row in rows:
    craft_data = _parse_row(row)
    if craft_data is not None:
        out.append(craft_data)

print(json.dumps(out, indent=4))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement