Advertisement
deathbeam

Untitled

Jan 26th, 2022
844
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import requests
  2. import json
  3. import os
  4. from bs4 import BeautifulSoup, NavigableString
  5.  
  6. url = "https://poedb.tw/us/HarvestSeed"
  7.  
  8. r = requests.get(url)
  9.  
  10. # layout = {original: "text", keyword: [], seed: ""}
  11. soup = BeautifulSoup(r.content, 'html.parser')
  12.  
  13. out = []
  14.  
  15. table = soup.find('table')
  16. body = table.find('tbody')
  17. rows = body.find_all('tr')
  18.  
  19. for row in rows:
  20.     cols = row.find_all('td')
  21.  
  22.     seed_tier = cols[0].text
  23.     monster_name = cols[1].text
  24.     crafts = cols[2]
  25.     description = "".join([e.text for e in crafts if e.name != 'li'])
  26.  
  27.     craft_data = {
  28.         'tier': seed_tier,
  29.         'monster_name': monster_name,
  30.         'description': description,
  31.         'options': []
  32.     }
  33.  
  34.     for option in crafts.select('li'):
  35.         keywords = [e.text for e in option.select('span') if e.text != ""]
  36.         description = option.text
  37.  
  38.         craft_data['options'].append({
  39.             'keywords': keywords,
  40.             'description': description
  41.         })
  42.  
  43.     out.append(craft_data)
  44.  
  45. print(json.dumps(out, indent=4))
  46.  
Advertisement
Advertisement
Advertisement
RAW Paste Data Copied
Advertisement