Advertisement
PapstJL4U

scraping the dotabuff hero meta page

Oct 21st, 2017
260
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.92 KB | None | 0 0
"""Read Dotabuff hero meta data from a saved HTML page and export it as CSV."""
  2.  
import csv
import io
import urllib.request

from bs4 import BeautifulSoup
  5.  
  6. dotabuff = "https://www.dotabuff.com/heroes/meta"
  7. local = "dotabuff.html"
  8. with open(local, encoding="utf-8") as f:
  9.     stream = f.read()
  10.     soup = BeautifulSoup(stream, "lxml")
  11.     data = soup.table
  12.     chars = data.find_all("tr")
  13.  
  14.     header1 = "blank, <2k, <2k , 2k-3k, 2k-3k, 3k-4k, 3k-4k, 4k-5k, 4k-5k, 5k+, 5k+"
  15.     header2 = "hero, Pick%, Win%, Pick%, Win%, Pick%, Win% , Pick%, Win%, Pick%, Win%"
  16.     content = ""
  17.     for i in range (1, len(chars)):
  18.         output = ""
  19.         row = chars[i].find_all("td")
  20.         for j in range(1,len(row)):
  21.             output+=","+row[j].get_text()
  22.  
  23.         content += output[1:]+"\n"
  24.         #print(output)
  25.  
  26.         with open("dotaheroes.csv", "w", encoding="utf-8") as file:
  27.             file.write(header1+"\n")
  28.             file.write(header2)
  29.             file.write(content)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement