Advertisement
TAImatem

blocks_forked scraper

Aug 5th, 2021
1,140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.63 KB | None | 0 0
  1. import requests
  2. from urllib.request import Request, urlopen
  3. from bs4 import BeautifulSoup
  4. from collections import OrderedDict
  5. import sys
  6.  
  7. #i.e. py blockScrapper.py etherscan.io
  8.  
  9. URL = "https://"+sys.argv[1]+"/blocks_forked?ps=100&p="
  10. print(URL)
  11.  
  12. def getData(sess, page):
  13.     url = URL + page
  14.     print("Retrieving page", page)
  15.     #return BeautifulSoup(sess.get(url).text, 'html.parser')
  16.     req = Request(url, headers={'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'})
  17.     response = urlopen(req, timeout=20).read()
  18.     response_close = urlopen(req, timeout=20).close()
  19.     return BeautifulSoup(response, "html.parser")
  20.  
  21. def getPage(sess, page):
  22.     table = getData(sess, str(page)).find("table", {"class": "table-hover"})
  23.     rows = table.find('tbody').find_all('tr')
  24.     columns =[X.text.strip() for X in table.find('thead').find_all('th')]
  25.     index = columns.index('ReorgDepth')
  26.     vals =  [[X.text.strip() for X in row.find_all('td')][index+1] for row in rows]
  27.     #print(vals)
  28.     return vals
  29.  
  30. def main():
  31.     resp = requests.get(URL)
  32.     sess = requests.Session()
  33.  
  34.     counts = OrderedDict()
  35.  
  36.     page = 0
  37.     while True:
  38.         page += 1
  39.         try:
  40.             data = getPage(sess, page)
  41.  
  42.             for item in data:
  43.                     if item in counts:
  44.                         counts[item] += 1
  45.                     else:
  46.                         counts[item] = 1
  47.         except:
  48.             break
  49.  
  50.     for k, v in counts.items():
  51.         print("Forks of length {}: {}".format(k, v))
  52.  
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
  55.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement