TAImatem

blocks_forked scraper

Aug 5th, 2021
784
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import requests
  2. from urllib.request import Request, urlopen
  3. from bs4 import BeautifulSoup
  4. from collections import OrderedDict
  5. import sys
  6.  
  7. #i.e. py blockScrapper.py etherscan.io
  8.  
  9. URL = "https://"+sys.argv[1]+"/blocks_forked?ps=100&p="
  10. print(URL)
  11.  
  12. def getData(sess, page):
  13.     url = URL + page
  14.     print("Retrieving page", page)
  15.     #return BeautifulSoup(sess.get(url).text, 'html.parser')
  16.     req = Request(url, headers={'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'})
  17.     response = urlopen(req, timeout=20).read()
  18.     response_close = urlopen(req, timeout=20).close()
  19.     return BeautifulSoup(response, "html.parser")
  20.  
  21. def getPage(sess, page):
  22.     table = getData(sess, str(page)).find("table", {"class": "table-hover"})
  23.     rows = table.find('tbody').find_all('tr')
  24.     columns =[X.text.strip() for X in table.find('thead').find_all('th')]
  25.     index = columns.index('ReorgDepth')
  26.     vals =  [[X.text.strip() for X in row.find_all('td')][index+1] for row in rows]
  27.     #print(vals)
  28.     return vals
  29.  
  30. def main():
  31.     resp = requests.get(URL)
  32.     sess = requests.Session()
  33.  
  34.     counts = OrderedDict()
  35.  
  36.     page = 0
  37.     while True:
  38.         page += 1
  39.         try:
  40.             data = getPage(sess, page)
  41.  
  42.             for item in data:
  43.                     if item in counts:
  44.                         counts[item] += 1
  45.                     else:
  46.                         counts[item] = 1
  47.         except:
  48.             break
  49.  
  50.     for k, v in counts.items():
  51.         print("Forks of length {}: {}".format(k, v))
  52.  
  53. if __name__ == "__main__":  # run the scraper only when executed as a script, not on import
  54.     main()
  55.  
RAW Paste Data