rodrigosantosbr

[Py] web scraping - DistroWatch

Feb 20th, 2019
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.71 KB | None | 0 0
  # Fetch a distribution's page from DistroWatch and print its title,
  # last-update line and description.
  #
  # Requires the third-party MechanicalSoup package:
  #     pip install MechanicalSoup
  #
  # On any failure the error is written to stderr and the script exits with
  # status 1, so callers (shell scripts, cron jobs) can detect the failure.
  import mechanicalsoup
  import sys

  # Value substituted into the "distribution" query parameter of the URL.
  distribution = "ubuntu"

  try:
      # Create a browser object
      br = mechanicalsoup.Browser()
      url = "https://distrowatch.com/table.php?distribution={}".format(distribution)
      page = br.get(url)  # e.g. <Response [200]>

      # Stop on HTTP errors (4xx/5xx) instead of parsing an error page as if
      # it were the distribution page.
      page.raise_for_status()

      # Every value printed below is read from the cell with class "TablesTitle".
      td = page.soup.find("td", class_='TablesTitle')
      if td is None:
          # find() returns None when nothing matches; report that explicitly
          # rather than failing later with an opaque AttributeError.
          raise ValueError("no 'TablesTitle' cell found at {}".format(url))

      title = td.h1.contents[0].strip()  # e.g. "ubuntu"
      print("Distro: {}\n".format(title))

      last_update = td.div.contents[0].strip()  # e.g. "Last Update: 2019-02-19 21:20 UTC"
      print("last update: {}\n".format(last_update))

      # The description is the node that immediately follows the <ul> in the cell.
      desc = td.ul.next_sibling.strip()
      print("description: {}\n".format(desc))
  except Exception as e:
      # Top-level boundary of the script: report whatever went wrong (network,
      # HTTP status, unexpected page layout). Passing a string to sys.exit()
      # prints it to stderr and exits with status 1; a bare sys.exit() would
      # report success (status 0) to the caller.
      sys.exit("Error: {}".format(e))
Add Comment
Please, Sign In to add comment