Advertisement
Guest User

Untitled

a guest
Aug 22nd, 2017
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.34 KB | None | 0 0
  1. #Import the libraries we want to use
  2. from bs4 import BeautifulSoup
  3. import requests
  4.  
  5. #Tell the user we are starting
  6. print("Queensland Court List Scraper")
  7. #Print a blank line
  8. print()
  9.  
  10. #Get the page
  11. url = "http://www.courts.qld.gov.au/__external/CourtsLawList/Brisbane.htm"
  12. result = requests.get(url)
  13. site = result.content
  14. #Parse it into something we can read (go from HTML to "soup")
  15. soup = BeautifulSoup(site,'lxml')
  16.  
  17. #Get a list of all the types of sittings
  18. subListings = soup.find_all(class_="subListing")
  19. #Select only the ones that are sentences
  20. sentences = [x for x in subListings if x.string.strip() == "(Sentence)"]
  21.  
  22. #For every sentence case we find
  23. for sentence in sentences:
  24.     #Print the court name
  25.     print(" ".join(sentence.parent.parent.parent.parent.previous_sibling.previous_sibling.previous_sibling.previous_sibling.string.strip().split(" ")[:2]))
  26.     #Go up a row in the table and get the data
  27.     sentenceRow = sentence.parent.previous_sibling.previous_sibling.contents
  28.     #Print name
  29.     print(sentenceRow[0].contents[1].string.strip())
  30.     #Print name of judge
  31.     print(sentenceRow[2].contents[0].string.strip())
  32.     #Print court and floor
  33.     print(sentenceRow[4].contents[0].string.strip() + ", " + sentenceRow[4].contents[2].string.strip())
  34.     #Print time
  35.     print(sentenceRow[6].contents[0].string.strip())
  36.     #Print a blank line
  37.     print();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement