Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
"""Queensland Court List scraper.

Fetches the Brisbane court-list page and prints, for every sitting
marked "(Sentence)", the court name, defendant name, presiding judge,
court/floor, and sitting time.
"""
# Import the libraries we want to use
from bs4 import BeautifulSoup
import requests

# Page listing the day's Brisbane court sittings.
URL = "http://www.courts.qld.gov.au/__external/CourtsLawList/Brisbane.htm"


def main():
    """Download the court-list page and print every sentence sitting."""
    # Tell the user we are starting, followed by a blank line.
    print("Queensland Court List Scraper")
    print()

    # Get the page. Use a timeout so we never hang forever, and fail
    # loudly on an HTTP error instead of parsing an error page.
    result = requests.get(URL, timeout=30)
    result.raise_for_status()

    # Parse the raw HTML into something we can read (HTML -> "soup").
    soup = BeautifulSoup(result.content, "lxml")

    # Every sitting type is tagged with class "subListing"; keep only
    # those labelled "(Sentence)". Guard against tags whose .string is
    # None (mixed content) before calling .strip() on it.
    sub_listings = soup.find_all(class_="subListing")
    sentences = [
        tag for tag in sub_listings
        if tag.string and tag.string.strip() == "(Sentence)"
    ]

    # For every sentence case we find:
    for sentence in sentences:
        # Walk up to the section heading and print its first two words
        # (the court name). NOTE(review): this sibling walk is tied to
        # the page's exact markup — confirm against the live page.
        heading = (
            sentence.parent.parent.parent.parent
            .previous_sibling.previous_sibling
            .previous_sibling.previous_sibling
        )
        print(" ".join(heading.string.strip().split(" ")[:2]))

        # Go up a row in the table; that row holds the case details.
        sentence_row = sentence.parent.previous_sibling.previous_sibling.contents

        # Defendant name
        print(sentence_row[0].contents[1].string.strip())
        # Name of the presiding judge
        print(sentence_row[2].contents[0].string.strip())
        # Court and floor
        print(sentence_row[4].contents[0].string.strip() + ", "
              + sentence_row[4].contents[2].string.strip())
        # Sitting time
        print(sentence_row[6].contents[0].string.strip())
        # Blank line between cases
        print()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement