#!/usr/bin/env python3
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
import re

if __name__ == "__main__":
    # full url with https://...
    # use a custom user agent, as the default Python one is blocked almost everywhere
    req = Request(
        "https://YOUR_WEBSITE",
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-en) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4'
        }
    )
    html = urlopen(req)
    # create a yummy soup from the html
    soup = BeautifulSoup(html, 'html.parser')
    # find your table by id
    tbody = soup.find('table', {'id': 'tableInfoParcels'}).tbody
    # get the first td of every row
    tds = [tr.td for tr in tbody.find_all('tr')]
    # the caption is the text between <b>...</b>
    captions = [td.b.get_text() for td in tds]
    # keep the raw markup of each td so the regex below can see the <b> tag
    # and pull out the address etc. text that follows it
    texts = [str(td) for td in tds]
    info_texts = []
    for text in texts:
        # we want the data between the quotes that follow the </b>, and we want
        # the . to be able to match newline characters, hence re.DOTALL
        found = re.search(r"<b.*>.*</b>.*\"(.*)\"", text, re.DOTALL)
        if found:
            info_texts.append(found.group(1))
    for caption, info_text in zip(captions, info_texts):
        print(caption, info_text)
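
For clarity, here is a small self-contained sketch of the same parsing steps run against an inline HTML sample. The table id tableInfoParcels comes from the script above, but the row layout (a <b> caption followed by a quoted info string) is only an assumption inferred from the regex, not the real page markup.

# Offline sketch: same parsing logic as above, fed with assumed sample markup.
from bs4 import BeautifulSoup
import re

SAMPLE_HTML = """
<table id="tableInfoParcels">
  <tbody>
    <tr><td><b>Address</b> note "123 Example Street"</td></tr>
    <tr><td><b>Status</b> note "in transit"</td></tr>
  </tbody>
</table>
"""

soup = BeautifulSoup(SAMPLE_HTML, 'html.parser')
tbody = soup.find('table', {'id': 'tableInfoParcels'}).tbody
tds = [tr.td for tr in tbody.find_all('tr')]
# caption = text inside <b>...</b>; info = quoted text after </b> in the raw markup
captions = [td.b.get_text() for td in tds]
info_texts = []
for td in tds:
    found = re.search(r"<b.*>.*</b>.*\"(.*)\"", str(td), re.DOTALL)
    if found:
        info_texts.append(found.group(1))
for caption, info_text in zip(captions, info_texts):
    print(caption, info_text)  # prints: Address 123 Example Street / Status in transit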