# Untitled

# import the required libraries
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Collect article URLs from the Reuters technology category page.
# The page is fetched through the ZenRows API, parsed with BeautifulSoup,
# and every headline link found is stored in data_links as an absolute URL.
base_URL = 'https://www.reuters.com'

# get the news category
category_URL = f'{base_URL}/technology/'

# NOTE(review): not used in the visible part of the script; kept in case
# later code relies on it.
page_count = 1

data_links = []

# Upper bound (in seconds) on how long to wait for the API. Without a
# timeout, requests.get can block indefinitely.
# NOTE(review): 120 is a generous guess for rendered pages; tune as needed.
REQUEST_TIMEOUT_SECONDS = 120

# specify ZenRows request parameters
params = {
    'url': category_URL,
    'apikey': '<YOUR_ZENROWS_API_KEY>',
    'js_render': 'true',
    'premium_proxy': 'true',
}

# route the request through ZenRows; connection errors and timeouts raise
# requests.RequestException and are deliberately left to propagate
response = requests.get(
    'https://api.zenrows.com/v1/',
    params=params,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    print(f'Request successful for {category_URL}')

    # find matching class names to bypass obfuscated class characters
    article_links = soup.find_all(
        'a',
        class_=lambda value: value and 'media-story-card__headline' in value,
    )

    # iterate through link cards to get article unique hrefs
    for link in article_links:
        href = link.get('href')

        # an anchor without a usable href cannot be turned into a URL;
        # skip it instead of crashing on `str + None`
        if not href:
            continue

        # resolve against the base URL: root-relative paths are joined as
        # before, while already-absolute hrefs are kept intact
        data_links.append(urljoin(base_URL, href))
else:
    print(f'Error fetching links for page: {response.status_code}')

print(data_links)