Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## FIRST, register & get an API Token from https://scrapingant.com/
- ## facilitates simple requests+bs4 for sites with blockers, but
- ## NOTE: the free tier allows only a limited number of requests per month
- ## NOTE: it can be very slow (but the proxies can be useful)
- ## request/cloudscraper/HTMLSession version[s] at https://pastebin.com/rBTr06vy
- ## [simplified] selenium version at https://pastebin.com/VLZ2vPYK
- ## sample usage: similar to first example at https://pastebin.com/E3sCEr9r
- import requests
- from bs4 import BeautifulSoup
- import urllib.parse
def linkToSoup_scrapingAnt(url_to_Scrape, pCountry=None, setResid=False,
                           apiKey=None, loadCss=None, fparser='html.parser',
                           isv=True, returnErr=False):
    """Fetch a page through the ScrapingAnt proxy API and parse it with BeautifulSoup.

    Args:
        url_to_Scrape: URL of the page to fetch.
        pCountry: optional proxy country code (more expensive proxy tier).
        setResid: if True, request a residential proxy (more expensive).
        apiKey: ScrapingAnt API token; falls back to the hard-coded defaultKey.
        loadCss: CSS selector the API should wait for before returning the page.
        fparser: parser name passed to BeautifulSoup.
        isv: verbose - print the request URL and any error message.
        returnErr: if True, return the error-message string on failure
            instead of None.

    Returns:
        BeautifulSoup of the fetched page on success; otherwise None
        (or the error-message string when returnErr is True).
    """
    defaultKey = 'YOUR_API_TOKEN' # paste here
    sa_api = 'https://api.scrapingant.com/v2/general'
    sa_key = str(apiKey) if apiKey else defaultKey
    qParams = {'url': url_to_Scrape, 'x-api-key': sa_key}
    if setResid: qParams['proxy_type'] = 'residential' # more expensive
    if pCountry: qParams['proxy_country'] = pCountry # more expensive
    if loadCss: qParams['wait_for_selector'] = loadCss
    reqUrl = f'{sa_api}?{urllib.parse.urlencode(qParams)}'
    if isv: print('fetching with ScrapingAnt:', url_to_Scrape, '\nwith ', reqUrl)
    r = requests.get(reqUrl)

    # ScrapingAnt signals API-level errors with a JSON body whose sole key
    # is "detail"; anything else (non-JSON or other keys) is page content.
    errMsg = None
    try:
        if [*r.json()] == ['detail']:
            errMsg = f'{r.json()["detail"]} [<response {r.status_code}> {r.reason}] {r.url}'
    except Exception:
        # body is not JSON -> a normal page response, not an API error
        pass
    if errMsg is None:
        if r.status_code == 200:
            return BeautifulSoup(r.content, fparser)
        errMsg = f'failed to fetch page [{r.status_code} {r.reason}] {r.url}'
    if isv: print(errMsg)
    return errMsg if returnErr else None
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement