Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # AmBotTools.py
- import certifi
- import ssl
- from urllib.request import urlopen, Request
- from http.client import HTTPResponse
- import bs4
- from bs4 import BeautifulSoup
class AmBotTools:
    """Helpers for downloading a URL over HTTPS and listing its anchors.

    Instance attributes (set in __init__):
        mCAFile: path returned by certifi.where().
        mSSLContext: SSL context created from that CA file; consumeUrl
            passes it to urlopen.
    """

    def __init__(self):
        self.mCAFile = certifi.where()
        self.mSSLContext = ssl.create_default_context(
            cafile=self.mCAFile
        )
    # def __init__

    def consumeUrl(
        self,
        pUrl: str,
        pbPreferBytes: bool = False,
        pStrEncoding="UTF-8"
    ):
        """Download pUrl and return its body.

        Args:
            pUrl: the address to request.
            pbPreferBytes: when true, return the raw bytes that were read.
            pStrEncoding: encoding used to decode the bytes when a str
                is returned.

        Returns:
            bytes if pbPreferBytes is true, otherwise the decoded str.

        Nothing is caught here: errors raised by urlopen or by the
        decoding step propagate to the caller.
        """
        # The with-block closes the response once the body has been read,
        # so the connection is released even if read() raises.
        with urlopen(url=pUrl, context=self.mSSLContext) as response:
            theBytes = response.read()

        if pbPreferBytes:
            return theBytes
        return str(theBytes, pStrEncoding)
    # def consumeUrl

    def getAnchors(
        self,
        pUrl: str
    ):
        """Return one dict per <a> element found in the page at pUrl.

        Each dict has two keys:
            "anchor": the text inside the element;
            "href": the value of its href attribute, or "" when the
                element has no href.

        An empty list is returned when the page has no content.
        """
        strConteudo = self.consumeUrl(pUrl=pUrl)
        if not strConteudo:
            return []

        bs = BeautifulSoup(strConteudo, "html5lib")
        # get_text() yields the anchor's visible text. Attribute-style
        # access such as anchor.title looks for a child <title> tag instead,
        # which an anchor does not have, so it evaluated to None.
        return [
            {
                "anchor": anchor.get_text(),
                "href": anchor.get("href", "")
            }
            for anchor in bs.find_all("a")
        ]
    # def getAnchors

    @staticmethod
    def getFilteredByHrefAnchors(
        pListOfAnchors,
        pStrFilter: str
    ):
        """Keep only the anchors whose href contains pStrFilter.

        Args:
            pListOfAnchors: a list of dicts shaped like the ones built by
                getAnchors (keys "anchor" and "href").
            pStrFilter: the expression that must occur in the href.

        Returns:
            A new list, in the original order, with the matching dicts.
            Entries with a missing or empty href never match a non-empty
            filter.

        Example:
            AmBotTools.getFilteredByHrefAnchors(
                theAnchors,  # a list
                "4cdn.org"   # a filter phrase
            )
        """
        return [
            anchor
            for anchor in pListOfAnchors
            if pStrFilter in (anchor.get("href") or "")
        ]
    # def getFilteredByHrefAnchors
# class AmBotTools
- *****
- # bot4chan.py
- # um bot para consumir a rede social 4chan.org
- from AmBotTools import AmBotTools
- import bs4
- from bs4 import BeautifulSoup
- """
- em HTML todo o conteúdo está na forma
- <marca a1=v1 a2=v2>conteúdo</marca>
- <p class="info">A taxa juro XPTO é 5.6%</p>
- <a href="endereço">Texto do âncora</a>
- """
class Bot4Chan:
    """Bot bound to one board; builds the board URL and lists its anchors."""

    def __init__(self, pStrBoard: str):
        # The board name is interpolated into the URL by getUrlForBoard.
        self.mBoard = pStrBoard

    def getUrlForBoard(self):
        """Return the board address: https://boards.4channel.org/<board name>/"""
        return f"https://boards.4channel.org/{self.mBoard}/"

    def getBoardAnchors(self):
        """Fetch the board page and return its anchors.

        A fresh AmBotTools is created on every call and its getAnchors
        result (a list of dicts, each one describing an anchor) is
        returned unchanged.
        """
        return AmBotTools().getAnchors(
            pUrl=self.getUrlForBoard()
        )
# class Bot4Chan
# Demo run: list every anchor found on the "an" board and print the result.
bot = Bot4Chan("an")
theAnchors = bot.getBoardAnchors()
print(theAnchors)
Advertisement
Add Comment
Please, Sign In to add comment