Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # AmBot.py
- import certifi
- import ssl
- from urllib.request import urlopen, Request
- from http.client import HTTPResponse
- import bs4 # python -m pip install bs4
- from bs4 import BeautifulSoup
class AmBot:
    """Small HTTP(S) client that downloads pages and lists their anchors.

    Attributes:
        mCAFile: path of the CA bundle reported by ``certifi.where()``.
        mSSLContext: SSL context built from that bundle; it is passed to
            every request made through ``consumeUrl``.
    """

    # Keys of the dictionaries produced by getAnchorsAtUrl.
    KEY_ANCHOR = "anchor"
    KEY_HREF = "href"

    def __init__(self):
        """Create the SSL context shared by all requests of this bot."""
        self.mCAFile = certifi.where()
        self.mSSLContext = ssl.create_default_context(
            cafile=self.mCAFile
        )

    # instance method
    def consumeUrl(
        self,
        pUrl: str,
        pbConsiderBytes: bool = False,
        pStrEncoding: str = "UTF-8",
    ):
        """Download the resource at ``pUrl`` and return its body.

        Args:
            pUrl: address to fetch.
            pbConsiderBytes: when true, return the raw bytes untouched.
            pStrEncoding: codec used to decode the body when a string is
                requested.

        Returns:
            ``bytes`` if ``pbConsiderBytes`` is true, otherwise the body
            decoded to ``str`` with ``pStrEncoding``.

        Raises:
            urllib.error.URLError: propagated from ``urlopen`` when the
                request fails.
            UnicodeDecodeError: if the body is not valid ``pStrEncoding``.
        """
        # urlopen raises on failure instead of returning a falsy value, so
        # no truthiness check is needed. The ``with`` block guarantees the
        # connection is closed even when read() raises.
        with urlopen(url=pUrl, context=self.mSSLContext) as response:
            theBytes = response.read()

        if pbConsiderBytes:
            return theBytes
        return theBytes.decode(pStrEncoding)

    def getAnchorsAtUrl(self, pUrl: str) -> list:
        """Return one dictionary per ``<a>`` element found at ``pUrl``.

        Each dictionary maps ``AmBot.KEY_HREF`` to the element's ``href``
        attribute (an empty string when the attribute is absent) and
        ``AmBot.KEY_ANCHOR`` to the element's text. An empty page yields
        an empty list.
        """
        strHtml = self.consumeUrl(pUrl)
        if not strHtml:
            return []

        bs = BeautifulSoup(strHtml, "html5lib")
        # find_all is the current name of the deprecated findAll alias.
        return [
            {
                AmBot.KEY_HREF: a.attrs.get("href", ""),
                AmBot.KEY_ANCHOR: a.text,
            }
            for a in bs.find_all("a")
        ]
# class AmBot
- ***********************************
- # FourChanBot.py
- from AmBot import AmBot
- # https://boards.4channel.org/an/
class FourChanBot:
    """Builds 4channel board URLs and fetches the anchors found on them."""

    def getBoardAddress(self, pBoardName: str):
        """Return the URL of the board called ``pBoardName`` (e.g. "an")."""
        return f"https://boards.4channel.org/{pBoardName}"

    def getBoardAnchors(self, pBoardName: str):
        """Return the anchors of the board page for ``pBoardName``.

        A new ``AmBot`` is created on each call; the result is whatever its
        ``getAnchorsAtUrl`` returns for the board address.
        """
        scraper = AmBot()
        return scraper.getAnchorsAtUrl(self.getBoardAddress(pBoardName))
# class FourChanBot
# Demo run: print every anchor found on the "an" board page.
bot = FourChanBot()
print(bot.getBoardAnchors(pBoardName="an"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement