Advertisement
am_dot_com

IA 2022-12-12

Dec 12th, 2022 (edited)
41
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.76 KB | None | 0 0
  1. # AmBot.py
  2. import certifi
  3. import ssl
  4. from urllib.request import urlopen, Request
  5. from http.client import HTTPResponse
  6.  
  7. import bs4 # python -m pip install bs4
  8. from bs4 import BeautifulSoup
  9.  
  10. class AmBot:
  11. KEY_ANCHOR = "anchor"
  12. KEY_HREF = "href"
  13.  
  14. def __init__(self):
  15. self.mCAFile =\
  16. certifi.where()
  17.  
  18. self.mSSLContext =\
  19. ssl.create_default_context(
  20. cafile=self.mCAFile
  21. )
  22. # def __init__
  23.  
  24. # método dinâmico
  25. def consumeUrl(
  26. self,
  27. pUrl:str,
  28. pbConsiderBytes:bool=False,
  29. pStrEncoding:str="UTF-8"
  30. ):
  31. response:HTTPResponse = urlopen(
  32. url=pUrl,
  33. context=self.mSSLContext
  34. )
  35. if(response):
  36. theBytes = response.read()
  37. if(pbConsiderBytes):
  38. return theBytes
  39. else:
  40. strDecoded = str(
  41. theBytes,
  42. pStrEncoding
  43. )
  44. return strDecoded
  45. # if-else
  46. # if
  47. return None
  48. # def consumeUrl
  49.  
  50. def getAnchorsAtUrl(
  51. self,
  52. pUrl:str
  53. ):
  54. ret = list()
  55.  
  56. strHtml = self.consumeUrl(pUrl)
  57. if(strHtml):
  58. bs = BeautifulSoup(
  59. strHtml,
  60. "html5lib"
  61. )
  62. if(bs):
  63. theAs = bs.findAll("a")
  64. for a in theAs:
  65. bContainsHref =\
  66. "href" in a.attrs.keys()
  67. anchor = a.text
  68. if(bContainsHref):
  69. href = a.attrs["href"]
  70. else:
  71. href = ""
  72. # if-else
  73. ret.append(
  74. {
  75. AmBot.KEY_HREF:href,
  76. AmBot.KEY_ANCHOR:anchor
  77. }
  78. )
  79. # for every a
  80. # if bs
  81. # if html
  82. return ret
  83. # def getAnchorsAtUrl
  84. # class AmBot
  85.  
  86. ***********************************
  87.  
  88. # FourChanBot.py
  89. from AmBot import AmBot
  90.  
  91. # https://boards.4channel.org/an/
  92. class FourChanBot:
  93. def getBoardAddress(
  94. self,
  95. pBoardName:str
  96. ):
  97. ret = f"https://boards.4channel.org/{pBoardName}"
  98. return ret
  99. # def getBoardAddress
  100.  
  101. def getBoardAnchors(
  102. self,
  103. pBoardName:str # "an"
  104. ):
  105. bot = AmBot()
  106. theAnchors =\
  107. bot.getAnchorsAtUrl(
  108. self.getBoardAddress(pBoardName)
  109. )
  110. return theAnchors
  111. # def getBoardAnchors
  112. # def FourChanBot
  113.  
  114. bot = FourChanBot()
  115. print (bot.getBoardAnchors("an"))
  116.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement