am_dot_com

FP 2022-12-12

Dec 12th, 2022 (edited)
146
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.52 KB | None | 0 0
  1. # AmBotTools.py
  2. import certifi
  3. import ssl
  4. from urllib.request import urlopen, Request
  5. from http.client import HTTPResponse
  6.  
  7. import bs4
  8. from bs4 import BeautifulSoup
  9.  
  10. class AmBotTools:
  11.  
  12. def __init__(self):
  13. self.mCAFile = certifi.where()
  14. self.mSSLContext =\
  15. ssl.create_default_context(
  16. cafile=self.mCAFile
  17. )
  18. # def __init__
  19.  
  20. def consumeUrl(
  21. self,
  22. pUrl:str,
  23. pbPreferBytes:bool=False,
  24. pStrEncoding="UTF-8"
  25. ):
  26. response:HTTPResponse =\
  27. urlopen(
  28. url = pUrl,
  29. context=self.mSSLContext
  30. )
  31. theBytes = response.read()
  32. if(pbPreferBytes):
  33. return theBytes
  34. else:
  35. strResponse = str(
  36. theBytes,
  37. pStrEncoding
  38. )
  39. return strResponse
  40. # if-else
  41. # def consumeUrl
  42.  
  43. def getAnchors(
  44. self,
  45. pUrl:str
  46. ):
  47. listOfFoundAnchors = list() #
  48.  
  49. strConteudo = \
  50. self.consumeUrl(
  51. pUrl=pUrl
  52. )
  53. if (strConteudo):
  54. bs = BeautifulSoup(
  55. strConteudo,
  56. "html5lib"
  57. )
  58. if (bs):
  59. theAs = bs.findAll("a")
  60.  
  61. if (theAs):
  62. for anchor in theAs:
  63. texto:str = anchor.title
  64. bThereIsHref = "href" in anchor.attrs.keys()
  65. if (bThereIsHref):
  66. href = anchor.attrs["href"]
  67. else:
  68. href = ""
  69. # if-else
  70.  
  71. listOfFoundAnchors.append(
  72. {
  73. "anchor":texto,
  74. "href":href
  75. }
  76. )
  77. # for every anchor
  78. # if there are anchors
  79. # if it was possible to get a bs object
  80. # if there is content
  81.  
  82. return listOfFoundAnchors
  83. # def getAnchors
  84.  
  85. """
  86. escreva um método que permita filtrar
  87. uma lista de anchors
  88. incluindo no retorno apenas aquelas
  89. cujo atributo href
  90. contenha certa expressão.
  91. Por exemplo:
  92. AmBotTools.getFilteredByHrefAnchors(
  93. theAnchors, # uma lista
  94. "4cdn.org" # uma frase de filtro
  95. )
  96. """
  97. # class AmBotTools
  98.  
  99.  
  100. *****
  101.  
  102. # bot4chan.py
  103. # um bot para consumir a rede social 4chan.org
  104. from AmBotTools import AmBotTools
  105.  
  106. import bs4
  107. from bs4 import BeautifulSoup
  108.  
  109. """
  110. em HTML todo o conteúdo está na forma
  111. <marca a1=v1 a2=v2>conteúdo</marca>
  112. <p class="info">A taxa juro XPTO é 5.6%</p>
  113.  
  114. <a href="endereço">Texto do âncora</a>
  115. """
  116.  
  117. class Bot4Chan:
  118. def __init__(
  119. self,
  120. pStrBoard:str
  121. ):
  122. self.mBoard = pStrBoard
  123. # def __init__
  124.  
  125. # https://boards.4channel.org/<nome da board/
  126. def getUrlForBoard(self):
  127. strUrl =\
  128. f"https://boards.4channel.org/{self.mBoard}/"
  129. return strUrl
  130. # def getUrlForBoard
  131.  
  132. def getBoardAnchors(self):
  133. bot = AmBotTools()
  134. listOfDictsEachOneIsAnAnchor =\
  135. bot.getAnchors(
  136. pUrl = self.getUrlForBoard()
  137. )
  138. return listOfDictsEachOneIsAnAnchor
  139. # def getBoardAnchors
  140. # class Bot4Chan
  141.  
  142. bot = Bot4Chan("an")
  143. theAnchors = bot.getBoardAnchors()
  144. print (theAnchors)
Advertisement
Add Comment
Please, Sign In to add comment