Advertisement
Guest User

code

a guest
Sep 12th, 2020
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.13 KB | None | 0 0
  1. from PyQt5.QtWebEngineWidgets import QWebEnginePage
  2. from PyQt5.QtWidgets import QApplication
  3. from PyQt5.QtCore import QUrl
  4. #from PyQt5.QtWebEngineWidgets import QtCore, QtGui, QtWebKit
  5. import bs4 as bs
  6. import sys
  7. import re
  8. import time
  9.  
  10. class Page(QWebEnginePage):
  11.  
  12. def __init__(self, url):
  13. self.app = QApplication(sys.argv)
  14. QWebEnginePage.__init__(self)
  15. self.loadd(url)
  16.  
  17. def loadd(self, url):
  18. self.html = ''
  19. self.loadFinished.connect(self._on_load_finished)
  20. self.load(QUrl(url))
  21. self.app.exec_()
  22.  
  23.  
  24. def _on_load_finished(self):
  25. self.html = self.toHtml(self.Callable)
  26. print('Load finished')
  27.  
  28. def Callable(self, html_str):
  29. self.html = html_str
  30. self.app.quit()
  31.  
  32. """
  33. def loadPage(url):
  34. page = QtWebKit.QWebPage()
  35. loop = QtCore.QEventLoop() # Create event loop
  36. page.mainFrame().loadFinished.connect(loop.quit) # Connect loadFinished to loop quit
  37. page.mainFrame().load(url)
  38. loop.exec_() # Run event loop, it will end on loadFinished
  39. return page.mainFrame().toHtml()
  40. """
  41.  
  42. def printShit(page):
  43. soup = bs.BeautifulSoup(page.html, 'html.parser')
  44. myTable = soup.findAll('table')
  45.  
  46.  
  47. leTable = myTable[0]
  48. for table in myTable:
  49. leTable = table
  50. if table.find(text=re.compile("Current International Class")):
  51. leTable = table
  52. print('-----------------------------------------------------------------------------------------------------------------------------------')
  53. break
  54.  
  55.  
  56.  
  57. youza = leTable.findAll('td')
  58. youza2 = youza[5]
  59. youza3 = str(youza2)
  60. youza4 = youza3[1:len(youza3)]
  61.  
  62. begin = youza4.find('>')
  63. end = youza4.find('<')
  64. print(youza4[begin+1:end])
  65.  
  66.  
  67. urls = []
  68. poop = open("links.txt", "r")
  69. urls += poop.readlines()
  70. poop.close()
  71. urls = list(set(urls))
  72.  
  73. page = Page(urls[0])
  74. #count = 0
  75.  
  76. for url in urls:
  77. #time.sleep(1)
  78. page.loadd(url)
  79. #loadPage(url)
  80. #time.sleep(2)
  81. #page = Page(url)
  82. printShit(page)
  83. print()
  84.  
  85.  
  86.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement