Advertisement
Guest User

pyQtWebbrowser

a guest
Jul 21st, 2018
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.76 KB | None | 0 0
  1. import json
  2. import sys
  3. from datetime import time
  4. from time import time, sleep
  5.  
  6. import six
  7. from PyQt4.QtCore import QUrl, QString, QDateTime
  8. from PyQt4.QtCore import Qt, QByteArray, QTimer, QEventLoop, QEvent
  9. from PyQt4.QtGui import QApplication
  10. from PyQt4.QtGui import QImage, QPainter, QMouseEvent, QKeyEvent, QKeySequence
  11. from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkCookieJar, QNetworkCookie
  12. from PyQt4.QtWebKit import QWebElement
  13. from PyQt4.QtWebKit import QWebPage, QWebSettings, QWebView
  14.  
  15.  
  16. class NetworkAccessManager(QNetworkAccessManager):
  17.     def __init__(self):
  18.         """Subclass QNetworkAccessManager for finer control network operations
  19.  
  20.        proxy: the string of a proxy to download through
  21.        use_cache: whether to cache replies so that can load faster with the same content subsequent times
  22.        """
  23.         super(NetworkAccessManager, self).__init__()
  24.         self.sslErrors.connect(self.sslErrorHandler)
  25.         # the requests that are still active
  26.         self.active_requests = []
  27.         self.finished.connect(self._on_reply)
  28.  
  29.     def _on_reply(self, reply):
  30.         self._reply_url = six.u(toString(reply.url()))
  31.         self._reply_status = not bool(reply.error())
  32.    
  33.         def createRequest(self, operation, request, post):
  34.         """Override creating a network request
  35.        """
  36.         url = request.url().toString()
  37.         if str(request.url().path()).endswith('.ttf'):
  38.             # block fonts, which can cause webkit to crash
  39.             request.setUrl(QUrl())
  40.  
  41.         data = post if post is None else post.peek(MAX_POST_SIZE)
  42.  
  43.         reply = QNetworkAccessManager.createRequest(self, operation, request, post)
  44.         reply.error.connect(self.catch_error)
  45.         self.active_requests.append(reply)
  46.         reply.destroyed.connect(self.active_requests.remove)
  47.         # save reference to original request
  48.         reply.content = QByteArray()
  49.         reply.readyRead.connect(self._save_content(reply))
  50.  
  51.         reply.orig_request = request
  52.         reply.data = self.parse_data(data)
  53.         return reply
  54.        
  55.  
  56. class WebPage(QWebPage):
  57.     def __init__(self, user_agent, confirm=True):
  58.         """Override QWebPage to set User-Agent and JavaScript messages
  59.  
  60.        user_agent: the User Agent to submit
  61.        confirm: default response to confirm dialog boxes
  62.        """
  63.         super(WebPage, self).__init__()
  64.         self.user_agent = user_agent
  65.         default_user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
  66.         if user_agent is None:
  67.             self.user_agent = default_user_agent
  68.         self.confirm = confirm
  69.         self.setForwardUnsupportedContent(True)
  70.         self.setViewportSize(self.mainFrame().contentsSize())
  71.  
  72. class Browser(QWebView):
  73.     def __init__(self, gui=False, user_agent=None, load_images=True, load_javascript=True, load_java=True,
  74.                  load_plugins=True, timeout=20, delay=5, app=None):
  75.         """Widget class that contains the address bar, webview for rendering webpages, and a table for displaying results
  76.  
  77.        user_agent: the user-agent when downloading content
  78.        proxy: a QNetworkProxy to download through
  79.        load_images: whether to download images
  80.        load_javascript: whether to enable javascript
  81.        load_java: whether to enable java
  82.        load_plugins: whether to enable browser plugins
  83.        timeout: the maximum amount of seconds to wait for a request
  84.        delay: the minimum amount of seconds to wait between requests
  85.        app: QApplication object so that can instantiate multiple browser objects
  86.        use_cache: whether to cache all replies
  87.        """
  88.         # must instantiate the QApplication object before any other Qt objects
  89.         self.app = app or QApplication(sys.argv)
  90.         super(Browser, self).__init__()
  91.  
  92.         html_page = WebPage(user_agent)
  93.         manager = NetworkAccessManager()
  94.         html_page.setNetworkAccessManager(manager)
  95.         self.setPage(html_page)
  96.         html_page.networkAccessManager().finished.connect(self.finished)
  97.         self.adjustSize()
  98.         self.timeout = timeout
  99.         self.delay = delay
  100.         # self.reload()
  101.         if gui:
  102.             self.showNormal()
  103.             self.raise_()
  104.    
  105.         def get(self, url, html=None, headers=None, data=None):
  106.         """Load given url in webkit and return html when loaded
  107.  
  108.        url: the URL to load
  109.        html: optional HTML to set instead of downloading
  110.        headers: the headers to attach to the request
  111.        data: the data to POST
  112.        """
  113.         if isinstance(url, basestring):
  114.             # convert string to Qt's URL object
  115.             url = QUrl(url)
  116.         if html:
  117.             # load pre downloaded HTML
  118.             self.setContent(html, baseUrl=url)
  119.             return html
  120.         t1 = time()
  121.         loop = QEventLoop()
  122.         self.loadFinished.connect(loop.quit)
  123.  
  124.         # need to make network request
  125.         request = QNetworkRequest(url)
  126.         # headers could be list of strings or list of tuples:
  127.         # daca sunt strings se vor converti in list of tuples !
  128.         temp_headers = []
  129.         if headers:
  130.             if type(headers[0]) == tuple:
  131.                 # add headers to request when defined
  132.                 for header, value in headers:
  133.                     request.setRawHeader(header, value)
  134.             elif type(headers[0]) == str:
  135.                 for string_header in headers:
  136.                     temp_headers.append(tuple(string_header.split(":")))
  137.                 headers = temp_headers
  138.                 # add headers to request when defined
  139.                 for header, value in headers:
  140.                     request.setRawHeader(header, value)
  141.             else:
  142.                 print "Headers must be a list of strings OR a list of tuples !"
  143.  
  144.         fn = super(Browser, self)
  145.         if data:
  146.             # POST request
  147.             fn.load(request, QNetworkAccessManager.PostOperation, data)
  148.         else:
  149.             # GET request
  150.             fn.load(request)
  151.  
  152.         # set a timeout on the download loop
  153.         timer = QTimer()
  154.         timer.setSingleShot(True)
  155.         timer.timeout.connect(loop.quit)
  156.         timer.start(self.timeout * 1000)
  157.         loop.exec_()  # delay here until download finished or timeout
  158.  
  159.         if timer.isActive():
  160.             # downloaded successfully
  161.             timer.stop()
  162.             parsed_html = self.current_html()
  163.             self.wait(self.delay - (time() - t1))
  164.         else:
  165.             # did not download in time
  166.             parsed_html = ''
  167.         return parsed_html
  168.  
  169.  
  170. url = "https://www.flashscore.ro"
  171. # once script is working you can disable the gui
  172. MyBrowser = Browser(gui=True)
  173. MyBrowser.get(url)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement