Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import sys
- from datetime import time
- from time import time, sleep
- import six
- from PyQt4.QtCore import QUrl, QString, QDateTime
- from PyQt4.QtCore import Qt, QByteArray, QTimer, QEventLoop, QEvent
- from PyQt4.QtGui import QApplication
- from PyQt4.QtGui import QImage, QPainter, QMouseEvent, QKeyEvent, QKeySequence
- from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkCookieJar, QNetworkCookie
- from PyQt4.QtWebKit import QWebElement
- from PyQt4.QtWebKit import QWebPage, QWebSettings, QWebView
class NetworkAccessManager(QNetworkAccessManager):
    """Network access manager that tracks active replies and keeps a copy of their content

    Every reply created by this manager is annotated with:
    content: a QByteArray accumulating the bytes received so far
    orig_request: the QNetworkRequest that produced the reply
    data: the POSTed payload parsed into a list of key/value pairs
    """

    # upper bound on the number of bytes peeked from a POST body for inspection
    MAX_POST_SIZE = 2 ** 25

    def __init__(self, ignore_ssl_errors=False):
        """Subclass QNetworkAccessManager for finer control of network operations
        ignore_ssl_errors: whether to keep loading when certificate validation fails (off by default)
        """
        super(NetworkAccessManager, self).__init__()
        self.ignore_ssl_errors = ignore_ssl_errors
        # the requests that are still active
        self.active_requests = []
        # URL and success flag of the most recently finished reply
        self._reply_url = None
        self._reply_status = None
        self.sslErrors.connect(self.sslErrorHandler)
        self.finished.connect(self._on_reply)

    def sslErrorHandler(self, reply, errors):
        """Report SSL errors and only ignore them when explicitly requested
        reply: the QNetworkReply whose certificate validation failed
        errors: the list of QSslError objects describing the failure
        """
        # the encoded URL is always ASCII, so this is safe to write on Python 2 and 3
        address = bytes(reply.url().toEncoded()).decode('ascii')
        sys.stderr.write('SSL errors (%d) for %s\n' % (len(errors), address))
        if self.ignore_ssl_errors:
            # without this call Qt tears the connection down, which is the safe default
            reply.ignoreSslErrors()

    def catch_error(self, eid):
        """Report a network error raised by one of the replies
        eid: the QNetworkReply.NetworkError code
        """
        sys.stderr.write('Network error code: %d\n' % int(eid))

    def _on_reply(self, reply):
        """Remember the URL and success flag of the reply that just finished
        """
        self._reply_url = six.text_type(reply.url().toString())
        self._reply_status = not bool(reply.error())

    def createRequest(self, operation, request, post):
        """Override creating a network request
        operation: the QNetworkAccessManager operation (GET, POST, ...)
        request: the QNetworkRequest about to be sent
        post: the QIODevice holding the outgoing body, or None
        """
        if six.text_type(request.url().path()).endswith('.ttf'):
            # block fonts, which can cause webkit to crash
            request.setUrl(QUrl())
        # peek so that the body is still available to Qt when the request is sent
        data = None if post is None else post.peek(self.MAX_POST_SIZE)
        reply = QNetworkAccessManager.createRequest(self, operation, request, post)
        reply.error.connect(self.catch_error)
        self.active_requests.append(reply)
        reply.destroyed.connect(self.active_requests.remove)
        # save reference to original request
        reply.content = QByteArray()
        reply.readyRead.connect(self._save_content(reply))
        reply.orig_request = request
        reply.data = self.parse_data(data)
        return reply

    def _save_content(self, reply):
        """Return a slot that copies newly available bytes into reply.content
        """
        def save_content():
            # peek rather than read so that webkit still receives the data
            reply.content.append(reply.peek(reply.size()))
        return save_content

    def parse_data(self, data):
        """Parse posted data into a list of key/value pairs
        data: the raw request body, or None when nothing was posted
        """
        if data is None:
            return []
        raw = bytes(data)
        try:
            parsed = json.loads(raw.decode('utf-8'))
        except ValueError:
            # not JSON (or not UTF-8), so treat the body as a URL encoded form
            query = QUrl('')
            query.setEncodedQuery(QByteArray(raw))
            return query.queryItems()
        if isinstance(parsed, dict):
            return list(parsed.items())
        if isinstance(parsed, list):
            return parsed
        # scalars carry no key/value structure
        return []
class WebPage(QWebPage):
    """QWebPage that sends a fixed User-Agent and answers confirm dialogs automatically
    """

    # used when the caller does not supply a user agent
    DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"

    def __init__(self, user_agent, confirm=True):
        """Override QWebPage to set User-Agent and JavaScript messages
        user_agent: the User Agent to submit, or None to use DEFAULT_USER_AGENT
        confirm: default response to confirm dialog boxes
        """
        super(WebPage, self).__init__()
        self.user_agent = self.DEFAULT_USER_AGENT if user_agent is None else user_agent
        self.confirm = confirm
        self.setForwardUnsupportedContent(True)
        self.setViewportSize(self.mainFrame().contentsSize())

    def userAgentForUrl(self, url):
        """Use the same user agent for every URL
        """
        return self.user_agent

    def javaScriptConfirm(self, frame, message):
        """Answer JavaScript confirm() dialogs with the configured default instead of prompting
        """
        return self.confirm
class Browser(QWebView):
    """Web view that loads a URL synchronously and returns the rendered HTML
    """

    def __init__(self, gui=False, user_agent=None, load_images=True, load_javascript=True, load_java=True,
                 load_plugins=True, timeout=20, delay=5, app=None):
        """Create the web view with its own page and network access manager
        gui: whether to show the browser window
        user_agent: the user-agent when downloading content
        load_images: whether to download images
        load_javascript: whether to enable javascript
        load_java: whether to enable java
        load_plugins: whether to enable browser plugins
        timeout: the maximum amount of seconds to wait for a request
        delay: the minimum amount of seconds to wait between requests
        app: QApplication object so that can instantiate multiple browser objects
        """
        # must instantiate the QApplication object before any other Qt objects,
        # and reuse an existing one because Qt allows only a single instance
        self.app = app or QApplication.instance() or QApplication(sys.argv)
        super(Browser, self).__init__()
        html_page = WebPage(user_agent)
        manager = NetworkAccessManager()
        html_page.setNetworkAccessManager(manager)
        self.setPage(html_page)
        manager.finished.connect(self.finished)
        # apply the content preferences requested by the caller
        settings = self.settings()
        settings.setAttribute(QWebSettings.AutoLoadImages, load_images)
        settings.setAttribute(QWebSettings.JavascriptEnabled, load_javascript)
        settings.setAttribute(QWebSettings.JavaEnabled, load_java)
        settings.setAttribute(QWebSettings.PluginsEnabled, load_plugins)
        self.adjustSize()
        self.timeout = timeout
        self.delay = delay
        if gui:
            self.showNormal()
            self.raise_()

    def finished(self, reply):
        """Hook called with every finished network reply; override in subclasses to process it
        """
        pass

    def current_html(self):
        """Return the HTML of the main frame as currently rendered
        """
        return six.text_type(self.page().mainFrame().toHtml())

    def wait(self, timeout=1):
        """Keep processing Qt events for the given number of seconds
        timeout: how long to wait; zero or negative returns immediately
        """
        deadline = time() + timeout
        while time() < deadline:
            # short sleep so the loop does not saturate a CPU core
            sleep(0.01)
            self.app.processEvents()

    @staticmethod
    def _normalize_headers(headers):
        """Convert the accepted header formats into a list of (name, value) byte string tuples
        headers: a dict, a list of (name, value) tuples or a list of "Name: value" strings
        """
        def as_bytes(text):
            # QNetworkRequest.setRawHeader expects byte strings
            return text.encode('utf-8') if isinstance(text, six.text_type) else text

        if not headers:
            return []
        if isinstance(headers, dict):
            pairs = list(headers.items())
        elif isinstance(headers[0], tuple):
            pairs = list(headers)
        elif isinstance(headers[0], six.string_types):
            # strings are converted into tuples; split on the first colon only
            # because values such as URLs contain colons themselves
            pairs = []
            for line in headers:
                name, _, value = line.partition(":")
                pairs.append((name.strip(), value.strip()))
        else:
            print("Headers must be a list of strings OR a list of tuples !")
            return []
        return [(as_bytes(name), as_bytes(value)) for name, value in pairs]

    def get(self, url, html=None, headers=None, data=None):
        """Load given url in webkit and return html when loaded
        url: the URL to load
        html: optional HTML to set instead of downloading
        headers: the headers to attach to the request
        data: the data to POST
        Returns the rendered HTML, or an empty string when the load timed out
        """
        if isinstance(url, six.string_types):
            # convert string to Qt's URL object
            url = QUrl(url)
        if html:
            # load pre downloaded HTML
            self.setContent(html, baseUrl=url)
            return html
        t1 = time()
        loop = QEventLoop()
        self.loadFinished.connect(loop.quit)
        # need to make network request
        request = QNetworkRequest(url)
        for header, value in self._normalize_headers(headers):
            request.setRawHeader(header, value)
        fn = super(Browser, self)
        if data:
            # POST request
            fn.load(request, QNetworkAccessManager.PostOperation, data)
        else:
            # GET request
            fn.load(request)
        # set a timeout on the download loop
        timer = QTimer()
        timer.setSingleShot(True)
        timer.timeout.connect(loop.quit)
        # QTimer takes whole milliseconds, so a fractional timeout is truncated
        timer.start(int(self.timeout * 1000))
        loop.exec_()  # delay here until download finished or timeout
        if timer.isActive():
            # downloaded successfully
            timer.stop()
            parsed_html = self.current_html()
            # wait out whatever remains of the minimum delay between requests
            self.wait(self.delay - (time() - t1))
        else:
            # did not download in time
            parsed_html = ''
        return parsed_html
# page opened by the demo run below
url = "https://www.flashscore.ro"
# the window is shown while developing; pass gui=False once the script works
MyBrowser = Browser(gui=True)
MyBrowser.get(url=url)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement