Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import time
import urlparse

from PyQt4 import QtCore, QtGui
from spynner import browser, SpynnerTimeout
# Address of the book page that the browser opens first.
url = ('http://books.google.kz/books'
       '?id=SUabUhG-0W4C&pg=PA19&hl=ru&source=gbs_toc_r&cad=3'
       '#v=onepage&q&f=false')

# Browser instance used for loading, scrolling and downloading.
br = browser.Browser()
# Present the session as a desktop Chrome build.
br.user_agent = (
    'Mozilla/5.0 (X11; Linux x86_64) '
    'AppleWebKit/537.31 (KHTML, like Gecko) '
    'Chrome/26.0.1410.43 Safari/537.31'
)
# Page number -> image URL captured for that page.  An entry is only added
# the first time a page number is seen, so a value that the rest of the
# script later overwrites (e.g. with None after saving) is never re-queued.
links = {}


def url_filter(operation, url):
    """Record page-image requests in ``links`` and allow every request.

    A request counts as a page image when it targets ``/books`` on a
    ``books.google.*`` host and its query string carries the ``pg``,
    ``img`` and ``w`` parameters.  The host check is deliberately not tied
    to a single country domain: the book may be opened on one regional
    domain (the script starts on ``.kz``) while the check used to accept
    ``.ru`` only.

    The page number is the ``pg`` value with its first two characters
    removed (``PA19`` -> ``19``).
    NOTE(review): every two-character prefix is stripped the same way, so
    ids that differ only in prefix share one key - confirm whether the
    book uses prefixes other than ``PA``.

    :param operation: request operation supplied by the browser; unused.
    :param url: requested URL as a string.
    :returns: always ``True`` so that no request is blocked.
    """
    urlp = urlparse.urlparse(url)
    query = urlparse.parse_qs(urlp.query)
    is_page_image = (
        urlp.netloc.startswith('books.google.')
        and urlp.path == '/books'
        and 'pg' in query
        and 'img' in query
        and 'w' in query
    )
    if is_page_image:
        try:
            page = int(query['pg'][0][2:])
        except ValueError:
            # A non-numeric page id must not raise out of the callback;
            # the request is simply not recorded.
            page = None
        if page is not None and page not in links:
            links[page] = url
    return True
# Directory that receives one page_<number>.jpg file per captured page.
OUTPUT_DIR = '/tmp/book'
# Seconds without any newly captured page after which the run stops.
IDLE_LIMIT = 10

# open() does not create missing directories, so make sure the target
# exists before the first page is written.
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

br.set_url_filter(url_filter)
br.webpage.setViewportSize(QtCore.QSize(1024, 768))
br.create_webview()
br.show()

try:
    br.load(url)
    try:
        br.wait_load(3)
    except SpynnerTimeout:
        # Best effort: carry on with whatever has loaded after the timeout.
        pass

    # Synthetic PageDown press/release pair, re-sent on every iteration to
    # keep scrolling the view.
    _press = QtGui.QKeyEvent(
        QtCore.QEvent.KeyPress, QtCore.Qt.Key_PageDown, QtCore.Qt.NoModifier)
    _release = QtGui.QKeyEvent(
        QtCore.QEvent.KeyRelease, QtCore.Qt.Key_PageDown, QtCore.Qt.NoModifier)

    # Time at which the queue was first found empty; None while pages are
    # still arriving.
    idle_since = None
    while True:
        QtGui.qApp.sendEvent(br.webview, _press)
        QtGui.qApp.sendEvent(br.webview, _release)

        # Entries that still hold a URL have been captured but not saved.
        pending = [(page, link) for page, link in links.items() if link]
        if pending:
            idle_since = None
            for page, link in pending:
                target = os.path.join(OUTPUT_DIR, 'page_%s.jpg' % page)
                with open(target, 'wb') as f:
                    br.download(link, f)
                # Keep the key but drop the URL so the page is not
                # downloaded again.
                links[page] = None
        elif idle_since is None:
            idle_since = time.time()
        elif time.time() - idle_since > IDLE_LIMIT:
            print('END')
            break

        br.wait(0.1)
finally:
    # Close the browser even when loading or a download fails.
    br.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement