Advertisement
Gulnara

Bir

Mar 2nd, 2015
224
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.72 KB | None | 0 0
  1. from PyQt4 import QtCore, QtGui
  2. from spynner import browser, SpynnerTimeout
  3. import urlparse
  4. import time
  5.  
  6. url = 'http://books.google.kz/books?id=SUabUhG-0W4C&pg=PA19&hl=ru&source=gbs_toc_r&cad=3#v=onepage&q&f=false'
  7.  
  8. br = browser.Browser()
  9. br.user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.43 Safari/537.31'
  10.  
  11. links = {}
  12.  
  13. def url_filter(operation, url):
  14. urlp = urlparse.urlparse(url)
  15. query = urlparse.parse_qs(urlp.query)
  16. if urlp.netloc == 'books.google.ru' and urlp.path == '/books' and 'pg' in query and 'img' in query and 'w' in query:
  17. page = int(query['pg'][0][2:])
  18. if not page in links:
  19. links[page] = url
  20. return True
  21.  
  22. br.set_url_filter(url_filter)
  23. br.webpage.setViewportSize(QtCore.QSize(1024, 768))
  24.  
  25. br.create_webview()
  26. br.show()
  27.  
  28. br.load(url)
  29. try:
  30. br.wait_load(3)
  31. except SpynnerTimeout:
  32. pass
  33.  
  34. _press = QtGui.QKeyEvent(QtCore.QEvent.KeyPress, QtCore.Qt.Key_PageDown, QtCore.Qt.NoModifier)
  35. _release = QtGui.QKeyEvent(QtCore.QEvent.KeyRelease, QtCore.Qt.Key_PageDown, QtCore.Qt.NoModifier)
  36.  
  37. end_timeout = None
  38.  
  39. while True:
  40. QtGui.qApp.sendEvent(br.webview, _press)
  41. QtGui.qApp.sendEvent(br.webview, _release)
  42.  
  43. for_save = dict((p,l) for p,l in links.iteritems() if l)
  44.  
  45. if not for_save:
  46. if end_timeout is None:
  47. end_timeout = time.time()
  48. elif time.time()-end_timeout > 10:
  49. print 'END'
  50. break
  51. else:
  52. end_timeout = None
  53.  
  54. for p,l in for_save.iteritems():
  55. with open('/tmp/book/page_'+str(p)+'.jpg', 'wb') as f:
  56. br.download(l, f)
  57. links[p] = None
  58.  
  59. br.wait(0.1)
  60.  
  61. br.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement