Advertisement
robots_txt

Untitled

Dec 14th, 2018
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.88 KB | None | 0 0
  1. import asyncio
  2. from pyppeteer import launch
  3.  
  4. link = "https://stackoverflow.com/questions/tagged/web-scraping"
  5.  
  6. async def fetch(url):
  7. browser = await launch(headless=True,autoClose=False)
  8. page = await browser.newPage()
  9. await page.goto(url)
  10. linkstorage = []
  11. elements = await page.querySelectorAll('.summary .question-hyperlink')
  12. for element in elements:
  13. linkstorage.append(await page.evaluate('(element) => element.href', element))
  14. results = await asyncio.gather(*[browse_all_links(link, page) for link in linkstorage])
  15. return results
  16.  
  17. async def browse_all_links(link, page):
  18. await page.goto(link)
  19. title = await page.querySelectorEval('.question-hyperlink','(e => e.innerText)')
  20. print(title)
  21.  
  22. if __name__ == '__main__':
  23. loop = asyncio.get_event_loop()
  24. future = asyncio.ensure_future(fetch(link))
  25. loop.run_until_complete(future)
  26. loop.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement