Guest User

Untitled

a guest
Nov 27th, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.16 KB | None | 0 0
  1. import aiohttp
  2. import asyncio
  3. import sys
  4. import xml.etree.ElementTree as ET
  5.  
  def get_dict_from_file(path=None):
      """Return the page URLs listed in a sitemap XML file.

      Despite the name, the result is a list, not a dict.

      Args:
          path: Sitemap file to parse. When omitted, the first command-line
              argument (``sys.argv[1]``) is used, which is what a call
              without arguments has always done.

      Returns:
          The whitespace-stripped ``<loc>`` text of each ``<url>`` element
          found directly under the document root, in document order.
          ``<url>`` entries whose ``<loc>`` is missing or empty are skipped.

      Raises:
          IndexError: no *path* was given and ``sys.argv`` has no argument.
          OSError, xml.etree.ElementTree.ParseError: propagated from
              ``ET.parse`` when the file is unreadable or malformed.
      """
      ns = '{http://www.sitemaps.org/schemas/sitemap/0.9}'
      if path is None:
          path = sys.argv[1]
      root = ET.parse(path).getroot()

      locations = []
      for url in root.findall(ns + 'url'):
          # findtext() yields the default when <loc> is absent and '' when it
          # is present but empty, so neither case can raise AttributeError or
          # leak None into the result.
          text = url.findtext(ns + 'loc', default='').strip()
          if text:
              locations.append(text)
      return locations
  14.  
  async def fetch(session, url):
      """Request *url*, print a one-line report, and return the body text.

      Args:
          session: Object whose ``get(url)`` returns an async context manager
              yielding a response; this script passes an
              ``aiohttp.ClientSession``.
          url: Address to request.

      Returns:
          The result of awaiting ``response.text()``.

      The printed line is ``"<status> <response url> <X-Edge-Cache1 value>"``;
      ``-`` stands in for the header value when the response does not
      carry that header.
      """
      async with session.get(url) as response:
          # A plain headers[...] lookup raised KeyError for any response
          # without the header, and that exception propagated to whoever
          # awaited this coroutine instead of the line being printed.
          cache_state = response.headers.get('X-Edge-Cache1', '-')
          print(f"{response.status} {response.url} {cache_state}")
          return await response.text()
  19.  
  async def fetch_all(session, urls):
      """Run ``fetch`` for every entry of *urls* concurrently.

      One task per URL is created up front, then all of them are awaited
      together through ``asyncio.gather``. The gathered results are not
      returned; the coroutine evaluates to ``None``.

      Args:
          session: Passed through unchanged to each ``fetch`` call.
          urls: Iterable of addresses to request.
      """
      pending = []
      for target in urls:
          pending.append(asyncio.create_task(fetch(session, target)))
      await asyncio.gather(*pending)
  23.  
  24.  
  async def main(a):
      """Request the caller-supplied URLs plus every URL from the sitemap.

      Args:
          a: Iterable of URLs to request in addition to the ones returned by
              ``get_dict_from_file()``. It is read only, never modified.

      All requests share one ``aiohttp.ClientSession`` that sends the
      User-Agent header defined below.
      """
      headers = {
          'User-Agent': 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
      }
      # Build a fresh list rather than appending to `a`, so the caller's list
      # is left untouched. dict.fromkeys drops repeats while keeping first-seen
      # order, so a URL that appears both in `a` and in the sitemap file is
      # requested once instead of twice.
      urls = list(dict.fromkeys([*a, *get_dict_from_file()]))
      async with aiohttp.ClientSession(headers=headers) as session:
          await fetch_all(session, urls)
  34.  
  35.  
  if __name__ == '__main__':
      # main() loads the sitemap named on the command line by itself, so no
      # extra URLs are handed in. Passing get_dict_from_file() here parsed the
      # same file a second time and supplied every URL to main() twice.
      asyncio.run(main([]))
Advertisement
Add Comment
Please, Sign In to add comment