Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import aiohttp
- import asyncio
- import sys
- import xml.etree.ElementTree as ET
def get_dict_from_file(path=None):
    """Return the page URLs listed in a sitemap XML file.

    Despite the name, the result is a list of strings: one per
    ``<url>/<loc>`` element in the sitemaps.org 0.9 namespace.

    Args:
        path: Sitemap file to parse. When omitted, ``sys.argv[1]`` is used,
            which is what the zero-argument call has always read.

    Returns:
        URL strings in document order, with surrounding whitespace removed.
        ``<url>`` entries whose ``<loc>`` is missing or empty are skipped.

    Raises:
        IndexError: ``path`` is omitted and no command-line argument exists.
        xml.etree.ElementTree.ParseError: the file is not well-formed XML.
    """
    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'
    if path is None:
        path = sys.argv[1]
    root = ET.parse(path).getroot()

    urls = []
    for entry in root.findall(namespace + 'url'):
        # findtext gives None when <loc> is absent and '' when it is empty,
        # so neither case can raise or leak a None into the result.
        text = entry.findtext(namespace + 'loc')
        # Pretty-printed sitemaps put newlines/indentation inside <loc>.
        text = text.strip() if text else ''
        if text:
            urls.append(text)
    return urls
async def fetch(session, url):
    """GET ``url`` through ``session``, print a status line, return the body.

    The printed line is ``"<status> <final url> <X-Edge-Cache1 value>"``.

    Args:
        session: Object whose ``get(url)`` is an async context manager
            yielding a response (main() passes an aiohttp.ClientSession).
        url: Address to request.

    Returns:
        The response body as returned by ``response.text()``.
    """
    async with session.get(url) as response:
        # The header is not guaranteed to be present; indexing with []
        # raised KeyError and failed the request. Print "-" instead.
        cache_status = response.headers.get('X-Edge-Cache1', '-')
        print(f"{response.status} {response.url} {cache_status}")
        return await response.text()
async def fetch_all(session, urls):
    """Request every entry of ``urls`` concurrently over one shared session.

    A task wrapping ``fetch(session, url)`` is created per URL and all tasks
    are awaited together. The fetched bodies are discarded; returns None.
    """
    pending = []
    for target in urls:
        pending.append(asyncio.create_task(fetch(session, target)))
    await asyncio.gather(*pending)
async def main(a):
    """Fetch the URLs in ``a`` plus those read by ``get_dict_from_file()``.

    All requests share one aiohttp session that sends a YandexBot
    User-Agent header.

    Args:
        a: Iterable of URL strings to fetch first. It is no longer modified.

    NOTE(review): if the caller passes the sitemap URLs as ``a``, every URL
    ends up in the list twice and is requested twice. Confirm whether that
    is intentional before de-duplicating.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
    }
    # Build a fresh list: the previous `urls = a` aliased the argument, so
    # the appends silently grew the caller's list.
    urls = [*a, *get_dict_from_file()]
    async with aiohttp.ClientSession(headers=headers) as session:
        await fetch_all(session, urls)
if __name__ == '__main__':
    # get_dict_from_file() reads the sitemap path from sys.argv[1]; exit with
    # a usage line rather than an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' SITEMAP_XML')
    asyncio.run(main(get_dict_from_file()))
Advertisement
Add Comment
Please, Sign In to add comment