Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- from selenium import webdriver
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.chrome.options import Options
- import requests
- import asyncio
- from concurrent.futures import ThreadPoolExecutor
# --- Module-level shared state ---------------------------------------------
# NOTE(review): these file handles are shared by the worker threads in
# fetch() and are never explicitly closed; the OS reclaims them at exit.
input_f = open('urls.csv', 'r')    # one URL per line (trailing newline kept)
output_f = open('output.txt', 'w') # URLs whose page looked like raw XML

# Headless Chrome configuration reused for every driver instance.
options = Options()
options.add_argument("--headless")

# Progress counter incremented by fetch() from worker threads.
# (A module-level `global` statement is a no-op and has been removed.)
count = 0
def fetch(session, url):
    """Load *url* in a fresh headless Chrome and record apparent failures.

    A page whose source contains an XML prolog ("<?xml") is treated as a
    failure and its URL is appended to the module-level ``output_f``.

    Args:
        session: requests.Session supplied by the caller. Accepted for
            interface compatibility but unused -- the page is fetched
            through Selenium, not requests.
        url: URL to load (as read from the file, trailing newline intact).
    """
    global count
    # NOTE(review): `count` is bumped from up to 20 threads without a lock;
    # it only drives the progress print, so lost increments are tolerated.
    count += 1
    print(count)
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        print(url)
        if "<?xml" in driver.page_source:
            output_f.write(url)
            print('Fail\n========================')
        else:
            print('Success\n========================')
    finally:
        # Fix: always release the browser process, even when driver.get()
        # or the page-source check raises -- the original leaked it.
        driver.quit()
async def get_data():
    """Fan every URL in ``input_f`` out to ``fetch`` on a 20-thread pool.

    The blocking Selenium work runs inside a ThreadPoolExecutor; this
    coroutine awaits until every submitted URL has been processed, so the
    executor and session contexts close only after the pool has drained.
    """
    with ThreadPoolExecutor(max_workers=20) as executor:
        with requests.Session() as session:
            # Fix: get_running_loop() is the correct call from inside a
            # coroutine; get_event_loop() is deprecated here since 3.10.
            loop = asyncio.get_running_loop()
            # NOTE(review): each `line` keeps its trailing newline from the
            # file and fetch() writes it verbatim to output.txt -- stripping
            # here would run the output lines together. Confirm whether the
            # newline should instead be stripped and re-added on write.
            tasks = [
                loop.run_in_executor(executor, fetch, session, line)
                for line in input_f
            ]
            # Results are discarded; we await only for completion.
            await asyncio.gather(*tasks)
def main():
    """Run the async URL check to completion."""
    # Fix: asyncio.run() replaces the deprecated ensure_future +
    # get_event_loop().run_until_complete() pattern and cleans up the
    # loop when the coroutine finishes.
    asyncio.run(get_data())


# Guard the entry point so importing this module doesn't launch the crawl.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement