Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.35 KB | None | 0 0
import asyncio
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
  7.  
  8. input_f = open('urls.csv', 'r')
  9. output_f = open('output.txt', 'w')
  10.  
  11. options = Options()  
  12. options.add_argument("--headless")
  13. global count
  14. count = 0
  15.  
  16. def fetch(session, url):
  17.     global count
  18.     count += 1
  19.     print(count)
  20.     driver = webdriver.Chrome(options=options)
  21.     driver.get(url)
  22.     print(url)
  23.     if "<?xml" in driver.page_source:
  24.         output_f.write(url)
  25.         print('Fail\n========================')
  26.     else:
  27.         print('Success\n========================')
  28.     driver.quit()
  29.  
  30.  
  31. async def get_data():
  32.     with ThreadPoolExecutor(max_workers=20) as executor:
  33.         with requests.Session() as session:
  34.             loop = asyncio.get_event_loop()
  35.             tasks = [
  36.                 loop.run_in_executor(
  37.                     executor,
  38.                     fetch,
  39.                     *(session, line)
  40.                 )
  41.                 for line in input_f
  42.             ]
  43.             for response in await asyncio.gather(*tasks):
  44.                 pass
  45.  
  46. def main():
  47.     loop = asyncio.get_event_loop()
  48.     future = asyncio.ensure_future(get_data())
  49.     loop.run_until_complete(future)
  50.  
  51. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement