Advertisement
Guest User

Untitled

a guest
Mar 3rd, 2020
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.24 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. from concurrent.futures.thread import ThreadPoolExecutor
  4.  
  5. main = "https://deltaimmigration.com.au/Australia-jobs/"
  6.  
  7.  
  8. def First():
  9.     r = requests.get(main)
  10.     soup = BeautifulSoup(r.text, 'html5lib')
  11.     links = []
  12.     names = []
  13.     with open("links.txt", 'w', newline="", encoding="UTF-8") as f:
  14.         for item in soup.findAll("td", {'width': '250'}):
  15.             name = item.contents[1].text
  16.             item = item.contents[1].get("href")[3:]
  17.             item = f"https://deltaimmigration.com.au/{item}"
  18.             f.write(item+"\n")
  19.             links.append(item)
  20.             names.append(name)
  21.     print(f"We Have Collected {len(links)} urls")
  22.     return links, names
  23.  
  24.  
  25. def Second(link, name):
  26.     r = requests.get(link)
  27.     soup = BeautifulSoup(r.text, 'html5lib')
  28.     for item in soup.findAll("table", {'width': '900'}):
  29.         with open(f"{name}.txt", 'w', newline="", encoding="UTF-8") as f:
  30.             f.write(item.text)
  31.             return(f"Saved {name}")
  32.  
  33.  
  34. with ThreadPoolExecutor(max_workers=10) as executor:
  35.     futures = {executor.submit(Second, link, name): (link, name)
  36.                for link, name in First()}
  37.  
  38. for future in futures:
  39.     print(future.result())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement