Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
import requests
from bs4 import BeautifulSoup
# Import from the public concurrent.futures package, not the private
# concurrent.futures.thread submodule (an implementation detail).
from concurrent.futures import ThreadPoolExecutor

# Index page listing every Australia job posting to be scraped.
main = "https://deltaimmigration.com.au/Australia-jobs/"
def First():
    """Collect job-page URLs and their names from the index page.

    Writes every URL to links.txt (one per line) and returns the
    parallel lists ``(links, names)``.

    Raises:
        requests.HTTPError: if the index page returns an error status.
        requests.Timeout: if the server does not respond within 30s.
    """
    # Timeout prevents an indefinite hang; raise_for_status fails loudly
    # instead of silently parsing an error page for job links.
    r = requests.get(main, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html5lib')
    links = []
    names = []
    with open("links.txt", 'w', newline="", encoding="UTF-8") as f:
        for cell in soup.findAll("td", {'width': '250'}):
            anchor = cell.contents[1]
            name = anchor.text
            # hrefs are relative ("../<path>"); drop the leading "../"
            # and rebuild an absolute URL.
            link = f"https://deltaimmigration.com.au/{anchor.get('href')[3:]}"
            f.write(link + "\n")
            links.append(link)
            names.append(name)
    print(f"We Have Collected {len(links)} urls")
    return links, names
def Second(link, name):
    """Fetch one job page and dump its 900px-wide table's text to <name>.txt.

    Returns a short "Saved <name>" status string for the caller to print.
    """
    response = requests.get(link)
    page = BeautifulSoup(response.text, 'html5lib')
    for table in page.findAll("table", {'width': '900'}):
        # One file per job; rewritten if more than one matching table exists.
        with open(f"{name}.txt", 'w', newline="", encoding="UTF-8") as out:
            out.write(table.text)
    return f"Saved {name}"
if __name__ == "__main__":
    # First() returns (links, names) as two parallel lists; zip(*...) pairs
    # them element-wise.  The original iterated the 2-tuple directly, which
    # tried to unpack the two whole lists as (link, name) — a bug.
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(Second, link, name): (link, name)
                   for link, name in zip(*First())}
        for future in futures:
            print(future.result())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement