Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
import os

import pandas as pd
from bs4 import BeautifulSoup

# Directory of saved wimoveis listing pages to scan.
PATH = '/Volumes/UNTITLED/wimoveis/paginas/'


def extract_hrefs(path):
    """Collect listing URLs from every saved HTML page under *path*.

    Scans ``path`` for ``.html`` files (skipping macOS AppleDouble
    ``._*`` metadata files), parses each one, and extracts the ``href``
    of the anchor inside every ``<h4 class="aviso-data-title">``.

    Returns a list of href strings in directory-listing order.
    Raises KeyError/TypeError if an ``aviso-data-title`` h4 has no
    anchor or its anchor lacks an href (same as the original code).
    """
    hrefs = []
    for fname in os.listdir(path):
        print(fname)
        # endswith/startswith instead of substring tests: the original
        # `'.html' in fname` also matched names like 'page.html.bak',
        # and `'._' not in fname` rejected any name containing '._'.
        if fname.endswith('.html') and not fname.startswith('._'):
            with open(os.path.join(path, fname), mode='r',
                      encoding='utf-8') as f:
                html = f.read()
            # Name the parser explicitly: bare BeautifulSoup(html)
            # emits GuessedAtParserWarning and the guessed parser can
            # differ between environments.
            soup = BeautifulSoup(html, 'html.parser')
            titles = soup.find_all('h4', class_='aviso-data-title')
            hrefs += [h4.find('a')['href'] for h4 in titles]
    return hrefs


if __name__ == '__main__':
    hrefs = extract_hrefs(PATH)
    print(len(hrefs))
    # Bug fix: the original called pd.DataFrame without ever importing
    # pandas, which raised NameError at this point.
    pd.DataFrame(hrefs).to_csv('hrefs.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement