Advertisement
Guest User

Untitled

a guest
Jun 26th, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.53 KB | None | 0 0
  1. import os
  2. from bs4 import BeautifulSoup
  3.  
  4. hrefs = []
  5. path = '/Volumes/UNTITLED/wimoveis/paginas/'
  6. for fname in os.listdir(path):
  7. print(fname)
  8. if ('.html' in fname) and ('._' not in fname):
  9. with open(path + fname, mode = 'r') as f:
  10. html = f.read()
  11. soup = BeautifulSoup(html)
  12. h4 = soup.find_all('h4', class_ = 'aviso-data-title')
  13. href = [e.find('a')['href'] for e in h4]
  14. hrefs += href
  15.  
  16. print(len(hrefs))
  17. df = pd.DataFrame(hrefs)
  18. df.to_csv('hrefs.csv', index = False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement