Advertisement
walkiriaapps

Script datos empresa

Mar 12th, 2022
294
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.69 KB | None | 0 0
  1. from csv import reader
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import csv
  5.  
  6.  
  7. def search( str, table ):
  8.     for elements in table:
  9.         if(elements.find("th").text == str):
  10.             if(str != "Domicilio Social"):
  11.                 return elements.find("td").text.strip().replace("\n\t\t\t\t\t\t\t\t\t\t\t\t(CIF)", "")
  12.             else:
  13.                 div = elements.find("div", class_="adr")
  14.                 address = ""
  15.                 for span in div:
  16.                     address += " "+ span.text.strip()
  17.                 return address
  18.     return "-"
  19.  
  20.  
  21. # open file in read mode
  22. with open('urls_empresas_navarra.csv', 'r') as read_obj:
  23. # pass the file object to reader() to get the reader object
  24.     csv_reader = reader(read_obj)
  25. # Iterate over each row in the csv using reader object
  26.     for row in csv_reader:
  27. # row variable is a list that represents a row in csv
  28.         print(row[0])
  29.         for url in row:
  30.             URL = url
  31.             headers = {'User-Agent': 'Mozilla/6.0'}
  32.             page = requests.get(URL, headers=headers)
  33.             soup = BeautifulSoup(page.content, "html.parser")
  34.             #print(soup)
  35.             #soup = BeautifulSoup(open("detalle.html"), "html.parser")
  36.             companies = {}
  37.             table = soup.find("table", class_="vcard datos_ppales")
  38.             print(table)
  39.            
  40.             if 'denominacion' not in companies:
  41.                 companies['denominacion'] = search("Denominación", table)
  42.             if 'domicilio_social' not in companies:
  43.                 companies['domicilio_social'] = search("Domicilio Social", table)
  44.             if 'telefono' not in companies:
  45.                 companies['telefono'] = search("Teléfono", table)
  46.             if 'urls' not in companies:
  47.                 companies['urls'] = search("URLS", table)
  48.  
  49.             with open('detalle_empresas.csv', 'a', newline='') as csvfile:
  50.                 w = csv.DictWriter(csvfile, companies.keys())
  51.                 w.writerow(companies)
  52.  
  53.             print(companies)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement