Advertisement
walkiriaapps

Script datos empresa

Mar 12th, 2022
43
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from csv import reader
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import csv
  5.  
  6.  
  7. def search( str, table ):
  8.     for elements in table:
  9.         if(elements.find("th").text == str):
  10.             if(str != "Domicilio Social"):
  11.                 return elements.find("td").text.strip().replace("\n\t\t\t\t\t\t\t\t\t\t\t\t(CIF)", "")
  12.             else:
  13.                 div = elements.find("div", class_="adr")
  14.                 address = ""
  15.                 for span in div:
  16.                     address += " "+ span.text.strip()
  17.                 return address
  18.     return "-"
  19.  
  20.  
  21. # open file in read mode
  22. with open('urls_empresas_navarra.csv', 'r') as read_obj:
  23. # pass the file object to reader() to get the reader object
  24.     csv_reader = reader(read_obj)
  25. # Iterate over each row in the csv using reader object
  26.     for row in csv_reader:
  27. # row variable is a list that represents a row in csv
  28.         print(row[0])
  29.         for url in row:
  30.             URL = url
  31.             headers = {'User-Agent': 'Mozilla/6.0'}
  32.             page = requests.get(URL, headers=headers)
  33.             soup = BeautifulSoup(page.content, "html.parser")
  34.             #print(soup)
  35.             #soup = BeautifulSoup(open("detalle.html"), "html.parser")
  36.             companies = {}
  37.             table = soup.find("table", class_="vcard datos_ppales")
  38.             print(table)
  39.            
  40.             if 'denominacion' not in companies:
  41.                 companies['denominacion'] = search("Denominación", table)
  42.             if 'domicilio_social' not in companies:
  43.                 companies['domicilio_social'] = search("Domicilio Social", table)
  44.             if 'telefono' not in companies:
  45.                 companies['telefono'] = search("Teléfono", table)
  46.             if 'urls' not in companies:
  47.                 companies['urls'] = search("URLS", table)
  48.  
  49.             with open('detalle_empresas.csv', 'a', newline='') as csvfile:
  50.                 w = csv.DictWriter(csvfile, companies.keys())
  51.                 w.writerow(companies)
  52.  
  53.             print(companies)
Advertisement
Advertisement
Advertisement
RAW Paste Data Copied
Advertisement