Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json
import os
import shutil
import string
import urllib.request
import zipfile

import psycopg2
import requests
from lxml import html
# Dataset landing page that is scraped for the archive download link.
DATASET_URL = (
    'https://www.data.gouv.fr/fr/datasets/'
    'base-sirene-des-entreprises-et-de-leurs-etablissements-siren-siret/'
)
# Anchors on that page whose href is an archive download link.
ARCHIVE_LINK_XPATH = '//h4[@class="list-group-item-heading ellipsis"]/a'
# Local file the archive is downloaded to; removed again at the end of a run.
ZIP_PATH = 'myzip.zip'
DB_DSN = "host='localhost' dbname='siren' user='postgres' password='password'"
# Directory the COPY statement reads the extracted file from.
# NOTE(review): the archive is extracted into the current working directory,
# so this presumably expects the script to be started from this directory
# -- confirm.
SERVER_DATA_DIR = '/Library/PostgreSQL/9.6/'
COPY_QUERY = (
    "COPY siren_all FROM %s "
    "WITH DELIMITER ';' CSV HEADER ENCODING 'windows-1251'"
)


def _error_response(message):
    """Return *message* wrapped in the JSON error object used by this script."""
    return json.dumps({"response": message})


def _find_archive_url():
    """Return the href of the first archive link on the dataset page.

    Returns None when the page contains no matching link (or the link has
    no href), instead of failing with an IndexError/KeyError.
    """
    page = requests.get(DATASET_URL)
    tree = html.fromstring(page.content)
    links = tree.xpath(ARCHIVE_LINK_XPATH)
    if not links:
        return None
    # NOTE(review): the first link is presumably the most recent archive.
    return links[0].get('href')


def _is_update_archive(url):
    """Tell whether the archive behind *url* has to be imported.

    The decision is made on the character right before the four-character
    extension of the file name: it must be ``Q`` (``...Q.zip``).
    """
    zip_name = url.split('/')[-1]
    # Slice rather than index so that a too-short name is simply "no match".
    return zip_name[-5:-4] == 'Q'


def _download_and_extract(url):
    """Download the archive at *url*, unpack it, and return a member name.

    The archive is written to ``ZIP_PATH`` and extracted into the current
    working directory.  The file name of the last archive member is
    returned, or an empty string when the archive has no members.
    """
    with urllib.request.urlopen(url) as response, \
            open(ZIP_PATH, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)
    with zipfile.ZipFile(ZIP_PATH) as archive:
        archive.extractall()
        names = archive.namelist()
    return names[-1] if names else ''


def _copy_into_database(extract_name):
    """Load the extracted file into the ``siren_all`` table.

    Returns:
        None on success, otherwise a JSON error string
        (``"Error connecting to DB"`` or ``"Error parsing data"``).
    """
    try:
        conn = psycopg2.connect(DB_DSN)
    except psycopg2.Error:
        return _error_response("Error connecting to DB")
    try:
        cur = conn.cursor()
        # The file name comes from a downloaded archive, so it is handed to
        # psycopg2 as a parameter and quoted by the driver instead of being
        # spliced into the SQL text.
        cur.execute(COPY_QUERY, (SERVER_DATA_DIR + extract_name,))
        # psycopg2 wraps statements in a transaction; without an explicit
        # commit the imported rows are discarded when the connection closes.
        conn.commit()
    except psycopg2.Error:
        return _error_response("Error parsing data")
    finally:
        # Closing also discards any uncommitted work after a failure.
        conn.close()
    return None


def main():
    """Import the published archive into PostgreSQL when it is an update.

    Prints ``update`` / ``no update`` depending on the archive name and, after
    a successful import, the name of the imported file.

    Returns:
        None when there was nothing to do or the import succeeded, otherwise
        a JSON string of the form ``{"response": "<error message>"}``.
    """
    archive_url = _find_archive_url()
    if archive_url is None:
        return _error_response("Error finding archive link")

    if not _is_update_archive(archive_url):
        # Nothing was downloaded, so there is nothing to import or clean up.
        print('no update')
        return None

    print('update')
    try:
        extract_name = _download_and_extract(archive_url)
        error = _copy_into_database(extract_name)
        if error is not None:
            return error
        print(extract_name)
        return None
    finally:
        # Remove the downloaded archive on success and on failure alike.
        if os.path.exists(ZIP_PATH):
            os.remove(ZIP_PATH)


if __name__ == "__main__":
    result = main()
    if result is not None:
        print(result)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement