Advertisement
Guest User

Untitled

a guest
Mar 28th, 2017
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.48 KB | None | 0 0
  1. import zipfile, urllib.request, shutil
  2. from lxml import html
  3. import string
  4. import os
  5. import json
  6. import requests
  7. import psycopg2
  8.  
  9. page = requests.get('https://www.data.gouv.fr/fr/datasets/base-sirene-des-entreprises-et-de-leurs-etablissements-siren-siret/')
  10.  
  11. tree = html.fromstring(page.content)
  12.  
  13. buyers = tree.xpath('//h4[@class="list-group-item-heading ellipsis"]/a')
  14.  
  15. lastUpdate = buyers[0].attrib['href']
  16.  
  17. arrayHref = lastUpdate.split('/')
  18.  
  19. zipName = arrayHref[-1]
  20.  
  21. lastLetter = zipName[-5]
  22.  
  23. extractName = ''
  24.  
  25. if lastLetter == "Q":
  26. print('update')
  27. file_name = 'myzip.zip'
  28.  
  29. with urllib.request.urlopen(lastUpdate) as response, open(file_name, 'wb') as out_file:
  30. shutil.copyfileobj(response, out_file)
  31. with zipfile.ZipFile(file_name) as zf:
  32. zf.extractall()
  33. infoZip = zf.infolist()
  34. for infoZips in infoZip:
  35. extractName = infoZips.filename
  36.  
  37.  
  38. else:
  39. print('no update')
  40.  
  41. try:
  42. conn = psycopg2.connect("host='localhost' dbname='siren' user='postgres' password='password'")
  43. except:
  44. json_response = json.dumps({"response": "Error connecting to DB"})
  45. return json_response
  46.  
  47. cur = conn.cursor()
  48.  
  49. try:
  50. cur.execute("COPY siren_all FROM '/Library/PostgreSQL/9.6/%s' WITH DELIMITER ';' CSV HEADER ENCODING 'windows-1251'" % extractName)
  51. except:
  52. json_response = json.dumps({"response": "Error parsing data"})
  53. return json_response
  54.  
  55. print(extractName)
  56. os.remove('myzip.zip')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement