Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import psycopg2
- import pygrametl
- from pygrametl.datasources import CSVSource
- from pygrametl.tables import Dimension, FactTable
- pgconn = psycopg2.connect(dbname='SantePubblic',user='postgres', password='0000')
- connection = pygrametl.ConnectionWrapper(pgconn)
- connection.setasdefault()
- connection.execute('set search_path to "SantePubblic"')
- print('connection établie')
- accident_file_handle = open('SantePublic.csv', 'r', 16384)
- SanteSource = CSVSource(accident_file_handle, delimiter=';')
- print('lecture du fichier réussi')
- # Creation of dimension and fact table abstractions for use in the ETL flow
- TempsSoin_dimension = Dimension(
- name='TempsSoin',
- key='id_TempsSoin',
- attributes=['moisSoin', 'anneeSoin'])
- TempsRem_dimension = Dimension(
- name='TempsRem',
- key='id_TempsRem',
- attributes=['moisRem','anneeRem'])
- Assurance_dimension = Dimension(
- name='Assurance',
- key='id_Assurance',
- attributes=['natureAss','libelleAss']
- )
- Prestation_dimension = Dimension(
- name='Prestation',
- key='id_Prestation',
- attributes=['libellePrestation']
- )
- Remboursement_Fact = FactTable(
- name='Remboursement',
- keyrefs=['id_TempsSoin', 'id_TempsRem', 'id_Prestation','id_Assurance'],
- measures=['montantRem','baseRem'])
- # Python function needed to split the date into its three parts
- def transformerDate(row):
- """Splits a timestamp containing a date into its three parts"""
- # Splitting of the timestamp into parts
- # Assignment of each part to the dictionary
- #date_split = date.split('/')
- # Récupérer chaque élément à part et le rajouter dans le dictionnaire
- row['anneeSoin'] = row['sns_date'][0:4]
- print('annee :' + row['anneeSoin'])
- row['moisSoin'] = row['sns_date'][4:6]
- for row in SanteSource:
- print(row)
- row['id_TempsSoin']=row['sns_date']
- #
- transformerDate(row)
- row['id_TempsSoin'] = TempsSoin_dimension.ensure(row)
- row['id_TempsRem']=row['rem_date']
- row['anneeRem'] = row['rem_date'][0:4]
- row['moisRem'] = row['rem_date'][4:6]
- row['id_TempsRem'] = TempsRem_dimension.ensure(row)
- ##
- row['id_Assurance'] = row['asu_nat']+row['l_asu_nat']
- row['natureAss'] = row['asu_nat']
- row['libelleAss'] = row['l_asu_nat']
- row['id_Assurance'] = Assurance_dimension.ensure(row)
- ##
- row['id_Prestation'] = row['prs_nat']
- row['libellePrestation'] = row['l_prs_nat']
- Prestation_dimension.ensure(row)
- ch=row['rem_mon'].replace(",",".")
- row['montantRem']=pygrametl.getfloat(ch)
- ch2=row['rec_mon'].replace(",",".")
- row['baseRem']=pygrametl.getfloat(ch2)
- ## # The row can then be inserted into the fact table
- Remboursement_Fact.ensure(row)
- # The data warehouse connection is then ordered to commit and close
- connection.commit()
- connection.close()
- # Finally the connection to the sales database is closed
- pgconn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement