Advertisement
Guest User

Untitled

a guest
Dec 16th, 2016
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.98 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import psycopg2
  3. import pygrametl
  4. from pygrametl.datasources import CSVSource
  5. from pygrametl.tables import Dimension, FactTable
  6.  
  7.  
  8. pgconn = psycopg2.connect(dbname='SantePubblic',user='postgres', password='0000')
  9. connection = pygrametl.ConnectionWrapper(pgconn)
  10. connection.setasdefault()
  11. connection.execute('set search_path to "SantePubblic"')
  12. print('connection établie')
  13.  
  14. accident_file_handle = open('SantePublic.csv', 'r', 16384)
  15. SanteSource = CSVSource(accident_file_handle, delimiter=';')
  16. print('lecture du fichier réussi')
  17. # Creation of dimension and fact table abstractions for use in the ETL flow
  18. TempsSoin_dimension = Dimension(
  19. name='TempsSoin',
  20. key='id_TempsSoin',
  21. attributes=['moisSoin', 'anneeSoin'])
  22.  
  23. TempsRem_dimension = Dimension(
  24. name='TempsRem',
  25. key='id_TempsRem',
  26. attributes=['moisRem','anneeRem'])
  27.  
  28.  
  29.  
  30. Assurance_dimension = Dimension(
  31. name='Assurance',
  32. key='id_Assurance',
  33. attributes=['natureAss','libelleAss']
  34. )
  35.  
  36. Prestation_dimension = Dimension(
  37. name='Prestation',
  38. key='id_Prestation',
  39. attributes=['libellePrestation']
  40. )
  41. Remboursement_Fact = FactTable(
  42. name='Remboursement',
  43. keyrefs=['id_TempsSoin', 'id_TempsRem', 'id_Prestation','id_Assurance'],
  44. measures=['montantRem','baseRem'])
  45.  
  46. # Python function needed to split the date into its three parts
  47. def transformerDate(row):
  48. """Splits a timestamp containing a date into its three parts"""
  49.  
  50. # Splitting of the timestamp into parts
  51.  
  52. # Assignment of each part to the dictionary
  53. #date_split = date.split('/')
  54. # Récupérer chaque élément à part et le rajouter dans le dictionnaire
  55. row['anneeSoin'] = row['sns_date'][0:4]
  56. print('annee :' + row['anneeSoin'])
  57. row['moisSoin'] = row['sns_date'][4:6]
  58.  
  59.  
  60. for row in SanteSource:
  61. print(row)
  62. row['id_TempsSoin']=row['sns_date']
  63. #
  64. transformerDate(row)
  65. row['id_TempsSoin'] = TempsSoin_dimension.ensure(row)
  66.  
  67.  
  68. row['id_TempsRem']=row['rem_date']
  69. row['anneeRem'] = row['rem_date'][0:4]
  70. row['moisRem'] = row['rem_date'][4:6]
  71.  
  72.  
  73. row['id_TempsRem'] = TempsRem_dimension.ensure(row)
  74. ##
  75.  
  76. row['id_Assurance'] = row['asu_nat']+row['l_asu_nat']
  77. row['natureAss'] = row['asu_nat']
  78. row['libelleAss'] = row['l_asu_nat']
  79. row['id_Assurance'] = Assurance_dimension.ensure(row)
  80. ##
  81.  
  82. row['id_Prestation'] = row['prs_nat']
  83. row['libellePrestation'] = row['l_prs_nat']
  84. Prestation_dimension.ensure(row)
  85.  
  86. ch=row['rem_mon'].replace(",",".")
  87. row['montantRem']=pygrametl.getfloat(ch)
  88. ch2=row['rec_mon'].replace(",",".")
  89. row['baseRem']=pygrametl.getfloat(ch2)
  90. ## # The row can then be inserted into the fact table
  91. Remboursement_Fact.ensure(row)
  92.  
  93. # The data warehouse connection is then ordered to commit and close
  94. connection.commit()
  95. connection.close()
  96.  
  97. # Finally the connection to the sales database is closed
  98. pgconn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement