Advertisement
Guest User

Convert CSV or TAB delimited txt to SQL Insert script

a guest
Aug 23rd, 2010
876
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.33 KB | None | 0 0
  1. # coding=UTF-8
  2. '''
  3. Created on Aug 21, 2010
  4. @author: bpgergo
  5. convert the excel file (or csv or tab delimited txt) to SQL inserts
  6. '''
  7.  
  8. '''this is the field delimiter in the input file'''
  9. field_delimiter = '\t'
  10.  
  11.  
  12. '''these are the fields of the Database table'''
  13. sql_fields = ['ID',
  14.               'ENTERPRISE_NAME',
  15.               'TAX_NUMBER',
  16.               'FAMILY_NAME',
  17.               'GIVEN_NAME',
  18.               'ENTERPRISE_REGISTRATION_NUMBER',
  19.               'PREMISE_NAME',
  20.               'HOUSE_ID',
  21.               'POSTAL_CODE',
  22.               'PHONE_NUMBER_1',
  23.               'PHONE_NUMBER_2',
  24.               'FAX_NUMBER',
  25.               'BANK_IDENTIFIER',
  26.               'BANK_ACCOUNT_NUMBER',
  27.               'NUMBER_OF_EMPLOYEES',
  28.               'SETTLEMENT_NUMBER',
  29.               'REGISTRATION_NUMBER',
  30.               'PROCESSED_BY_JOB',
  31.               'GENDER',
  32.               'ACTIVITY',
  33.               'REG_HOLDER_ACTIVITY'
  34.               ]
  35.  
  36.  
  37. '''Excel Header: these are the field names in the Excel (that is the values in the first row)'''
  38. excel_fields = ['"red.Br."', \
  39.                 'Name', #/ Naziv pravnog lica ili preduzetnika\
  40.                 'Firstname and last name of legal representative', # / Ime i prezime osobeovlašcene za zastupanje"\
  41.                 'TAX ID', # / PIB\
  42.                 'Enterprise Registry number', # / Maticnibroj\
  43.                 'Seat', # / Sedište\
  44.                 'Street', # / Adresa\
  45.                 'House number', # / Broj\
  46.                 'Seat Zip code', # / Broj pošte\
  47.                 'Municipality',# / Opština\
  48.                 'Phone', # / brojtelefona\
  49.                 'Fax number', # / faks\
  50.                 'Bank name', # / banka\
  51.                 'Bank account number', # / broj racunau banci\
  52.                 'Number of employees', # / brojzaposlenih\
  53.                 'BPG',
  54.                 'Already in IACS',
  55.                 'settlement number',
  56.                 'gender']
  57.  
  58. '''This is the mapping between the Excel field names and the SQL field names
  59. '''
  60. mapping = { 'ID' : None,
  61.             'ENTERPRISE_NAME' : 'Name',
  62.             'TAX_NUMBER' : 'TAX ID',
  63.             'FAMILY_NAME' : 'Firstname and last name of legal representative',
  64.             'GIVEN_NAME' : 'Firstname and last name of legal representative',
  65.             'ENTERPRISE_REGISTRATION_NUMBER' : 'Enterprise Registry number',
  66.             'PREMISE_NAME' : 'Street',
  67.             'HOUSE_ID' : 'House number',
  68.             'POSTAL_CODE' : 'Seat Zip code',
  69.             'PHONE_NUMBER_1' : 'Phone',
  70.             'PHONE_NUMBER_2' : 'Phone',
  71.             'FAX_NUMBER' : 'Fax number',
  72.             'BANK_IDENTIFIER' : 'Bank account number',
  73.             'BANK_ACCOUNT_NUMBER' : 'Bank account number',
  74.             'NUMBER_OF_EMPLOYEES' : 'Number of employees',
  75.             'SETTLEMENT_NUMBER' : 'settlement number',
  76.             'REGISTRATION_NUMBER' : 'settlement number',
  77.             'GENDER' : 'gender',
  78.             'ACTIVITY' : None,
  79.             'REG_HOLDER_ACTIVITY' : None,
  80.             'PROCESSED_BY_JOB' : None
  81.         }
  82.  
  83. '''insert statement constants'''
  84. insert_start = ''.join(['insert into MIG_FARM (', ','.join(sql_fields),  ') values ('])
  85. insert_end = ');'
  86. insert_reg_number = "(select 'X' || (nvl(substr(max(registration_number), 2), 0) + 1) from mig_farm where settlement_number = %s and registration_number like 'X%%')"
  87.  
  88. '''these functions are used for converting Ecxel field values into database field values'''
  89. def wrap_str(str):
  90.     res = str.strip('" ').replace('&', "&'||'")
  91.     if res:
  92.         return ''.join(["'",res,"'"])
  93.     else:
  94.         return 'null'
  95.  
  96. ''' input format:" 017 812 099 063 400 337"
  97. note that these are two telephone numbers
  98. first=true returns first number (017812099)
  99. '''
  100. def get_phone_number(str, first):
  101.     tel_nums = str.strip(' "').split()
  102.     if len(tel_nums) < 6:
  103.         return ''.join(tel_nums)
  104.     else:
  105.         if first:
  106.             return ''.join(tel_nums[:3])
  107.         else:
  108.             return ''.join(tel_nums[3:6])
  109.  
  110. def wrap_null(str):
  111.     if str:
  112.         return str
  113.     else:
  114.         return 'null'
  115.  
  116.  
  117. '''these are the function mapping
  118. for each Database field there is a function
  119. which is applied on the value of the mapped Excel field
  120. to get the actual value that will beinserted in the DB
  121. '''
  122. map_func = { 'ID' : lambda x : 'mig_farm_seq.nextval',
  123.              'ENTERPRISE_NAME' : wrap_str,
  124.              'TAX_NUMBER' : wrap_str,
  125.              'FAMILY_NAME' : lambda x : wrap_str(x.split()[0]),
  126.              'GIVEN_NAME' : lambda x : wrap_str(x.split()[1]),
  127.              'ENTERPRISE_REGISTRATION_NUMBER' : wrap_str,
  128.              'PREMISE_NAME' : wrap_str,
  129.              'HOUSE_ID' : wrap_str,
  130.              'POSTAL_CODE' : wrap_str,
  131.              'PHONE_NUMBER_1' : lambda x : wrap_str(get_phone_number(x, True)),
  132.              'PHONE_NUMBER_2' : lambda x : wrap_str(get_phone_number(x, False)),
  133.              'FAX_NUMBER' : lambda x : wrap_str(x.replace(' ', '')),
  134.              'BANK_IDENTIFIER' : lambda x : wrap_str(x.replace(' ', '').replace('-', '')[:3]),
  135.              'BANK_ACCOUNT_NUMBER' : lambda x : wrap_str(x.replace(' ', '').replace('-', '')),
  136.              'NUMBER_OF_EMPLOYEES' : wrap_null,
  137.              'SETTLEMENT_NUMBER' : wrap_null,
  138.              'REGISTRATION_NUMBER' : lambda x : insert_reg_number % x,
  139.              'GENDER' : wrap_str,
  140.              'ACTIVITY' : lambda x : '0',
  141.              'REG_HOLDER_ACTIVITY' : lambda x : '0',
  142.              'PROCESSED_BY_JOB' : lambda x : '0'
  143.          }
  144.  
  145. '''safe getter for the splittedline'''
  146. def get(name, splittedline):
  147.     if name:
  148.         return splittedline[excel_fields.index(name)]
  149.     else:
  150.         return None
  151.  
  152. ''' filter out wrong lines
  153. '''
  154. def filter_lines(splittedline):
  155.     return (len(splittedline) > 1) \
  156.         and (not ((len(splittedline[0])>0) and (splittedline[0][0]=='#' or splittedline[0][:4]=='"red'))) \
  157.         and splittedline[1].strip() \
  158.         #and (not (get('Already in IACS', splittedline).strip()=='Yes'))
  159.  
  160. ''' output an insert statement from a splitted line
  161. '''
  162. def print_insert(splittedline):
  163.     print ''.join([insert_start, ','.join(map(lambda x : map_func[x](get(mapping[x],splittedline)), sql_fields)), insert_end])
  164.  
  165. import sys
  166. f = open(sys.argv[1], 'r')
  167. map(print_insert, filter(filter_lines, map(lambda x : x.split(field_delimiter), f.readlines())))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement