# coding=UTF-8
'''
Created on Aug 21, 2010
@author: bpgergo
convert the excel file (or csv or tab delimited txt) to SQL inserts
'''
'''this is the field delimiter in the input file'''
field_delimiter = '\t'
'''these are the fields of the Database table'''
sql_fields = ['ID',
'ENTERPRISE_NAME',
'TAX_NUMBER',
'FAMILY_NAME',
'GIVEN_NAME',
'ENTERPRISE_REGISTRATION_NUMBER',
'PREMISE_NAME',
'HOUSE_ID',
'POSTAL_CODE',
'PHONE_NUMBER_1',
'PHONE_NUMBER_2',
'FAX_NUMBER',
'BANK_IDENTIFIER',
'BANK_ACCOUNT_NUMBER',
'NUMBER_OF_EMPLOYEES',
'SETTLEMENT_NUMBER',
'REGISTRATION_NUMBER',
'PROCESSED_BY_JOB',
'GENDER',
'ACTIVITY',
'REG_HOLDER_ACTIVITY'
]
'''Excel Header: these are the field names in the Excel (that is the values in the first row)'''
excel_fields = ['"red.Br."', \
'Name', #/ Naziv pravnog lica ili preduzetnika\
'Firstname and last name of legal representative', # / Ime i prezime osobeovlašcene za zastupanje"\
'TAX ID', # / PIB\
'Enterprise Registry number', # / Maticnibroj\
'Seat', # / Sedište\
'Street', # / Adresa\
'House number', # / Broj\
'Seat Zip code', # / Broj pošte\
'Municipality',# / Opština\
'Phone', # / brojtelefona\
'Fax number', # / faks\
'Bank name', # / banka\
'Bank account number', # / broj racunau banci\
'Number of employees', # / brojzaposlenih\
'BPG',
'Already in IACS',
'settlement number',
'gender']
'''This is the mapping between the Excel field names and the SQL field names
'''
mapping = { 'ID' : None,
'ENTERPRISE_NAME' : 'Name',
'TAX_NUMBER' : 'TAX ID',
'FAMILY_NAME' : 'Firstname and last name of legal representative',
'GIVEN_NAME' : 'Firstname and last name of legal representative',
'ENTERPRISE_REGISTRATION_NUMBER' : 'Enterprise Registry number',
'PREMISE_NAME' : 'Street',
'HOUSE_ID' : 'House number',
'POSTAL_CODE' : 'Seat Zip code',
'PHONE_NUMBER_1' : 'Phone',
'PHONE_NUMBER_2' : 'Phone',
'FAX_NUMBER' : 'Fax number',
'BANK_IDENTIFIER' : 'Bank account number',
'BANK_ACCOUNT_NUMBER' : 'Bank account number',
'NUMBER_OF_EMPLOYEES' : 'Number of employees',
'SETTLEMENT_NUMBER' : 'settlement number',
'REGISTRATION_NUMBER' : 'settlement number',
'GENDER' : 'gender',
'ACTIVITY' : None,
'REG_HOLDER_ACTIVITY' : None,
'PROCESSED_BY_JOB' : None
}
'''insert statement constants'''
insert_start = ''.join(['insert into MIG_FARM (', ','.join(sql_fields), ') values ('])
insert_end = ');'
insert_reg_number = "(select 'X' || (nvl(substr(max(registration_number), 2), 0) + 1) from mig_farm where settlement_number = %s and registration_number like 'X%%')"
'''these functions are used for converting Ecxel field values into database field values'''
def wrap_str(str):
res = str.strip('" ').replace('&', "&'||'")
if res:
return ''.join(["'",res,"'"])
else:
return 'null'
''' input format:" 017 812 099 063 400 337"
note that these are two telephone numbers
first=true returns first number (017812099)
'''
def get_phone_number(str, first):
tel_nums = str.strip(' "').split()
if len(tel_nums) < 6:
return ''.join(tel_nums)
else:
if first:
return ''.join(tel_nums[:3])
else:
return ''.join(tel_nums[3:6])
def wrap_null(str):
if str:
return str
else:
return 'null'
'''these are the function mapping
for each Database field there is a function
which is applied on the value of the mapped Excel field
to get the actual value that will beinserted in the DB
'''
map_func = { 'ID' : lambda x : 'mig_farm_seq.nextval',
'ENTERPRISE_NAME' : wrap_str,
'TAX_NUMBER' : wrap_str,
'FAMILY_NAME' : lambda x : wrap_str(x.split()[0]),
'GIVEN_NAME' : lambda x : wrap_str(x.split()[1]),
'ENTERPRISE_REGISTRATION_NUMBER' : wrap_str,
'PREMISE_NAME' : wrap_str,
'HOUSE_ID' : wrap_str,
'POSTAL_CODE' : wrap_str,
'PHONE_NUMBER_1' : lambda x : wrap_str(get_phone_number(x, True)),
'PHONE_NUMBER_2' : lambda x : wrap_str(get_phone_number(x, False)),
'FAX_NUMBER' : lambda x : wrap_str(x.replace(' ', '')),
'BANK_IDENTIFIER' : lambda x : wrap_str(x.replace(' ', '').replace('-', '')[:3]),
'BANK_ACCOUNT_NUMBER' : lambda x : wrap_str(x.replace(' ', '').replace('-', '')),
'NUMBER_OF_EMPLOYEES' : wrap_null,
'SETTLEMENT_NUMBER' : wrap_null,
'REGISTRATION_NUMBER' : lambda x : insert_reg_number % x,
'GENDER' : wrap_str,
'ACTIVITY' : lambda x : '0',
'REG_HOLDER_ACTIVITY' : lambda x : '0',
'PROCESSED_BY_JOB' : lambda x : '0'
}
'''safe getter for the splittedline'''
def get(name, splittedline):
if name:
return splittedline[excel_fields.index(name)]
else:
return None
''' filter out wrong lines
'''
def filter_lines(splittedline):
return (len(splittedline) > 1) \
and (not ((len(splittedline[0])>0) and (splittedline[0][0]=='#' or splittedline[0][:4]=='"red'))) \
and splittedline[1].strip() \
#and (not (get('Already in IACS', splittedline).strip()=='Yes'))
''' output an insert statement from a splitted line
'''
def print_insert(splittedline):
print ''.join([insert_start, ','.join(map(lambda x : map_func[x](get(mapping[x],splittedline)), sql_fields)), insert_end])
import sys
f = open(sys.argv[1], 'r')
map(print_insert, filter(filter_lines, map(lambda x : x.split(field_delimiter), f.readlines())))