Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding=UTF-8
- '''
- Created on Aug 21, 2010
- @author: bpgergo
- convert the excel file (or csv or tab delimited txt) to SQL inserts
- '''
- '''this is the field delimiter in the input file'''
- field_delimiter = '\t'
- '''these are the fields of the Database table'''
- sql_fields = ['ID',
- 'ENTERPRISE_NAME',
- 'TAX_NUMBER',
- 'FAMILY_NAME',
- 'GIVEN_NAME',
- 'ENTERPRISE_REGISTRATION_NUMBER',
- 'PREMISE_NAME',
- 'HOUSE_ID',
- 'POSTAL_CODE',
- 'PHONE_NUMBER_1',
- 'PHONE_NUMBER_2',
- 'FAX_NUMBER',
- 'BANK_IDENTIFIER',
- 'BANK_ACCOUNT_NUMBER',
- 'NUMBER_OF_EMPLOYEES',
- 'SETTLEMENT_NUMBER',
- 'REGISTRATION_NUMBER',
- 'PROCESSED_BY_JOB',
- 'GENDER',
- 'ACTIVITY',
- 'REG_HOLDER_ACTIVITY'
- ]
- '''Excel Header: these are the field names in the Excel (that is the values in the first row)'''
- excel_fields = ['"red.Br."', \
- 'Name', #/ Naziv pravnog lica ili preduzetnika\
- 'Firstname and last name of legal representative', # / Ime i prezime osobeovlašcene za zastupanje"\
- 'TAX ID', # / PIB\
- 'Enterprise Registry number', # / Maticnibroj\
- 'Seat', # / Sedište\
- 'Street', # / Adresa\
- 'House number', # / Broj\
- 'Seat Zip code', # / Broj pošte\
- 'Municipality',# / Opština\
- 'Phone', # / brojtelefona\
- 'Fax number', # / faks\
- 'Bank name', # / banka\
- 'Bank account number', # / broj racunau banci\
- 'Number of employees', # / brojzaposlenih\
- 'BPG',
- 'Already in IACS',
- 'settlement number',
- 'gender']
- '''This is the mapping between the Excel field names and the SQL field names
- '''
- mapping = { 'ID' : None,
- 'ENTERPRISE_NAME' : 'Name',
- 'TAX_NUMBER' : 'TAX ID',
- 'FAMILY_NAME' : 'Firstname and last name of legal representative',
- 'GIVEN_NAME' : 'Firstname and last name of legal representative',
- 'ENTERPRISE_REGISTRATION_NUMBER' : 'Enterprise Registry number',
- 'PREMISE_NAME' : 'Street',
- 'HOUSE_ID' : 'House number',
- 'POSTAL_CODE' : 'Seat Zip code',
- 'PHONE_NUMBER_1' : 'Phone',
- 'PHONE_NUMBER_2' : 'Phone',
- 'FAX_NUMBER' : 'Fax number',
- 'BANK_IDENTIFIER' : 'Bank account number',
- 'BANK_ACCOUNT_NUMBER' : 'Bank account number',
- 'NUMBER_OF_EMPLOYEES' : 'Number of employees',
- 'SETTLEMENT_NUMBER' : 'settlement number',
- 'REGISTRATION_NUMBER' : 'settlement number',
- 'GENDER' : 'gender',
- 'ACTIVITY' : None,
- 'REG_HOLDER_ACTIVITY' : None,
- 'PROCESSED_BY_JOB' : None
- }
- '''insert statement constants'''
- insert_start = ''.join(['insert into MIG_FARM (', ','.join(sql_fields), ') values ('])
- insert_end = ');'
- insert_reg_number = "(select 'X' || (nvl(substr(max(registration_number), 2), 0) + 1) from mig_farm where settlement_number = %s and registration_number like 'X%%')"
- '''these functions are used for converting Ecxel field values into database field values'''
- def wrap_str(str):
- res = str.strip('" ').replace('&', "&'||'")
- if res:
- return ''.join(["'",res,"'"])
- else:
- return 'null'
- ''' input format:" 017 812 099 063 400 337"
- note that these are two telephone numbers
- first=true returns first number (017812099)
- '''
- def get_phone_number(str, first):
- tel_nums = str.strip(' "').split()
- if len(tel_nums) < 6:
- return ''.join(tel_nums)
- else:
- if first:
- return ''.join(tel_nums[:3])
- else:
- return ''.join(tel_nums[3:6])
- def wrap_null(str):
- if str:
- return str
- else:
- return 'null'
- '''these are the function mapping
- for each Database field there is a function
- which is applied on the value of the mapped Excel field
- to get the actual value that will beinserted in the DB
- '''
- map_func = { 'ID' : lambda x : 'mig_farm_seq.nextval',
- 'ENTERPRISE_NAME' : wrap_str,
- 'TAX_NUMBER' : wrap_str,
- 'FAMILY_NAME' : lambda x : wrap_str(x.split()[0]),
- 'GIVEN_NAME' : lambda x : wrap_str(x.split()[1]),
- 'ENTERPRISE_REGISTRATION_NUMBER' : wrap_str,
- 'PREMISE_NAME' : wrap_str,
- 'HOUSE_ID' : wrap_str,
- 'POSTAL_CODE' : wrap_str,
- 'PHONE_NUMBER_1' : lambda x : wrap_str(get_phone_number(x, True)),
- 'PHONE_NUMBER_2' : lambda x : wrap_str(get_phone_number(x, False)),
- 'FAX_NUMBER' : lambda x : wrap_str(x.replace(' ', '')),
- 'BANK_IDENTIFIER' : lambda x : wrap_str(x.replace(' ', '').replace('-', '')[:3]),
- 'BANK_ACCOUNT_NUMBER' : lambda x : wrap_str(x.replace(' ', '').replace('-', '')),
- 'NUMBER_OF_EMPLOYEES' : wrap_null,
- 'SETTLEMENT_NUMBER' : wrap_null,
- 'REGISTRATION_NUMBER' : lambda x : insert_reg_number % x,
- 'GENDER' : wrap_str,
- 'ACTIVITY' : lambda x : '0',
- 'REG_HOLDER_ACTIVITY' : lambda x : '0',
- 'PROCESSED_BY_JOB' : lambda x : '0'
- }
- '''safe getter for the splittedline'''
- def get(name, splittedline):
- if name:
- return splittedline[excel_fields.index(name)]
- else:
- return None
- ''' filter out wrong lines
- '''
- def filter_lines(splittedline):
- return (len(splittedline) > 1) \
- and (not ((len(splittedline[0])>0) and (splittedline[0][0]=='#' or splittedline[0][:4]=='"red'))) \
- and splittedline[1].strip() \
- #and (not (get('Already in IACS', splittedline).strip()=='Yes'))
- ''' output an insert statement from a splitted line
- '''
- def print_insert(splittedline):
- print ''.join([insert_start, ','.join(map(lambda x : map_func[x](get(mapping[x],splittedline)), sql_fields)), insert_end])
- import sys
- f = open(sys.argv[1], 'r')
- map(print_insert, filter(filter_lines, map(lambda x : x.split(field_delimiter), f.readlines())))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement