Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# Parameters File by Pasei Ansah
# Version: 0.1 26/01/2012 PNAnsah Initial Version
#          0.2 20/02/2012 PNAnsah Add parameters for a post diff script
#          0.3 20/02/2012 PNAnsah Rename the POST_DIFF_DATE_FORMAT parameter
#
# Description: File to store execution parameters for the Pre-Processing scripts
# Usage: Source this file at the start of a script to load the helper functions
#        and all the parameters below
#
#######################################
# Report a fatal error and terminate the calling shell.
# Globals:   LOG_DIR, LOG_FILE (read) - directory and name of the log file
# Arguments: the message words; they are joined with single spaces
# Outputs:   "[ERROR] <message>" on stderr, and the same text prefixed with a
#            "YYYY-MM-DD HH:MM:SS" timestamp appended to $LOG_DIR/$LOG_FILE
# Returns:   does not return; exits with status 1
#######################################
die () {
  # Join the arguments with a single space regardless of the caller's IFS.
  local IFS=' '
  local msg="[ERROR] $*"

  # Write to stderr first so the message still reaches the operator when the
  # log file cannot be written (e.g. missing directory while `set -e` is on).
  printf '%s\n' "$msg" >&2
  printf '%s %s\n' "$(date '+%Y-%m-%d %T')" "$msg" >> "$LOG_DIR/$LOG_FILE"
  exit 1
}
#######################################
# Append a timestamped message to the log file, echoing it to stdout as well
# when the script is being run manually.
# Globals:   MANUAL_RUN (read) - any non-empty value enables the stdout copy
#            LOG_DIR, LOG_FILE (read) - directory and name of the log file
# Arguments: the message words; they are joined with single spaces
# Outputs:   "YYYY-MM-DD HH:MM:SS <message>" appended to $LOG_DIR/$LOG_FILE,
#            and to stdout when MANUAL_RUN is non-empty
# Returns:   status of the write to the log file
#######################################
log () {
  # Join the arguments with a single space regardless of the caller's IFS.
  local IFS=' '
  local line
  # Build the line once so the console and file copies share one timestamp.
  line="$(date '+%Y-%m-%d %T') $*"

  # Explicit non-empty test: an unquoted `[ $MANUAL_RUN ]` misbehaves for
  # values containing spaces or test operators, and fails under `set -u`.
  if [[ -n "${MANUAL_RUN-}" ]]; then
    printf '%s\n' "$line"
  fi
  printf '%s\n' "$line" >> "$LOG_DIR/$LOG_FILE"
}
# ---------------------------------------------------------------------------
# Execution parameters.
#
# SOURCE is read below but never assigned in this file, so it is expected to be
# set by the calling script before this file is sourced (see SOURCE_LIST for
# the known values). Per-source settings are stored in variables named
# <source>_<SETTING> and selected through indirect expansion. The "${!name-}"
# form yields an empty value when a source has no such setting and keeps the
# file usable from scripts that run with `set -u`.
# ---------------------------------------------------------------------------

# Extension of the delivered source files. During development the files were
# received as plain .txt; in production they are gzipped.
# TODO: should this be set in a properties file instead?
EXTENSION=.txt.gz

# Hadoop code directory location
HADOOP_BIN_DIR=/home/hadoop/hadoop/bin
# Pig code directory location
PIG_BIN_DIR=/home/hadoop/pig/bin
# Hadoop File System base data directory
HDFS_HADOOP_DATA_DIR=/user/hadoop
# Linux file system base data directory
FILE_BASE_DIR=/data/newco/data
# Landing folder where the files for SOURCE are delivered and from where they
# will be retrieved by the calling script
FILE_LANDING_DIR="${FILE_BASE_DIR}/${SOURCE-}"

# Define log directory
LOG_DIR=/data/newco/logs
# Define the name of the daily log file (one file per calendar day)
LOG_FILE="Preprocess_Script_$(date '+%Y%m%d').log"
# Create log file for the day if it does not already exist
[[ -f "${LOG_DIR}/${LOG_FILE}" ]] || touch -- "${LOG_DIR}/${LOG_FILE}"
# Define length of time in days to store logs before they are purged
LOG_PURGE=90

# Regex search and replace pattern ("search/replacement") that converts the
# data date at the start of a filename to YYYYMMDD:
#   siebel: the first three two-character groups are re-emitted as 20<3><2><1>
#   uim:    the first eight characters are kept unchanged
siebel_DATA_DATE_FORMAT='^\(..\)\(..\)\(..\).*/20\3\2\1'
uim_DATA_DATE_FORMAT='^\(........\).*/\1'
DDF_SOURCE="${SOURCE-}_DATA_DATE_FORMAT"
DATA_DATE_FORMAT=${!DDF_SOURCE-}

# List of possible source systems
SOURCE_LIST="siebel uim"

# Check that a file containing the following text is present in each daily directory
siebel_FILE_LIST="S_ADDR_PER S_ASSET S_CONTACT S_CON_ADDR S_INV_PROF S_ORG_EXT S_PROD_INT"
uim_FILE_LIST="uimsubscription"
FL_SOURCE="${SOURCE-}_FILE_LIST"
FILE_LIST=${!FL_SOURCE-}

# Join script to use for source system; if no join is required then create a blank variable
siebel_JOIN_SCRIPT="Asset_Account_Join.pig"
uim_JOIN_SCRIPT="UIM_Drop_Columns.pig"
JS_SOURCE="${SOURCE-}_JOIN_SCRIPT"
JOIN_SCRIPT=${!JS_SOURCE-}

# Diff script to use for source system
siebel_DIFF_SCRIPT="Asset_Account_Diff.sh"
uim_DIFF_SCRIPT="UIM_Diff.sh"
DS_SOURCE="${SOURCE-}_DIFF_SCRIPT"
DIFF_SCRIPT=${!DS_SOURCE-}

# Files to diff for each feed
siebel_DIFF_FILES="BillPayer Owner User"
uim_DIFF_FILES="uimsubsfiltered"
DF_SOURCE="${SOURCE-}_DIFF_FILES"
DIFF_FILE_LIST=${!DF_SOURCE-}

# Set the in-file delimiter. The values are kept as literal backslash-u escape
# text (single-quoted, not expanded by the shell); presumably the consumer
# interprets the escape - confirm against the scripts that read DELIMITER.
siebel_DELIMITER='\u00C7'
uim_DELIMITER='\u00E7'
D_SOURCE="${SOURCE-}_DELIMITER"
DELIMITER=${!D_SOURCE-}

# Set the name of output file to be ingested by ID.
# The ? character will be replaced by the date.
# A second placeholder character will be replaced by a suffix if specified.
# NOTE(review): this comment used to name '#' as the suffix placeholder, but the
# siebel template below uses '%'. Confirm which one the consuming script
# substitutes.
siebel_OUTPUT_FILENAME="siebelsubscriber_?%"
uim_OUTPUT_FILENAME="uimsubscriptions_?"
OF_SOURCE="${SOURCE-}_OUTPUT_FILENAME"
OUTPUT_FILENAME=${!OF_SOURCE-}

# Define date format to be used in the Post Diff Script ("search|replacement":
# groups of 4, 2 and 2 characters are re-emitted as <2>/<3>/<1>).
# Only uim defines one; for other sources the value is empty.
uim_POST_DIFF_DATE_FORMAT='\(....\)\(..\)\(..\).*|\2\/\3\/\1'
PDDF_SOURCE="${SOURCE-}_POST_DIFF_DATE_FORMAT"
POST_DIFF_DATE_FORMAT=${!PDDF_SOURCE-}

# FTP details for sending files to ID
# NOTE(review): credentials are hard-coded in clear text; consider loading them
# from a protected file or the environment. Also note that assigning USER (and
# HOST) here overrides any variable of the same name already present in the
# sourcing shell.
HOST=localhost
USER=hadoop
PASS=hadoop

# Set ID Landing Directory
siebel_ID_LANDING_DIR='/data/working/landing/siebel/deltas'
uim_ID_LANDING_DIR='/data/working/landing/UIM/subscription_deltas'
IDLD_SOURCE="${SOURCE-}_ID_LANDING_DIR"
ID_LANDING_DIR=${!IDLD_SOURCE-}

# Number of parallel processes to be used for pig jobs
PARALLEL_PROCS=6

# Post diff script(s). Only uim defines one; for other sources the value is empty.
uim_POST_DIFF_SCRIPT="UIM_Add_Date.sh"
PDS_SOURCE="${SOURCE-}_POST_DIFF_SCRIPT"
POST_DIFF_SCRIPT=${!PDS_SOURCE-}
Add Comment
Please, Sign In to add comment