Guest User

Untitled

a guest
Aug 27th, 2018
150
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.97 KB | None | 0 0
  #!/bin/bash

  # Parameters File by Pasei Ansah
  # Version: 0.1 26/01/2012 PNAnsah Initial Version
  #          0.2 20/02/2012 PNAnsah Add parameters for a post diff script
  #          0.3 20/02/2012 PNAnsah Rename the POST_DIFF_DATE_FORMAT parameter
  #
  # Description: File to store execution parameters for the Pre-Processing scripts
  # Usage: Source this file at the start of the script to load the functions
  #        and all the parameters below
  #

  #######################################
  # Record a fatal error and abort the calling script.
  # Globals:   LOG_DIR, LOG_FILE (read)
  # Arguments: the message words; they are joined with single spaces
  # Outputs:   appends "<YYYY-MM-DD HH:MM:SS> [ERROR] <message>" to
  #            $LOG_DIR/$LOG_FILE and writes "[ERROR] <message>" to stderr
  # Returns:   never returns; exits the shell with status 1
  #######################################
  die () {
    # Pin IFS so "$*" always joins the arguments with a single space,
    # whatever the caller has done to IFS.
    local IFS=' '
    local message="[ERROR] $*"
    # The redirect target is quoted: unquoted, a path containing whitespace
    # makes bash fail with "ambiguous redirect" and the log entry is lost.
    printf '%s %s\n' "$(date '+%Y-%m-%d %T')" "$message" >> "$LOG_DIR/$LOG_FILE"
    printf '%s\n' "$message" >&2
    exit 1
  }
  #######################################
  # Write a timestamped message to the daily log file and, when MANUAL_RUN
  # is set to a non-empty value, also to stdout.
  # Globals:   LOG_DIR, LOG_FILE, MANUAL_RUN (read)
  # Arguments: the message words; they are joined with single spaces
  # Outputs:   appends "<YYYY-MM-DD HH:MM:SS> <message>" to
  #            $LOG_DIR/$LOG_FILE; the same line goes to stdout when
  #            MANUAL_RUN is non-empty
  # Returns:   status of the append to the log file
  #######################################
  log () {
    # Pin IFS so "$*" always joins the arguments with a single space.
    local IFS=' '
    local line
    # Build the line once so stdout and the log file carry the same timestamp.
    line="$(date '+%Y-%m-%d %T') $*"
    # [[ -n … ]] rather than an unquoted [ $MANUAL_RUN ], which raises a
    # "binary operator expected" error when the value has several words.
    if [[ -n "${MANUAL_RUN:-}" ]]; then
      printf '%s\n' "$line"
    fi
    printf '%s\n' "$line" >> "$LOG_DIR/$LOG_FILE"
  }
  # Extension of the incoming source files. During development the files
  # were received as plain .txt; in production they arrive gzip-compressed.
  # TODO: consider moving this setting into a properties file.
  EXTENSION=.txt.gz


  # Hadoop code directory location
  HADOOP_BIN_DIR=/home/hadoop/hadoop/bin

  # PIG code directory location
  PIG_BIN_DIR=/home/hadoop/pig/bin

  # Hadoop File System base data directory
  HDFS_HADOOP_DATA_DIR=/user/hadoop

  # LINUX file system base data directory
  FILE_BASE_DIR=/data/newco/data

  # Landing folder where the source system's files are delivered and from
  # where they will be retrieved by the calling script.
  # SOURCE is not assigned in this file; it is expected to be set by the
  # script that sources it (see SOURCE_LIST for the known values).
  FILE_LANDING_DIR="${FILE_BASE_DIR}/${SOURCE}"

  # Define log directory
  LOG_DIR=/data/newco/logs

  # Name of the daily log file; the YYYYMMDD stamp is taken at the moment
  # this file is sourced.
  LOG_FILE="Preprocess_Script_$(date '+%Y%m%d').log"

  # Create the log file for the day if it does not already exist.
  # The existence check avoids bumping the mtime of an existing file.
  [[ -f "$LOG_DIR/$LOG_FILE" ]] || touch -- "$LOG_DIR/$LOG_FILE"

  # Number of days to keep logs before they are purged
  LOG_PURGE=90

  # Regex search/replace pairs ("<pattern>/<replacement>") that turn the
  # data date embedded in a file name into YYYYMMDD.
  # NOTE(review): the value looks like the middle of a sed s/…/…/ expression;
  # confirm against the caller.
  #   siebel: the first three 2-character groups are emitted in reverse
  #           order behind a literal "20" (DDMMYY… -> 20YYMMDD)
  #   uim:    the first 8 characters are kept as they are
  siebel_DATA_DATE_FORMAT='^\(..\)\(..\)\(..\).*/20\3\2\1'
  uim_DATA_DATE_FORMAT='^\(........\).*/\1'

  # Select the entry for the active source via indirect expansion.
  DDF_SOURCE="${SOURCE}_DATA_DATE_FORMAT"
  DATA_DATE_FORMAT="${!DDF_SOURCE}"

  # Space-separated list of possible source systems. Each name is also the
  # prefix of the per-source <name>_* parameters defined in this file.
  SOURCE_LIST="siebel uim"

  # A file containing each of the following names must be present in every
  # daily directory (space-separated list, one list per source system).
  siebel_FILE_LIST="S_ADDR_PER S_ASSET S_CONTACT S_CON_ADDR S_INV_PROF S_ORG_EXT S_PROD_INT"
  uim_FILE_LIST="uimsubscription"

  # Select the list for the active source via indirect expansion.
  FL_SOURCE="${SOURCE}_FILE_LIST"
  FILE_LIST="${!FL_SOURCE}"

  # Join script to use for each source system. If a source needs no join,
  # define its variable with an empty value.
  siebel_JOIN_SCRIPT="Asset_Account_Join.pig"
  uim_JOIN_SCRIPT="UIM_Drop_Columns.pig"

  # Select the script for the active source via indirect expansion.
  JS_SOURCE="${SOURCE}_JOIN_SCRIPT"
  JOIN_SCRIPT="${!JS_SOURCE}"

  # Diff script to use for each source system
  siebel_DIFF_SCRIPT="Asset_Account_Diff.sh"
  uim_DIFF_SCRIPT="UIM_Diff.sh"

  # Select the script for the active source via indirect expansion.
  DS_SOURCE="${SOURCE}_DIFF_SCRIPT"
  DIFF_SCRIPT="${!DS_SOURCE}"

  # Files to diff for each feed (space-separated list per source system)
  siebel_DIFF_FILES="BillPayer Owner User"
  uim_DIFF_FILES="uimsubsfiltered"

  # Select the list for the active source via indirect expansion.
  # Note the result is named DIFF_FILE_LIST, not DIFF_FILES.
  DF_SOURCE="${SOURCE}_DIFF_FILES"
  DIFF_FILE_LIST="${!DF_SOURCE}"

  # In-file field delimiter for each source system.
  # The values are single-quoted, so they hold the literal six characters
  # backslash-u-XXXX, not the character itself.
  # NOTE(review): presumably decoded downstream (U+00C7 / U+00E7) - confirm.
  siebel_DELIMITER='\u00C7'
  uim_DELIMITER='\u00E7'

  # Select the delimiter for the active source via indirect expansion.
  D_SOURCE="${SOURCE}_DELIMITER"
  DELIMITER="${!D_SOURCE}"

  # Name of the output file to be ingested by ID.
  # The ? character will be replaced by the date.
  # The # character will be replaced by a suffix if specified.
  # NOTE(review): the siebel template below ends in '%', not '#'. Either this
  # comment or the template is out of date - check the script that performs
  # the substitution before changing either.
  siebel_OUTPUT_FILENAME="siebelsubscriber_?%"
  uim_OUTPUT_FILENAME="uimsubscriptions_?"

  # Select the template for the active source via indirect expansion.
  OF_SOURCE="${SOURCE}_OUTPUT_FILENAME"
  OUTPUT_FILENAME="${!OF_SOURCE}"

  # Date format to be used in the Post Diff Script, written as
  # "<pattern>|<replacement>": three groups of 4, 2 and 2 characters are
  # re-emitted as <2nd>/<3rd>/<1st>.
  uim_POST_DIFF_DATE_FORMAT='\(....\)\(..\)\(..\).*|\2\/\3\/\1'

  # Select the entry for the active source via indirect expansion.
  # Only a uim entry is defined here, so for other sources the lookup is
  # legitimately empty; ':-' keeps it from aborting a caller using 'set -u'.
  PDDF_SOURCE="${SOURCE}_POST_DIFF_DATE_FORMAT"
  POST_DIFF_DATE_FORMAT="${!PDDF_SOURCE:-}"

  # FTP details for sending files to ID.
  # NOTE(review): assigning USER overwrites the shell's own $USER (login
  # name) for everything that runs after this file is sourced.
  # NOTE(review): the password is stored in clear text in this file; consider
  # reading it from a protected file or the environment instead.
  HOST=localhost
  USER=hadoop
  PASS=hadoop


  # ID landing directory for each source system
  siebel_ID_LANDING_DIR='/data/working/landing/siebel/deltas'
  uim_ID_LANDING_DIR='/data/working/landing/UIM/subscription_deltas'

  # Select the directory for the active source via indirect expansion.
  IDLD_SOURCE="${SOURCE}_ID_LANDING_DIR"
  ID_LANDING_DIR="${!IDLD_SOURCE}"

  # Number of parallel processes to be used for pig jobs
  PARALLEL_PROCS=6

  # Post diff script(s) for each source system
  uim_POST_DIFF_SCRIPT="UIM_Add_Date.sh"

  # Select the script for the active source via indirect expansion.
  # Only a uim entry is defined here, so for other sources the lookup is
  # legitimately empty; ':-' keeps it from aborting a caller using 'set -u'.
  PDS_SOURCE="${SOURCE}_POST_DIFF_SCRIPT"
  POST_DIFF_SCRIPT="${!PDS_SOURCE:-}"
Add Comment
Please, Sign In to add comment