#!/bin/bash

PYTHON_PATH=$(which python)
PSQL_PATH=$(which psql)

MYSQL_SCRIPT='mysql2file.py'
MYSQL_SERVER=
MYSQL_PORT=3306
MYSQL_DATABASE=
MYSQL_USER=
MYSQL_PASSWORD=

TODAY=$(date '+%Y%m%d')
DUMP_FILENAME="errors_${TODAY}.csv"
DUMP_FILEPREFIX="errors_${TODAY}-"
DUMP_SPLITFACTOR=6 # 3 XL nodes
DUMP_S3BUCKET=archivedlogs

REDSHIFT_ENDPOINT=fqdn
REDSHIFT_USER=user
REDSHIFT_DATABASE=reports
REDSHIFT_PORT=1234
REDSHIFT_TABLE=errors

AWS_ACCESS_KEY_ID=asdf
AWS_SECRET_ACCESS=xcvb

MAINTENANCE_DAY=6 # Perform VACUUM and ANALYZE on Saturday

LOG_PATH="/home/${USER}/redshift_import-${TODAY}.log"

MYSQL_MINRANGE=$(date --date='2 days ago' '+%Y-%m-%d')
MYSQL_MAXRANGE=$(date --date='1 day ago' '+%Y-%m-%d')
MYSQL_COMMAND="
SELECT
columns
FROM errors
WHERE time BETWEEN '${MYSQL_MINRANGE}' AND '${MYSQL_MAXRANGE}';
"

# Dump MySQL resultset to file
echo "Starting MySQL dump to ${DUMP_FILENAME}"
${PYTHON_PATH} ${MYSQL_SCRIPT} ${MYSQL_SERVER} ${MYSQL_PORT} ${MYSQL_USER} ${MYSQL_PASSWORD} ${MYSQL_DATABASE} ${DUMP_FILENAME} "${MYSQL_COMMAND}"

# Split by end of line, not by file size
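# (split -n l/N makes N pieces without breaking lines; the pieces are
# named ${DUMP_FILEPREFIX}aa, ${DUMP_FILEPREFIX}ab, ... which gzip below
# turns into the .gz objects the Redshift COPY prefix will match)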
echo "Splitting dump into ${DUMP_SPLITFACTOR} files"
split -n l/${DUMP_SPLITFACTOR} ${DUMP_FILENAME} ${DUMP_FILEPREFIX}
# Compress in preparation for Redshift COPY
echo "Compressing MySQL dump files (gzip)"
gzip ${DUMP_FILEPREFIX}*

# Upload import files to S3
echo "Uploading dump files to S3 bucket ${DUMP_S3BUCKET}"
s3cmd put --reduced-redundancy ${DUMP_FILEPREFIX}* s3://${DUMP_S3BUCKET}/

# Issue COPY command to Redshift cluster
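# (COPY from an S3 prefix loads every object whose key starts with that
# prefix, so all ${DUMP_SPLITFACTOR} gzipped splits load in parallel)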
REDSHIFT_COMMAND="
COPY ${REDSHIFT_TABLE} FROM 's3://${DUMP_S3BUCKET}/${DUMP_FILEPREFIX}'
CREDENTIALS 'aws_access_key_id=${AWS_ACCESS_KEY_ID};aws_secret_access_key=${AWS_SECRET_ACCESS}'
GZIP
CSV
DELIMITER ','
NULL AS '\N';
"
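
# psql prompts for a password unless one is supplied via ~/.pgpass or
# PGPASSWORD; for unattended (cron) runs, set one of those up beforehand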
REDSHIFT_QUERY="${PSQL_PATH} -h ${REDSHIFT_ENDPOINT} -U ${REDSHIFT_USER} -d ${REDSHIFT_DATABASE} -p ${REDSHIFT_PORT}"

echo "Loading dump files from S3 into Redshift table ${REDSHIFT_TABLE}"
${REDSHIFT_QUERY} -c "${REDSHIFT_COMMAND}" > ${LOG_PATH} 2>&1

# The redirect above always creates the log file, so check psql's exit
# status to detect a failed load
if [ $? -ne 0 ] ; then
    echo -e "Import failed when loading to Redshift\n"
    cat ${LOG_PATH}
    s3cmd put --reduced-redundancy ${LOG_PATH} s3://${DUMP_S3BUCKET}/
    exit 1
fi

# VACUUM and ANALYZE if we're in a maintenance window
if [ ${MAINTENANCE_DAY} -eq $(date '+%u') ] ; then
    REDSHIFT_COMMAND="VACUUM SORT ONLY ${REDSHIFT_TABLE};"
    echo "Vacuuming (resorting) data"
    ${REDSHIFT_QUERY} -c "${REDSHIFT_COMMAND}" >> ${LOG_PATH} 2>&1

    REDSHIFT_COMMAND="ANALYZE ${REDSHIFT_TABLE};"
    echo "Analyzing table (regenerating statistics)"
    ${REDSHIFT_QUERY} -c "${REDSHIFT_COMMAND}" >> ${LOG_PATH} 2>&1
fi

echo "Import complete!"
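
For reference, the script above delegates the dump step to mysql2file.py, which is not included in this paste. The sketch below is only an assumption about what that helper might look like, inferred from the positional arguments in the call (server, port, user, password, database, output file, query) and using the pymysql driver; the real helper may differ.

#!/usr/bin/env python
"""Hypothetical sketch of mysql2file.py -- dump a MySQL result set to CSV.

Argument order matches the call in the script above:
    server port user password database output_file query
"""
import csv
import sys

import pymysql  # assumed driver; MySQLdb would work much the same way


def main():
    server, port, user, password, database, outfile, query = sys.argv[1:8]
    conn = pymysql.connect(host=server, port=int(port), user=user,
                           password=password, database=database)
    try:
        with conn.cursor() as cursor:
            cursor.execute(query)
            with open(outfile, 'w', newline='') as f:
                writer = csv.writer(f)
                for row in cursor:
                    # Emit \N for SQL NULL so rows match the
                    # NULL AS '\N' clause in the Redshift COPY
                    writer.writerow([r'\N' if v is None else v
                                     for v in row])
    finally:
        conn.close()


if __name__ == '__main__':
    main()

Writing \N for NULLs is the one design choice that matters here: it lets the COPY command's NULL AS '\N' round-trip missing values instead of loading them as empty strings.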