#!/bin/bash

PYTHON_PATH=$(which python)
PSQL_PATH=$(which psql)
MYSQL_SCRIPT='mysql2file.py'
MYSQL_SERVER=
MYSQL_PORT=3306
MYSQL_DATABASE=
MYSQL_USER=
MYSQL_PASSWORD=
TODAY=$(date '+%Y%m%d')
DUMP_FILENAME="errors_${TODAY}.csv"
DUMP_FILEPREFIX="errors_${TODAY}-"
DUMP_SPLITFACTOR=6 # 3 XL nodes
DUMP_S3BUCKET=archivedlogs
REDSHIFT_ENDPOINT=fqdn
REDSHIFT_USER=user
REDSHIFT_DATABASE=reports
REDSHIFT_PORT=1234
REDSHIFT_TABLE=errors
AWS_ACCESS_KEY_ID=asdf
AWS_SECRET_ACCESS=xcvb
MAINTENANCE_DAY=6 # Perform VACUUM and ANALYZE on Saturday
LOG_PATH="/home/${USER}/redshift_import-${TODAY}.log"
MYSQL_MINRANGE=$(date --date='2 days ago' '+%Y-%m-%d')
MYSQL_MAXRANGE=$(date --date='1 day ago' '+%Y-%m-%d')
MYSQL_COMMAND="
SELECT
columns
FROM errors
WHERE time BETWEEN '${MYSQL_MINRANGE}' AND '${MYSQL_MAXRANGE}';
"
# Dump MySQL resultset to file
echo "Starting MySQL dump to ${DUMP_FILENAME}"
${PYTHON_PATH} ${MYSQL_SCRIPT} "${MYSQL_SERVER}" "${MYSQL_PORT}" "${MYSQL_USER}" "${MYSQL_PASSWORD}" "${MYSQL_DATABASE}" "${DUMP_FILENAME}" "${MYSQL_COMMAND}"
# Split by end of line, not by file size
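# (GNU split's -n l/N makes N chunks without breaking lines, so every chunk is
# still valid CSV. The factor of 6 matches the cluster's slice count: XL nodes
# have 2 slices each, and COPY parallelises best when the number of input files
# is a multiple of the number of slices.)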
- echo "Splitting dump into ${DUMP_SPLITFACTOR} files"
- split -n l/${DUMP_SPLITFACTOR} ${DUMP_FILENAME} ${DUMP_FILEPREFIX}
# Compress in preparation for Redshift COPY
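# (COPY can decompress gzip server-side via the GZIP option below, so compressing
# here mainly shrinks the S3 upload and what we pay to store.)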
- echo "Compressing MySQL dump files (gzip)"
- gzip ${DUMP_FILEPREFIX}*
# Upload import files to S3
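# (--reduced-redundancy puts the objects in S3's cheaper Reduced Redundancy
# Storage class, which seems acceptable here since the chunks can always be
# re-dumped from MySQL.)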
- echo "Uploading dump files to S3 bucket ${DUMP_S3BUCKET}"
- s3cmd put --reduced-redundancy ${DUMP_FILEPREFIX}* s3://${DUMP_S3BUCKET}/
# Issue COPY command to Redshift cluster
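# The FROM clause names an S3 prefix rather than a single object, so COPY picks up
# every uploaded chunk whose key starts with the prefix and loads them in parallel.
# NULL AS '\N' assumes the dump script writes MySQL-style \N markers for NULLs.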
REDSHIFT_COMMAND="
COPY ${REDSHIFT_TABLE} FROM 's3://${DUMP_S3BUCKET}/${DUMP_FILEPREFIX}'
CREDENTIALS 'aws_access_key_id=${AWS_ACCESS_KEY_ID};aws_secret_access_key=${AWS_SECRET_ACCESS}'
GZIP
CSV
DELIMITER ','
NULL AS '\N';
"
REDSHIFT_QUERY="${PSQL_PATH} -h ${REDSHIFT_ENDPOINT} -U ${REDSHIFT_USER} -d ${REDSHIFT_DATABASE} -p ${REDSHIFT_PORT}"
echo "Loading dump files from S3 into Redshift table ${REDSHIFT_TABLE}"
${REDSHIFT_QUERY} -c "${REDSHIFT_COMMAND}" > "${LOG_PATH}" 2>&1
if [ $? -ne 0 ] ; then
    echo -e "Import failed when loading to Redshift\n"
    cat "${LOG_PATH}"
    s3cmd put --reduced-redundancy "${LOG_PATH}" s3://${DUMP_S3BUCKET}/
    exit 1
fi
# VACUUM and ANALYZE if we're in a maintenance window
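# VACUUM SORT ONLY resorts rows added since the last sort without reclaiming space
# from deleted rows; ANALYZE refreshes the statistics the query planner relies on.
# Both can be slow on large tables, hence the weekly maintenance window.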
if [ ${MAINTENANCE_DAY} -eq $(date '+%u') ] ; then
    REDSHIFT_COMMAND="VACUUM SORT ONLY ${REDSHIFT_TABLE};"
    echo "Vacuuming (resorting) data"
    ${REDSHIFT_QUERY} -c "${REDSHIFT_COMMAND}" > "${LOG_PATH}" 2>&1
    REDSHIFT_COMMAND="ANALYZE ${REDSHIFT_TABLE};"
    echo "Analyzing table (regenerating statistics)"
    ${REDSHIFT_QUERY} -c "${REDSHIFT_COMMAND}" > "${LOG_PATH}" 2>&1
fi
- echo "Import complete!"