Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# download_wikipedia.sh
#
# Downloads a daily backup of the latest Wikipedia archive from kiwix.org,
# then (on schedule) promotes backups to weekly, monthly, quarterly, and
# yearly folders. Finally, it deletes old backups according to the retention
# rules below.
#
# Retention rules:
#   - Daily backups older than 7 days are deleted.
#   - Weekly backups older than 31 days are deleted.
#   - Monthly backups older than 93 days are deleted.
#   - Quarterly backups older than 365 days are deleted.

# -e: stop at the first command that fails.
# -u: treat expansion of an unset variable as an error, so a typo in a
#     directory variable cannot silently expand to an empty path.
set -eu

#################################
# Directories for Backups
#################################
# ${HOME:?...} aborts with a message when HOME is unset or empty; without it
# the archive tree would be created at /wikipedia_archives.
ARCHIVE_ROOT="${HOME:?HOME must be set to locate the archive directories}/wikipedia_archives"
DAILY_DIR="$ARCHIVE_ROOT/daily"
WEEKLY_DIR="$ARCHIVE_ROOT/weekly"
MONTHLY_DIR="$ARCHIVE_ROOT/monthly"
QUARTERLY_DIR="$ARCHIVE_ROOT/quarterly"
YEARLY_DIR="$ARCHIVE_ROOT/yearly"

# Ensure all backup directories exist (mkdir -p is a no-op for existing ones).
mkdir -p -- "$DAILY_DIR" "$WEEKLY_DIR" "$MONTHLY_DIR" "$QUARTERLY_DIR" "$YEARLY_DIR"
#################################
# Define the URL of the Wikipedia archive
#################################
URL="https://download.kiwix.org/zim/wikipedia_en_all.zim"

# Get today's date (YYYY-MM-DD) and define the output filename.
TODAY=$(date +%F)
OUTPUT_FILE="wikipedia_en_all_${TODAY}.zim"

#################################
# Step 1: Daily Download
#################################
cd "$DAILY_DIR" || { echo "Failed to change directory to $DAILY_DIR" >&2; exit 1; }

# wget -O truncates its target as soon as it starts, so a failed transfer
# would leave a truncated file under the final name, where the promotion
# steps could pick it up. Download under a ".part" name instead (it does not
# match the wikipedia_en_all_*.zim pattern used elsewhere) and rename only
# after wget reports success.
PARTIAL_FILE="${OUTPUT_FILE}.part"
if ! wget -O "$PARTIAL_FILE" "$URL"; then
  # Discard the incomplete download before giving up.
  rm -f -- "$PARTIAL_FILE"
  echo "$(date '+%F') - Daily download: Failed to fetch ${URL}" >&2
  exit 1
fi
mv -f -- "$PARTIAL_FILE" "$OUTPUT_FILE"
echo "$(date '+%F') - Daily download: Saved as ${OUTPUT_FILE}"
#################################
# Helper Function: Oldest Archive in a Directory
#################################
# Prints the path of the least recently modified wikipedia_en_all_*.zim file
# in a directory, or an empty line when the directory holds none.
# Arguments: $1 - directory to search
# Outputs:   the path (or an empty line) on stdout
# Returns:   0 in both cases, so callers test the output for emptiness
oldest_archive() {
  local dir=$1
  local candidate oldest=""
  for candidate in "$dir"/wikipedia_en_all_*.zim; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -f "$candidate" ]] || continue
    if [[ -z "$oldest" || "$candidate" -ot "$oldest" ]]; then
      oldest=$candidate
    fi
  done
  printf '%s\n' "$oldest"
}

#################################
# Helper Function: Promote Oldest Backup to the Next Tier
#################################
# Copies the oldest archive of one tier into the next tier's directory and
# logs the outcome; logs a notice instead when the source tier is empty.
# Arguments: $1 - source directory
#            $2 - destination directory
#            $3 - source tier name in lower case (e.g. "daily")
#            $4 - destination tier name in lower case (e.g. "weekly")
# Outputs:   one dated log line on stdout
promote_oldest() {
  local src_dir=$1 dst_dir=$2 src_tier=$3 dst_tier=$4
  local oldest
  oldest=$(oldest_archive "$src_dir")
  if [[ -n "$oldest" ]]; then
    cp -- "$oldest" "$dst_dir"
    # ${dst_tier^} capitalises the first letter; ${oldest##*/} is the basename.
    echo "$(date '+%F') - ${dst_tier^} backup: Copied ${oldest##*/} to ${dst_tier} directory."
  else
    echo "$(date '+%F') - ${dst_tier^} backup: No ${src_tier} backup file found."
  fi
}

#################################
# Helper Function: Is Last Day of Month?
#################################
# Returns 0 when today is the last day of the month, non-zero otherwise.
is_last_day_of_month() {
  # If tomorrow's day is 01 then today is the last day of the month.
  # Relies on GNU date's -d option.
  [[ "$(date -d tomorrow +%d)" == "01" ]]
}

# NOTE(review): the schedule checks below read the clock when they run, i.e.
# after the download step; a run that crosses midnight is judged against the
# new day.

#################################
# Step 2: Weekly Backup (On Sundays)
#################################
# date +%u returns 7 on Sunday.
if [[ "$(date +%u)" -eq 7 ]]; then
  promote_oldest "$DAILY_DIR" "$WEEKLY_DIR" daily weekly
fi

#################################
# Step 3: Monthly Backup (On Last Day of Month)
# Step 4: Quarterly Backup (On Last Day of Month for Quarter-Ending Months)
#################################
if is_last_day_of_month; then
  promote_oldest "$WEEKLY_DIR" "$MONTHLY_DIR" weekly monthly

  # Quarters end in March, June, September and December.
  case "$(date +%m)" in
    03 | 06 | 09 | 12)
      promote_oldest "$MONTHLY_DIR" "$QUARTERLY_DIR" monthly quarterly
      ;;
  esac
fi

#################################
# Step 5: Yearly Backup (On December 31)
#################################
if [[ "$(date +%m-%d)" == "12-31" ]]; then
  promote_oldest "$QUARTERLY_DIR" "$YEARLY_DIR" quarterly yearly
fi
#################################
# Step 6: Deletion Operations (Run from Daily to Quarterly)
#################################
# Deletes the wikipedia_en_all_*.zim files directly or indirectly under a
# directory whose modification time is older than the given number of days.
# Arguments: $1 - directory to prune
#            $2 - age threshold in days, handed to find as "-mtime +N"
#                 (find counts whole 24-hour periods, so +7 matches files
#                 modified at least 8 full days ago)
# Returns:   find's exit status
prune_archives() {
  local dir=$1 max_age_days=$2
  find "$dir" -type f -name "wikipedia_en_all_*.zim" -mtime "+${max_age_days}" -delete
}

# Delete daily files older than 7 days.
prune_archives "$DAILY_DIR" 7
# Delete weekly files older than 31 days.
prune_archives "$WEEKLY_DIR" 31
# Delete monthly files older than 93 days.
prune_archives "$MONTHLY_DIR" 93
# Delete quarterly files older than 365 days.
prune_archives "$QUARTERLY_DIR" 365

echo "$(date '+%F') - All backup operations completed."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement