Advertisement
Guest User

Untitled

a guest
Feb 4th, 2025
692
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 4.80 KB | Software | 0 0
  1. #!/bin/bash
  2. # download_wikipedia.sh
  3. # This script downloads a daily backup of the latest Wikipedia archive
  4. # from kiwix.org, then (on schedule) promotes backups to weekly, monthly,
  5. # quarterly, and yearly folders. Finally, it deletes old backups according
  6. # to the specified retention rules.
  7. #
  8. # Retention rules:
  9. # - Daily backups older than 7 days are deleted.
  10. # - Weekly backups older than 31 days are deleted.
  11. # - Monthly backups older than 93 days are deleted.
  12. # - Quarterly backups older than 365 days are deleted; yearly backups are never deleted.
  13.  
  # Abort the script as soon as any command exits non-zero.
  set -o errexit

  #################################
  # Backup locations: one directory per retention tier
  #################################
  DAILY_DIR="${HOME}/wikipedia_archives/daily"
  WEEKLY_DIR="${HOME}/wikipedia_archives/weekly"
  MONTHLY_DIR="${HOME}/wikipedia_archives/monthly"
  QUARTERLY_DIR="${HOME}/wikipedia_archives/quarterly"
  YEARLY_DIR="${HOME}/wikipedia_archives/yearly"

  # Create every tier directory that is missing (parents included).
  for tier_dir in \
      "${DAILY_DIR}" \
      "${WEEKLY_DIR}" \
      "${MONTHLY_DIR}" \
      "${QUARTERLY_DIR}" \
      "${YEARLY_DIR}"
  do
      mkdir -p "${tier_dir}"
  done
  29.  
  #################################
  # Define the URL of the Wikipedia archive
  #################################
  URL="https://download.kiwix.org/zim/wikipedia_en_all.zim"

  # Today's date (YYYY-MM-DD) is embedded in the archive's file name.
  TODAY=$(date +%F)
  OUTPUT_FILE="wikipedia_en_all_${TODAY}.zim"

  #################################
  # Step 1: Daily Download
  #################################
  cd "$DAILY_DIR" || { echo "Failed to change directory to $DAILY_DIR" >&2; exit 1; }

  # Download under a temporary name that does NOT match wikipedia_en_all_*.zim.
  # A failed or interrupted transfer then can never be mistaken for a valid
  # backup by the promotion steps, and it cannot truncate an archive already
  # downloaded earlier the same day.
  PARTIAL_FILE="${OUTPUT_FILE}.part"
  if ! wget -O "$PARTIAL_FILE" "$URL"; then
      rm -f -- "$PARTIAL_FILE"
      echo "$(date '+%F') - Daily download: FAILED to fetch ${URL}" >&2
      exit 1
  fi

  # wget -O creates the output file even when nothing is written; an empty
  # archive is never a usable backup.
  if [ ! -s "$PARTIAL_FILE" ]; then
      rm -f -- "$PARTIAL_FILE"
      echo "$(date '+%F') - Daily download: FAILED, downloaded file is empty" >&2
      exit 1
  fi

  # Publish the finished download under its final name (a rename within the
  # same directory).
  mv -f -- "$PARTIAL_FILE" "$OUTPUT_FILE"
  echo "$(date '+%F') - Daily download: Saved as ${OUTPUT_FILE}"
  45.  
  #################################
  # Step 2: Weekly Backup (On Sundays)
  #################################
  # date +%u prints the ISO weekday number (1 = Monday ... 7 = Sunday).
  if [ "$(date +%u)" -eq 7 ]; then
      # Find the daily archive with the oldest modification time by globbing
      # and comparing with -ot, rather than parsing `ls` output. If two files
      # share an mtime, the first one in glob (lexical) order is kept.
      OLDEST_DAILY=""
      for candidate in "$DAILY_DIR"/wikipedia_en_all_*.zim; do
          # An unmatched glob stays literal; skip anything that is not a file.
          [[ -f "$candidate" ]] || continue
          if [[ -z "$OLDEST_DAILY" || "$candidate" -ot "$OLDEST_DAILY" ]]; then
              OLDEST_DAILY=$candidate
          fi
      done

      if [[ -n "$OLDEST_DAILY" ]]; then
          # Plain cp (no -p): the copy gets a fresh mtime.
          cp -- "$OLDEST_DAILY" "$WEEKLY_DIR"
          echo "$(date '+%F') - Weekly backup: Copied $(basename "$OLDEST_DAILY") to weekly directory."
      else
          echo "$(date '+%F') - Weekly backup: No daily backup file found."
      fi
  fi
  59.  
  #################################
  # Helper Function: Is Last Day of Month?
  #################################
  # Succeeds (status 0) when tomorrow's day-of-month is "01", i.e. when today
  # is the final day of the current month; returns 1 otherwise.
  # Takes no arguments and prints nothing.
  is_last_day_of_month() {
      case "$(date -d tomorrow +%d)" in
          01) return 0 ;;
          *)  return 1 ;;
      esac
  }
  67.  
  #################################
  # Step 3: Monthly Backup (On Last Day of Month)
  #################################
  if is_last_day_of_month; then
      # Find the weekly archive with the oldest modification time by globbing
      # and comparing with -ot, rather than parsing `ls` output. If two files
      # share an mtime, the first one in glob (lexical) order is kept.
      OLDEST_WEEKLY=""
      for candidate in "$WEEKLY_DIR"/wikipedia_en_all_*.zim; do
          # An unmatched glob stays literal; skip anything that is not a file.
          [[ -f "$candidate" ]] || continue
          if [[ -z "$OLDEST_WEEKLY" || "$candidate" -ot "$OLDEST_WEEKLY" ]]; then
              OLDEST_WEEKLY=$candidate
          fi
      done

      if [[ -n "$OLDEST_WEEKLY" ]]; then
          # Plain cp (no -p): the copy gets a fresh mtime.
          cp -- "$OLDEST_WEEKLY" "$MONTHLY_DIR"
          echo "$(date '+%F') - Monthly backup: Copied $(basename "$OLDEST_WEEKLY") to monthly directory."
      else
          echo "$(date '+%F') - Monthly backup: No weekly backup file found."
      fi
  fi
  80.  
  #################################
  # Step 4: Quarterly Backup (On Last Day of Month for Quarter-Ending Months)
  #################################
  if is_last_day_of_month; then
      MONTH=$(date +%m)
      case "$MONTH" in
          03|06|09|12)
              # Find the monthly archive with the oldest modification time by
              # globbing and comparing with -ot, rather than parsing `ls`
              # output. If two files share an mtime, the first one in glob
              # (lexical) order is kept.
              OLDEST_MONTHLY=""
              for candidate in "$MONTHLY_DIR"/wikipedia_en_all_*.zim; do
                  # An unmatched glob stays literal; skip non-files.
                  [[ -f "$candidate" ]] || continue
                  if [[ -z "$OLDEST_MONTHLY" || "$candidate" -ot "$OLDEST_MONTHLY" ]]; then
                      OLDEST_MONTHLY=$candidate
                  fi
              done

              if [[ -n "$OLDEST_MONTHLY" ]]; then
                  # Plain cp (no -p): the copy gets a fresh mtime.
                  cp -- "$OLDEST_MONTHLY" "$QUARTERLY_DIR"
                  echo "$(date '+%F') - Quarterly backup: Copied $(basename "$OLDEST_MONTHLY") to quarterly directory."
              else
                  echo "$(date '+%F') - Quarterly backup: No monthly backup file found."
              fi
              ;;
      esac
  fi
  96.  
  #################################
  # Step 5: Yearly Backup (On December 31)
  #################################
  if [ "$(date +%m-%d)" = "12-31" ]; then
      # Find the quarterly archive with the oldest modification time by
      # globbing and comparing with -ot, rather than parsing `ls` output. If
      # two files share an mtime, the first one in glob (lexical) order is kept.
      OLDEST_QUARTERLY=""
      for candidate in "$QUARTERLY_DIR"/wikipedia_en_all_*.zim; do
          # An unmatched glob stays literal; skip anything that is not a file.
          [[ -f "$candidate" ]] || continue
          if [[ -z "$OLDEST_QUARTERLY" || "$candidate" -ot "$OLDEST_QUARTERLY" ]]; then
              OLDEST_QUARTERLY=$candidate
          fi
      done

      if [[ -n "$OLDEST_QUARTERLY" ]]; then
          # Plain cp (no -p): the copy gets a fresh mtime.
          cp -- "$OLDEST_QUARTERLY" "$YEARLY_DIR"
          echo "$(date '+%F') - Yearly backup: Copied $(basename "$OLDEST_QUARTERLY") to yearly directory."
      else
          echo "$(date '+%F') - Yearly backup: No quarterly backup file found."
      fi
  fi
  109.  
  #################################
  # Step 6: Deletion Operations (Run from Daily to Quarterly)
  #################################
  # Retention table: each directory is paired, by index, with the -mtime
  # threshold (in days) passed to find. The yearly directory is not listed,
  # so nothing is ever removed from it here.
  prune_dirs=("$DAILY_DIR" "$WEEKLY_DIR" "$MONTHLY_DIR" "$QUARTERLY_DIR")
  prune_days=(7 31 93 365)

  # Walk the tiers in order (daily, weekly, monthly, quarterly) and delete the
  # archive files in each whose mtime test "-mtime +N" matches.
  for tier in "${!prune_dirs[@]}"; do
      find "${prune_dirs[tier]}" -type f -name "wikipedia_en_all_*.zim" -mtime "+${prune_days[tier]}" -delete
  done

  echo "$(date '+%F') - All backup operations completed."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement