my PDS backup script
at main 8.2 kB view raw
#!/bin/bash
#
# PDS backup script.
#
# Flow:
#   0. Check configuration and that the destination host answers a ping.
#   1. Check that the pds systemd unit exists.
#   2. Stop the PDS service.
#   3. Archive $SOURCE_DIR into a timestamped .tar.gz next to this script and
#      compare its SHA-256 with the newest archive on the destination host;
#      if they match, nothing is uploaded.
#   4-6. Create the remote directory and upload the archive with rsync
#      (retried up to $MAX_RETRIES times), then remove the local archive.
#   7. Start the PDS service again.
#   8-9. Prune remote backup directories older than 30 days and local logs
#      older than 90 days.
#   10. (Re)install this script's cron entries.
#
# DEST_USER, DEST_IP and DEST_BASE_DIR must be filled in before the first run.

# Abort on use of an unset variable. `-e` and `pipefail` are intentionally left
# off: several steps below tolerate non-zero statuses on purpose (for example
# `grep -v` returns 1 when it filters out every line).
set -u

# Variables
SOURCE_DIR="/pds"                                # Path to your PDS directory
DEST_USER=""                                     # Username on the destination machine
DEST_IP=""                                       # IP address of the destination machine
DEST_BASE_DIR=""                                 # Base destination directory on the destination machine
SCRIPT_PATH="$(realpath "$0")"                   # Absolute path of this script
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"           # Base directory of the script
LOG_DIR="$SCRIPT_DIR/logs/pds-backup"            # Log directory for the backup logs
DATE_LABEL=$(date +"%Y%m%d-%H%M")                # Date label (e.g., "20250216-1944")
LOG_FILE="$LOG_DIR/$DATE_LABEL.log"              # Log file for backup status
DEST_DIR="${DEST_BASE_DIR}/${DATE_LABEL}"        # Destination directory with date-time label
ARCHIVE_FILE="$SCRIPT_DIR/${DATE_LABEL}.tar.gz"  # Local archive file path
MAX_RETRIES=3                                    # Maximum retries for backup
RETRY_INTERVAL=60                                # Retry interval in seconds (1 minute)
# Cron jobs for the backup script (noon and midnight)
CRON_JOBS=(
  "0 12 * * * /bin/bash $SCRIPT_PATH"
  "0 0 * * * /bin/bash $SCRIPT_PATH"
)
REMOTE="$DEST_USER@$DEST_IP"                     # ssh/rsync login target

# Set to 1 once this run has tried to stop the service. fail() only restarts
# the service when this is set, so a pre-flight failure (bad configuration,
# unreachable host, missing unit) does not bounce a service that this script
# never touched.
SERVICE_TOUCHED=0

# Append a timestamped message to the log file.
# Arguments: $1 - message text
log() {
  echo "$(date): $1" >> "$LOG_FILE"
}

# Log an error to stdout and the log file, bring the service back up if this
# run has already tried to stop it, and exit with status 1.
# Arguments: $1 - error message
fail() {
  echo "$(date): ERROR: $1" | tee -a "$LOG_FILE"
  if (( SERVICE_TOUCHED )); then
    systemctl restart pds 2>/dev/null \
      || log "WARNING: Failed to restart PDS service after failure."
  fi
  exit 1
}

# Ensure the log directory exists (log and fail both write into it)
if ! mkdir -p "$LOG_DIR"; then
  echo "$(date): ERROR: Cannot create log directory $LOG_DIR." >&2
  exit 1
fi

# Refuse to run with an incomplete configuration. An empty DEST_BASE_DIR is
# dangerous in particular: the remote prune in Step 8 would then run `find`
# without a start point, which GNU find treats as the current directory.
if [[ -z "$DEST_USER" || -z "$DEST_IP" || -z "$DEST_BASE_DIR" ]]; then
  fail "DEST_USER, DEST_IP and DEST_BASE_DIR must all be set. Exiting."
fi

# Step 0: Check if the destination machine is reachable by pinging
log "Checking if machine at $DEST_IP is online..."
if ! ping -c 1 "$DEST_IP" &>/dev/null; then
  fail "Machine at $DEST_IP is unreachable. Exiting the script."
fi
log "Machine at $DEST_IP is online."

# Step 1: Ensure the PDS service exists and is either running or stopped
log "Checking PDS service status..."
if ! systemctl list-units --full -all | grep -Fq "pds.service"; then
  fail "PDS service not found. Exiting."
fi

# Step 2: Stop the PDS service (if applicable)
log "Stopping the PDS service..."
SERVICE_TOUCHED=1
if ! systemctl stop pds 2>/dev/null; then
  log "WARNING: Failed to stop the PDS service. Proceeding with backup."
else
  log "Successfully stopped the PDS service."
fi

# Step 3: Create a compressed archive of the PDS directory
log "Creating a compressed backup archive of $SOURCE_DIR..."
if tar -czf "$ARCHIVE_FILE" -C "$SOURCE_DIR" . 2>> "$LOG_FILE"; then
  log "Archive created successfully at $ARCHIVE_FILE."
else
  # Do not leave a partial archive behind.
  rm -f -- "$ARCHIVE_FILE"
  fail "Failed to create the backup archive. Exiting."
fi

# Step 3.1: Check for differences with the latest backup archive
log "Checking for differences between the new archive and the latest backup archive..."
LATEST_BACKUP_DIR=$(ssh "$REMOTE" "ls -dt '$DEST_BASE_DIR'/*/ 2>/dev/null | head -n 1")
if [[ -z "$LATEST_BACKUP_DIR" ]]; then
  log "No previous backup directory found. First-time use detected. Skipping change detection."
else
  LATEST_ARCHIVE_FILE=$(ssh "$REMOTE" "find '$LATEST_BACKUP_DIR' -maxdepth 1 -type f -name '*.tar.gz' 2>/dev/null | head -n 1")
  if [[ -z "$LATEST_ARCHIVE_FILE" ]]; then
    log "No previous backup archive found in $LATEST_BACKUP_DIR. First-time use detected. Skipping change detection."
  else
    NEW_CHECKSUM=$(sha256sum "$ARCHIVE_FILE" | awk '{print $1}')
    REMOTE_CHECKSUM=$(ssh "$REMOTE" "sha256sum '$LATEST_ARCHIVE_FILE'" 2>/dev/null | awk '{print $1}')
    log "New archive checksum: $NEW_CHECKSUM"
    log "Latest backup archive checksum: $REMOTE_CHECKSUM"
    # Require a non-empty checksum: two empty strings (both commands failed)
    # must not be mistaken for "unchanged".
    if [[ -n "$NEW_CHECKSUM" && "$NEW_CHECKSUM" == "$REMOTE_CHECKSUM" ]]; then
      log "No changes detected since the last backup. Skipping backup transfer."
      echo "No changes detected since the last backup. Backup not performed." | tee -a "$LOG_FILE"
      # The archive duplicates what is already on the destination; remove it
      # so unchanged runs do not pile up archives next to the script.
      rm -f -- "$ARCHIVE_FILE"
      # Restart PDS service before exiting
      if ! systemctl start pds 2>/dev/null; then
        fail "Failed to start the PDS service. Check service status and logs."
      fi
      log "Successfully restarted the PDS service."
      exit 0
    else
      log "Changes detected. Proceeding with backup transfer."
    fi
  fi
fi

# Step 4: Ensure the destination directory exists, create if not
log "Ensuring destination directory $DEST_DIR exists..."
if ssh "$REMOTE" "mkdir -p '$DEST_DIR'" 2>> "$LOG_FILE"; then
  log "Destination directory $DEST_DIR is ready."
else
  fail "Failed to create destination directory $DEST_DIR. Exiting."
fi

# Step 5: Perform the transfer of the archive to the destination machine using rsync with retry mechanism
attempt=1
while (( attempt <= MAX_RETRIES )); do
  log "Attempt $attempt to perform backup transfer using rsync..."

  if rsync -avz --remove-source-files "$ARCHIVE_FILE" "$REMOTE:$DEST_DIR/" 2>> "$LOG_FILE"; then
    log "Backup transfer completed successfully to $DEST_DIR"
    break
  fi

  log "ERROR: Backup transfer failed during rsync operation. Attempt $attempt of $MAX_RETRIES."
  if (( attempt < MAX_RETRIES )); then
    log "Retrying in $RETRY_INTERVAL seconds..."
    sleep "$RETRY_INTERVAL"
  else
    log "ERROR: Backup failed after $MAX_RETRIES attempts."
    fail "Backup transfer failed after $MAX_RETRIES attempts. Check logs and network connection."
  fi
  ((attempt++))
done

# Step 6: Delete the local archive after successful transfer
# (rsync --remove-source-files normally removes it already; this is a safety net)
log "Deleting local archive $ARCHIVE_FILE..."
rm -f -- "$ARCHIVE_FILE"
log "Local archive deleted successfully."

# Step 7: Always restart the PDS service (if applicable)
log "Restarting the PDS service..."
if ! systemctl start pds 2>/dev/null; then
  fail "Failed to start the PDS service. Check service status and logs."
fi
SERVICE_TOUCHED=0
log "Successfully restarted the PDS service."

# Step 8: Delete backup directories older than 30 days
log "Checking and deleting backup directories older than 30 days..."
if ssh "$REMOTE" "find '$DEST_BASE_DIR' -mindepth 1 -maxdepth 1 -type d -mtime +30 -exec rm -rf {} +" 2>> "$LOG_FILE"; then
  log "Deleted backup directories older than 30 days successfully."
else
  log "ERROR: Failed to delete old backup directories. Check logs for details."
fi

# Step 9: Log Rotation - Delete logs older than 90 days and rotate the log file
log "Checking the size and age of the log file..."

# Delete log files older than 90 days
find "$LOG_DIR" -type f -name "*.log" -mtime +90 -exec rm -f {} + 2>/dev/null
log "Deleted log files older than 90 days."

# Check if the log file is older than 30 days
if [[ -n "$(find "$LOG_FILE" -mtime +30 -print)" ]]; then
  mv "$LOG_FILE" "$LOG_FILE.old"
  touch "$LOG_FILE"
  log "Log file older than 30 days, rotated. Previous log archived as $LOG_FILE.old"
fi

# Check if the log file exceeds 1000 lines (adjust size threshold if necessary)
if (( $(wc -l < "$LOG_FILE") > 1000 )); then
  mv "$LOG_FILE" "$LOG_FILE.old"
  touch "$LOG_FILE"
  log "Log file exceeded 1000 lines, rotated. Previous log archived as $LOG_FILE.old"
fi

# Step 10: Ensure only the specified cron jobs are present in crontab for this script
# Drop every existing entry that mentions this script (fixed-string match, so
# characters such as '.' in the path are not treated as regex), append the
# desired entries, and install the result in a single crontab write.
# `crontab -l` errors are silenced because it reports an error when the user
# has no crontab yet.
if {
  crontab -l 2>/dev/null | grep -vF -- "$SCRIPT_PATH"
  printf '%s\n' "${CRON_JOBS[@]}"
} | crontab -; then
  for job in "${CRON_JOBS[@]}"; do
    log "Cron job '$job' added to crontab."
  done
else
  log "ERROR: Failed to update crontab."
fi

# Completion log
log "Backup and service restart completed successfully."
exit 0