my PDS backup script
#!/bin/bash
#
# PDS backup script.
# Stops the pds service, archives SOURCE_DIR, copies the archive to a remote
# machine, starts the service again, prunes old backups and logs, and installs
# its own cron entries.
#
# DEST_USER, DEST_IP and DEST_BASE_DIR are placeholders: fill them in before
# the first run.

# --- User settings -----------------------------------------------------------
SOURCE_DIR="/pds"     # Path to your PDS directory
DEST_USER=""          # Username on the destination machine
DEST_IP=""            # IP address of the destination machine
DEST_BASE_DIR=""      # Base destination directory on the destination machine
MAX_RETRIES=3         # Maximum retries for backup
RETRY_INTERVAL=60     # Retry interval in seconds (1 minute)

# --- Derived values ----------------------------------------------------------
# Resolve the script location once; everything below is built from it.
SCRIPT_PATH="$(realpath "$0")"                     # Absolute path of this script
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"             # Base directory of the script
LOG_DIR="$SCRIPT_DIR/logs/pds-backup"              # Log directory for the backup logs
DATE_LABEL="$(date +"%Y%m%d-%H%M")"                # Date label (e.g., "20250216-1944")
LOG_FILE="$LOG_DIR/$DATE_LABEL.log"                # Log file for backup status
DEST_DIR="${DEST_BASE_DIR}/${DATE_LABEL}"          # Destination directory with date-time label
ARCHIVE_FILE="$SCRIPT_DIR/${DATE_LABEL}.tar.gz"    # Local archive file path

# Cron jobs for the backup script (noon and midnight)
CRON_JOBS=(
  "0 12 * * * /bin/bash $SCRIPT_PATH"
  "0 0 * * * /bin/bash $SCRIPT_PATH"
)

readonly SOURCE_DIR DEST_USER DEST_IP DEST_BASE_DIR MAX_RETRIES RETRY_INTERVAL
readonly SCRIPT_PATH SCRIPT_DIR LOG_DIR DATE_LABEL LOG_FILE DEST_DIR ARCHIVE_FILE
readonly CRON_JOBS

#######################################
# Log an error, make sure the PDS service is running, and abort the script.
# Globals:   LOG_FILE (appended to)
# Arguments: $1 - error message
# Outputs:   the error line on stderr and in LOG_FILE
# Returns:   does not return; exits the script with status 1
#######################################
fail() {
  printf '%s: ERROR: %s\n' "$(date)" "$1" | tee -a "$LOG_FILE" >&2
  # `start` rather than `restart`: it brings a stopped service back up but is
  # a no-op when the service is already running, so a failure that happens
  # while PDS is still up does not needlessly bounce it.
  if ! systemctl start pds 2>/dev/null; then
    printf '%s: WARNING: Failed to restart PDS service after failure.\n' "$(date)" >> "$LOG_FILE"
  fi
  exit 1
}

# Ensure the log directory exists
mkdir -p "$LOG_DIR"

#######################################
# Log an error and exit. Used only by the checks that run before the PDS
# service has been touched, so the service is deliberately left alone.
# Globals:   LOG_FILE (appended to)
# Arguments: $1 - error message
# Outputs:   the error line on stderr and in LOG_FILE
# Returns:   does not return; exits the script with status 1
#######################################
preflight_abort() {
  echo "$(date): ERROR: $1" | tee -a "$LOG_FILE" >&2
  exit 1
}

# The destination settings ship empty. Running with them unset would build
# remote commands and paths from empty strings, so stop right here instead.
for required_var in DEST_USER DEST_IP DEST_BASE_DIR; do
  if [[ -z "${!required_var}" ]]; then
    preflight_abort "$required_var is not set. Fill in the destination settings at the top of the script."
  fi
done

# Step 0: Check if the destination machine is reachable by pinging
echo "$(date): Checking if machine at $DEST_IP is online..." >> "$LOG_FILE"
if ! ping -c 1 "$DEST_IP" &>/dev/null; then
  preflight_abort "Machine at $DEST_IP is unreachable. Exiting the script."
fi
echo "$(date): Machine at $DEST_IP is online." >> "$LOG_FILE"

# Step 1: Ensure the PDS service exists
# `systemctl cat` fails when no unit file is found for the exact name, which
# avoids substring matches against the unit listing.
echo "$(date): Checking PDS service status..." >> "$LOG_FILE"
if ! systemctl cat pds.service &>/dev/null; then
  preflight_abort "PDS service not found. Exiting."
fi

# Step 2: Stop the PDS service (if applicable)
# A failed stop is logged but is not fatal: the backup still proceeds.
echo "$(date): Stopping the PDS service..." >> "$LOG_FILE"
if systemctl stop pds 2>/dev/null; then
  echo "$(date): Successfully stopped the PDS service." >> "$LOG_FILE"
else
  echo "$(date): WARNING: Failed to stop the PDS service. Proceeding with backup." >> "$LOG_FILE"
fi

# Step 3: Create a compressed archive of the PDS directory
echo "$(date): Creating a compressed backup archive of $SOURCE_DIR..." >> "$LOG_FILE"
if tar -czf "$ARCHIVE_FILE" -C "$SOURCE_DIR" . 2>> "$LOG_FILE"; then
  echo "$(date): Archive created successfully at $ARCHIVE_FILE." >> "$LOG_FILE"
else
  # Do not leave a partial archive lying next to the script.
  rm -f -- "$ARCHIVE_FILE"
  fail "Failed to create the backup archive. Exiting."
fi

# Step 3.1: Check for differences with the latest backup archive
# The newest remote backup directory is located, the first *.tar.gz inside it
# is hashed remotely, and the transfer is skipped when the hashes match.
echo "$(date): Checking for differences between the new archive and the latest backup archive..." >> "$LOG_FILE"
remote_host="$DEST_USER@$DEST_IP"
# Paths are shell-quoted before being embedded in the remote command line.
# The glob suffix must stay outside the quoting so the remote shell expands it.
remote_base_q=$(printf '%q' "$DEST_BASE_DIR")
LATEST_BACKUP_DIR=$(ssh "$remote_host" "ls -dt -- $remote_base_q/*/ 2>/dev/null | head -n 1")
if [[ -z "$LATEST_BACKUP_DIR" ]]; then
  echo "$(date): No previous backup directory found. First-time use detected. Skipping change detection." >> "$LOG_FILE"
else
  latest_dir_q=$(printf '%q' "$LATEST_BACKUP_DIR")
  LATEST_ARCHIVE_FILE=$(ssh "$remote_host" "find $latest_dir_q -maxdepth 1 -type f -name '*.tar.gz' 2>/dev/null | head -n 1")
  if [[ -z "$LATEST_ARCHIVE_FILE" ]]; then
    echo "$(date): No previous backup archive found in $LATEST_BACKUP_DIR. First-time use detected. Skipping change detection." >> "$LOG_FILE"
  else
    latest_archive_q=$(printf '%q' "$LATEST_ARCHIVE_FILE")
    NEW_CHECKSUM=$(sha256sum "$ARCHIVE_FILE" | awk '{print $1}')
    REMOTE_CHECKSUM=$(ssh "$remote_host" "sha256sum $latest_archive_q" 2>/dev/null | awk '{print $1}')
    echo "$(date): New archive checksum: $NEW_CHECKSUM" >> "$LOG_FILE"
    echo "$(date): Latest backup archive checksum: $REMOTE_CHECKSUM" >> "$LOG_FILE"
    # Require a non-empty local checksum: two failed checksum commands would
    # otherwise compare equal ("" = "") and silently skip the backup.
    if [[ -n "$NEW_CHECKSUM" && "$NEW_CHECKSUM" == "$REMOTE_CHECKSUM" ]]; then
      echo "$(date): No changes detected since the last backup. Skipping backup transfer." >> "$LOG_FILE"
      echo "No changes detected since the last backup. Backup not performed." | tee -a "$LOG_FILE"
      # The archive is not going anywhere; remove it so skipped runs do not
      # pile up archives in the script directory.
      rm -f -- "$ARCHIVE_FILE"
      # Restart PDS service before exiting; a failed start is an error, not a
      # clean exit.
      if ! systemctl start pds 2>/dev/null; then
        fail "Failed to start the PDS service. Check service status and logs."
      fi
      echo "$(date): Successfully restarted the PDS service." >> "$LOG_FILE"
      exit 0
    else
      echo "$(date): Changes detected. Proceeding with backup transfer." >> "$LOG_FILE"
    fi
  fi
fi

# Step 4: Ensure the destination directory exists, create if not
echo "$(date): Ensuring destination directory $DEST_DIR exists..." >> "$LOG_FILE"
# Shell-quote the path before embedding it in the remote command line.
dest_dir_q=$(printf '%q' "$DEST_DIR")
if ssh "$DEST_USER@$DEST_IP" "mkdir -p -- $dest_dir_q" 2>> "$LOG_FILE"; then
  echo "$(date): Destination directory $DEST_DIR is ready." >> "$LOG_FILE"
else
  fail "Failed to create destination directory $DEST_DIR. Exiting."
fi

# Step 5: Perform the transfer of the archive to the destination machine using rsync with retry mechanism
# Success is tracked explicitly so that a loop which never ran (or never
# succeeded) cannot fall through to the cleanup steps as if it had.
transfer_ok=0
for (( attempt = 1; attempt <= MAX_RETRIES; attempt++ )); do
  echo "$(date): Attempt $attempt to perform backup transfer using rsync..." >> "$LOG_FILE"

  # --remove-source-files makes rsync delete the local archive once it has
  # been transferred successfully.
  if rsync -avz --remove-source-files "$ARCHIVE_FILE" "$DEST_USER@$DEST_IP:$DEST_DIR/" 2>> "$LOG_FILE"; then
    echo "$(date): Backup transfer completed successfully to $DEST_DIR" >> "$LOG_FILE"
    transfer_ok=1
    break
  fi

  echo "$(date): ERROR: Backup transfer failed during rsync operation. Attempt $attempt of $MAX_RETRIES." >> "$LOG_FILE"
  if (( attempt < MAX_RETRIES )); then
    echo "$(date): Retrying in $RETRY_INTERVAL seconds..." >> "$LOG_FILE"
    sleep "$RETRY_INTERVAL"
  fi
done

if (( ! transfer_ok )); then
  echo "$(date): ERROR: Backup failed after $MAX_RETRIES attempts." >> "$LOG_FILE"
  fail "Backup transfer failed after $MAX_RETRIES attempts. Check logs and network connection."
fi

# Step 6: Delete the local archive after successful transfer
# Only report success when rm actually succeeded; a failure here is logged but
# must not stop the service from being started again below.
echo "$(date): Deleting local archive $ARCHIVE_FILE..." >> "$LOG_FILE"
if rm -f -- "$ARCHIVE_FILE"; then
  echo "$(date): Local archive deleted successfully." >> "$LOG_FILE"
else
  echo "$(date): WARNING: Failed to delete local archive $ARCHIVE_FILE." >> "$LOG_FILE"
fi

# Step 7: Always restart the PDS service (if applicable)
echo "$(date): Restarting the PDS service..." >> "$LOG_FILE"
if ! systemctl start pds 2>/dev/null; then
  fail "Failed to start the PDS service. Check service status and logs."
fi
echo "$(date): Successfully restarted the PDS service." >> "$LOG_FILE"

# Step 8: Delete backup directories older than 30 days
echo "$(date): Checking and deleting backup directories older than 30 days..." >> "$LOG_FILE"
if [[ -z "$DEST_BASE_DIR" ]]; then
  # With an empty base directory the remote find would have no start path and
  # would run against whatever directory the SSH session lands in, removing
  # unrelated directories. Never run the deletion in that case.
  echo "$(date): ERROR: DEST_BASE_DIR is empty; refusing to delete old backup directories." >> "$LOG_FILE"
else
  # Shell-quote the path before embedding it in the remote command line.
  prune_base_q=$(printf '%q' "$DEST_BASE_DIR")
  # -mindepth/-maxdepth 1 restricts the match to the immediate backup
  # directories; the base directory itself is never a candidate.
  if ssh "$DEST_USER@$DEST_IP" "find $prune_base_q -mindepth 1 -maxdepth 1 -type d -mtime +30 -exec rm -rf -- {} +" 2>> "$LOG_FILE"; then
    echo "$(date): Deleted backup directories older than 30 days successfully." >> "$LOG_FILE"
  else
    echo "$(date): ERROR: Failed to delete old backup directories. Check logs for details." >> "$LOG_FILE"
  fi
fi

# Step 9: Log Rotation - Delete logs older than 90 days and rotate the log file
echo "$(date): Checking the size and age of the log file..." >> "$LOG_FILE"

# Delete log files older than 90 days, including rotated "*.log.old" copies
# (which would otherwise never be cleaned up).
find "$LOG_DIR" -type f \( -name "*.log" -o -name "*.log.old" \) -mtime +90 \
  -exec rm -f -- {} + 2>/dev/null
echo "$(date): Deleted log files older than 90 days." >> "$LOG_FILE"

# Check if the log file is older than 30 days.
# The find output is quoted so a log path containing spaces cannot break the
# test expression.
if [[ -n "$(find "$LOG_FILE" -mtime +30 -print 2>/dev/null)" ]]; then
  mv -- "$LOG_FILE" "$LOG_FILE.old"
  touch -- "$LOG_FILE"
  echo "$(date): Log file older than 30 days, rotated. Previous log archived as $LOG_FILE.old" >> "$LOG_FILE"
fi

# Check if the log file exceeds 1000 lines (adjust size threshold if necessary)
if (( $(wc -l < "$LOG_FILE") > 1000 )); then
  mv -- "$LOG_FILE" "$LOG_FILE.old"
  touch -- "$LOG_FILE"
  echo "$(date): Log file exceeded 1000 lines, rotated. Previous log archived as $LOG_FILE.old" >> "$LOG_FILE"
fi

# Step 10: Ensure only the specified cron jobs are present in crontab for this script
# The new crontab is assembled in one pass and installed with a single
# `crontab -` call: existing entries minus every line that mentions this
# script, followed by the desired jobs.
cron_script_path=$(realpath "$0")
if {
  # stderr is silenced on purpose: having no crontab yet is not an error here.
  # -F matches the path literally instead of treating it as a regex.
  crontab -l 2>/dev/null | grep -Fv -- "$cron_script_path"
  if (( ${#CRON_JOBS[@]} > 0 )); then
    printf '%s\n' "${CRON_JOBS[@]}"
  fi
} | crontab -; then
  for job in "${CRON_JOBS[@]}"; do
    echo "$(date): Cron job '$job' added to crontab." >> "$LOG_FILE"
  done
else
  echo "$(date): ERROR: Failed to update crontab." >> "$LOG_FILE"
fi

# Completion log
echo "$(date): Backup and service restart completed successfully." >> "$LOG_FILE"
exit 0