#!/bin/bash
# Comprehensive test script for the Telegraf/TimescaleDB metrics setup.
# This script validates the entire metrics pipeline:
#   1. generates a throw-away stack definition under ./metrics-stack
#   2. starts TimescaleDB + Telegraf with Docker Compose
#   3. sends StatsD metrics and verifies they land in TimescaleDB
#
# Requirements: docker, Docker Compose (standalone `docker-compose` or the
# `docker compose` plugin), nc.

set -e

echo "========================================="
echo "Telegraf/TimescaleDB Metrics Test Suite"
echo "========================================="
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
  echo "❌ Docker is not running. Please start Docker first."
  exit 1
fi

#######################################
# Run Docker Compose with the given arguments.
# Uses the standalone `docker-compose` binary when it is installed and falls
# back to the `docker compose` CLI plugin otherwise.
# Arguments: passed through unchanged to Compose
# Returns:   exit status of the Compose invocation
#######################################
compose() {
  if command -v docker-compose > /dev/null 2>&1; then
    docker-compose "$@"
  else
    docker compose "$@"
  fi
}

#######################################
# Poll a Compose service until its health status is "healthy".
# Polls every 2 seconds, up to 30 attempts.
# Arguments: $1 - Compose service name
# Outputs:   progress dots and a final ✅/❌ on stdout
# Returns:   0 once healthy, 1 if the attempts are exhausted
#######################################
wait_for_service() {
  local service=$1
  local max_attempts=30
  local attempt=1

  echo -n "Waiting for $service to be healthy"
  while [ "$attempt" -le "$max_attempts" ]; do
    # Match the literal "(healthy)" marker: a bare "healthy" pattern would
    # also match "(unhealthy)" and report a failing container as ready.
    if compose ps "$service" | grep -qF "(healthy)"; then
      echo " ✅"
      return 0
    fi
    echo -n "."
    sleep 2
    attempt=$((attempt + 1))
  done

  echo " ❌"
  echo "Service $service failed to become healthy after $max_attempts attempts"
  return 1
}

#######################################
# Run a SQL statement inside the timescaledb container.
# psql runs with -t (tuples only); its stderr is discarded, so a failing
# query simply produces no output.
# Arguments: $1 - SQL text
# Outputs:   query result on stdout
# Returns:   exit status of docker exec / psql
#######################################
run_query() {
  docker exec -i timescaledb psql -U postgres -d metrics -t -c "$1" 2>/dev/null
}

#######################################
# Report whether a table with the given name exists.
# Arguments: $1 - table name (interpolated into SQL; trusted input only)
# Outputs:   a ✅/❌ status line on stdout
# Returns:   0 if the table exists, 1 otherwise (including query failure)
#######################################
check_table() {
  local table=$1
  local result

  # Declaration and assignment are split so a failing query is handled here
  # instead of being masked by `local`.
  result=$(run_query "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = '$table');") || result=""
  # psql pads the value with whitespace; strip it so we can compare exactly.
  result=${result//[[:space:]]/}

  if [[ "$result" == "t" ]]; then
    echo "✅ Table '$table' exists"
    return 0
  else
    echo "❌ Table '$table' does not exist"
    return 1
  fi
}

# Navigate to the metrics-stack directory (create if needed)
if [ ! -d "metrics-stack" ]; then
  echo "Creating metrics-stack directory..."
fi
# Always ensure the sub-directories exist: a partially created tree would
# otherwise make the redirects below fail.
mkdir -p metrics-stack/telegraf metrics-stack/test-scripts metrics-stack/init-scripts

cd metrics-stack

# Create .env file if it doesn't exist
if [ ! -f ".env" ]; then
  echo "Creating .env file..."
  cat > .env << 'EOF'
# PostgreSQL/TimescaleDB Configuration
POSTGRES_DB=metrics
POSTGRES_USER=postgres
POSTGRES_PASSWORD=secretpassword

# Telegraf Database User
TELEGRAF_DB_USER=postgres
TELEGRAF_DB_PASSWORD=secretpassword

# TimescaleDB Settings
TIMESCALE_TELEMETRY=off
EOF
fi

# Create the Telegraf configuration if it doesn't exist.
# The heredoc delimiter is quoted so ${...} placeholders are written literally.
if [ ! -f "telegraf/telegraf.conf" ]; then
  echo "Creating telegraf.conf..."
  cat > telegraf/telegraf.conf << 'EOF'
[agent]
  interval = "10s"
  round_interval = true
  metric_batch_size = 1000
  metric_buffer_limit = 10000
  collection_jitter = "0s"
  flush_interval = "10s"
  flush_jitter = "0s"
  precision = ""
  debug = false
  quiet = false
  hostname = "telegraf-agent"
  omit_hostname = false

[[inputs.statsd]]
  service_address = ":8125"
  protocol = "udp"
  delete_gauges = true
  delete_counters = true
  delete_sets = true
  delete_timings = true
  percentiles = [50, 90, 95, 99]
  metric_separator = "."
  allowed_pending_messages = 10000
  datadog_extensions = true
  datadog_distributions = true

[[outputs.postgresql]]
  connection = "host=timescaledb user=${TELEGRAF_DB_USER} password=${TELEGRAF_DB_PASSWORD} dbname=${POSTGRES_DB} sslmode=disable"
  schema = "public"
  create_templates = [
    '''CREATE TABLE IF NOT EXISTS {{.table}} ({{.columns}})''',
    '''SELECT create_hypertable({{.table|quoteLiteral}}, 'time', if_not_exists => TRUE)''',
  ]
  tags_as_jsonb = true
  fields_as_jsonb = false
EOF
fi

# Create docker-compose.yml if it doesn't exist
if [ ! -f "docker-compose.yml" ]; then
  echo "Creating docker-compose.yml..."
  cat > docker-compose.yml << 'EOF'
version: '3.8'

services:
  timescaledb:
    image: timescale/timescaledb:latest-pg17
    container_name: timescaledb
    restart: unless-stopped
    environment:
      POSTGRES_DB: ${POSTGRES_DB}
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      TIMESCALE_TELEMETRY: ${TIMESCALE_TELEMETRY}
    ports:
      - "5442:5432"
    volumes:
      - timescale_data:/home/postgres/pgdata/data
      - ./init-scripts:/docker-entrypoint-initdb.d:ro
    command:
      - postgres
      - -c
      - shared_buffers=256MB
      - -c
      - effective_cache_size=1GB
      - -c
      - maintenance_work_mem=64MB
      - -c
      - work_mem=8MB
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - metrics_network

  telegraf:
    image: telegraf:1.35
    container_name: telegraf
    restart: unless-stopped
    environment:
      TELEGRAF_DB_USER: ${TELEGRAF_DB_USER}
      TELEGRAF_DB_PASSWORD: ${TELEGRAF_DB_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    ports:
      - "8125:8125/udp"
    volumes:
      - ./telegraf/telegraf.conf:/etc/telegraf/telegraf.conf:ro
    depends_on:
      timescaledb:
        condition: service_healthy
    networks:
      - metrics_network
    command: ["telegraf", "--config", "/etc/telegraf/telegraf.conf"]

networks:
  metrics_network:
    driver: bridge

volumes:
  timescale_data:
EOF
fi

# Create init script
if [ ! -f "init-scripts/01-init.sql" ]; then
  echo "Creating init script..."
  cat > init-scripts/01-init.sql << 'EOF'
-- Enable TimescaleDB extension
CREATE EXTENSION IF NOT EXISTS timescaledb;
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
EOF
fi

echo ""
echo "Step 1: Starting Docker services..."
echo "========================================="
# Start from a clean slate. `down -v` also removes the data volume; its
# failure (e.g. nothing to tear down) is deliberately ignored.
compose down -v 2>/dev/null || true
compose up -d

echo ""
echo "Step 2: Waiting for services to be healthy..."
echo "========================================="
wait_for_service timescaledb
sleep 5 # Extra time for Telegraf to connect

echo ""
echo "Step 3: Sending test metrics..."
echo "========================================="

# nc is the only way this step delivers datagrams; fail early with a clear
# message instead of a bare "command not found" from inside a pipeline.
if ! command -v nc > /dev/null 2>&1; then
  echo "❌ nc (netcat) is required to send test metrics but was not found."
  exit 1
fi

#######################################
# Send one StatsD datagram to the Telegraf listener on localhost:8125/udp.
# Arguments: $1 - StatsD line, e.g. "name:1|c|#tag:value"
# Returns:   exit status of nc
#######################################
send_metric() {
  printf '%s\n' "$1" | nc -u -w0 localhost 8125
}

# Tables the metrics sent below are expected to end up in.
metric_tables=(
  "quickdid.http.request.count"
  "quickdid.http.request.duration_ms"
  "quickdid.resolver.rate_limit.available_permits"
  "quickdid.resolver.resolution_time"
)

# Send various types of metrics
echo "Sending counter metrics..."
for _ in {1..5}; do
  send_metric "quickdid.http.request.count:1|c|#method:GET,path:/resolve,status:200"
  send_metric "quickdid.http.request.count:1|c|#method:POST,path:/api,status:201"
done

echo "Sending gauge metrics..."
send_metric "quickdid.resolver.rate_limit.available_permits:10|g"
sleep 1
send_metric "quickdid.resolver.rate_limit.available_permits:5|g"

echo "Sending timing metrics..."
for _ in {1..10}; do
  duration=$((RANDOM % 100 + 10))
  send_metric "quickdid.http.request.duration_ms:${duration}|ms|#method:GET,path:/resolve,status:200"
done

echo "Sending histogram metrics..."
for _ in {1..5}; do
  resolution_time=$((RANDOM % 500 + 50))
  send_metric "quickdid.resolver.resolution_time:${resolution_time}|h|#resolver:redis"
done

echo "Waiting 15 seconds for Telegraf to flush metrics..."
sleep 15

echo ""
echo "Step 4: Verifying table creation..."
echo "========================================="

# Check if tables were created
for table in "${metric_tables[@]}"; do
  # check_table prints its own ✅/❌ line. Its non-zero status is ignored on
  # purpose: under `set -e` a bare call would abort the script at the first
  # missing table, skipping the remaining checks and the summary.
  check_table "$table" || true
done

echo ""
echo "Step 5: Verifying data insertion..."
echo "========================================="

# Check row counts
for table in "${metric_tables[@]}"; do
  count=$(run_query "SELECT COUNT(*) FROM \"$table\";" | tr -d '[:space:]')
  # run_query prints nothing when the table is missing; guard the integer
  # comparison so that case is reported cleanly rather than raising
  # "integer expression expected".
  if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
    echo "✅ Table '$table' has $count rows"
  else
    echo "❌ Table '$table' is empty"
  fi
done

echo ""
echo "Step 6: Testing JSONB tag queries..."
echo "========================================="

# Tallies for the checks performed from here on; reported in the summary.
failures=0
successes=0

#######################################
# Run a COUNT(*)-style query and print the result as a bare integer.
# Prints 0 when the query fails or returns anything non-numeric, so callers
# can always use the value in an integer comparison.
# Arguments: $1 - SQL text returning a single integer
# Outputs:   non-negative integer on stdout
#######################################
query_count() {
  local value
  value=$(run_query "$1" | tr -d '[:space:]') || value=""
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    echo "$value"
  else
    echo 0
  fi
}

# Test JSONB tag filtering
result=$(query_count "SELECT COUNT(*) FROM \"quickdid.http.request.count\" WHERE tags->>'method' = 'GET';")
if [ "$result" -gt 0 ]; then
  echo "✅ JSONB tag filtering works (found $result GET requests)"
  successes=$((successes + 1))
else
  echo "❌ JSONB tag filtering failed"
  failures=$((failures + 1))
fi

echo ""
echo "Step 7: Testing TimescaleDB functions..."
echo "========================================="

# Test time_bucket function
result=$(query_count "SELECT COUNT(*) FROM (SELECT time_bucket('1 minute', time) FROM \"quickdid.http.request.count\" GROUP BY 1) t;")
if [ "$result" -gt 0 ]; then
  echo "✅ time_bucket function works"
  successes=$((successes + 1))
else
  echo "❌ time_bucket function failed"
  failures=$((failures + 1))
fi

# Check if hypertables were created
hypertable_count=$(query_count "SELECT COUNT(*) FROM timescaledb_information.hypertables WHERE hypertable_name LIKE 'quickdid%';")
if [ "$hypertable_count" -gt 0 ]; then
  echo "✅ Found $hypertable_count hypertables"
else
  echo "❌ No hypertables found"
fi

echo ""
echo "Step 8: Running comprehensive query tests..."
echo "========================================="

# Run the verify-queries.sql script if it exists
if [ -f "../test-scripts/verify-queries.sql" ]; then
  echo "Running verify-queries.sql..."
  # The command is tested directly: under `set -e` a separate `$?` check
  # afterwards would never see a failure, because the script would already
  # have exited. ON_ERROR_STOP makes psql return non-zero when a statement
  # fails; without it psql reading from stdin exits 0 despite SQL errors.
  if docker exec -i timescaledb psql -v ON_ERROR_STOP=1 -U postgres -d metrics \
    < ../test-scripts/verify-queries.sql > query_results.txt 2>&1; then
    echo "✅ All queries executed successfully"
    echo "   Results saved to query_results.txt"
    successes=$((successes + 1))
  else
    echo "❌ Some queries failed. Check query_results.txt for details"
    failures=$((failures + 1))
  fi
else
  echo "⚠️ verify-queries.sql not found, skipping comprehensive query tests"
fi

echo ""
echo "========================================="
echo "Test Summary"
echo "========================================="

# Generate summary: add the table and hypertable checks to the tallies
# collected above.
if check_table "quickdid.http.request.count" > /dev/null 2>&1; then
  successes=$((successes + 1))
else
  failures=$((failures + 1))
fi

if [ "$hypertable_count" -gt 0 ]; then
  successes=$((successes + 1))
else
  failures=$((failures + 1))
fi

echo ""
if [ "$failures" -eq 0 ]; then
  echo "✅ All tests passed successfully!"
  echo ""
  echo "You can now:"
  echo "1. Connect to the database: docker exec -it timescaledb psql -U postgres -d metrics"
  echo "2. View logs: docker-compose logs -f"
  echo "3. Send more metrics: echo 'metric.name:value|type|#tag:value' | nc -u -w0 localhost 8125"
  echo "4. Stop services: docker-compose down"
else
  echo "⚠️ Some tests failed. Please check the output above for details."
  echo ""
  echo "Troubleshooting tips:"
  echo "1. Check Telegraf logs: docker-compose logs telegraf"
  echo "2. Check TimescaleDB logs: docker-compose logs timescaledb"
  echo "3. Verify connectivity: docker exec telegraf telegraf --test"
fi

echo ""
echo "Test complete!"

# Propagate the outcome so callers (CI, wrapper scripts) can detect failures.
if [ "$failures" -gt 0 ]; then
  exit 1
fi