forked from
smokesignal.events/quickdid
QuickDID is a high-performance AT Protocol identity resolution service written in Rust. It provides handle-to-DID resolution with Redis-backed caching and queue processing.
#!/bin/bash

# Comprehensive test script for Telegraf/TimescaleDB metrics setup.
# This script validates the entire metrics pipeline: it starts the Docker
# Compose stack, sends StatsD metrics over UDP with nc, and queries
# TimescaleDB through psql to confirm the data arrived.
#
# Requirements: docker, docker-compose, nc

# Abort on the first unhandled command failure.
set -e

echo "========================================="
echo "Telegraf/TimescaleDB Metrics Test Suite"
echo "========================================="
echo ""

# Fail fast when a required tool is missing, instead of aborting half-way
# through the run with containers already started.
for required_cmd in docker docker-compose nc; do
  if ! command -v "$required_cmd" > /dev/null 2>&1; then
    echo "❌ Required command '$required_cmd' was not found in PATH." >&2
    exit 1
  fi
done

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
  echo "❌ Docker is not running. Please start Docker first." >&2
  exit 1
fi

# Function to wait for a service to be healthy.
#
# Polls `docker-compose ps <service>` until its output reports the
# "(healthy)" status, printing one progress dot per unsuccessful poll.
#
# Arguments:
#   $1 - docker-compose service name
#   $2 - (optional) maximum number of polls; default 30
#   $3 - (optional) seconds to sleep between polls; default 2
# Outputs:
#   progress and result messages on stdout
# Returns:
#   0 as soon as the service is healthy, 1 if it never becomes healthy
wait_for_service() {
  local service=$1
  local max_attempts=${2:-30}
  local poll_interval=${3:-2}
  local attempt=1

  echo -n "Waiting for $service to be healthy"
  while [ "$attempt" -le "$max_attempts" ]; do
    # Match the parenthesised status: a bare "healthy" pattern would also
    # match "(unhealthy)". -F makes grep treat the parentheses literally.
    if docker-compose ps "$service" | grep -qF "(healthy)"; then
      echo " ✅"
      return 0
    fi
    echo -n "."
    sleep "$poll_interval"
    attempt=$((attempt + 1))
  done
  echo " ❌"
  echo "Service $service failed to become healthy after $max_attempts attempts"
  return 1
}

# Function to run SQL query.
#
# Executes the statement passed as $1 with psql inside the "timescaledb"
# container, connecting as user "postgres" to database "metrics". Output is
# tuples-only (-t) and stderr is discarded. The exit status is that of
# `docker exec`.
run_query() {
  local sql=$1
  local -a psql_cmd=(psql -U postgres -d metrics -t -c "$sql")

  docker exec -i timescaledb "${psql_cmd[@]}" 2> /dev/null
}

# Function to check table exists.
#
# Asks information_schema.tables (via run_query) whether a table with the
# given name exists and prints a ✅/❌ line with the outcome.
#
# Arguments:
#   $1 - table name (interpolated into the SQL text as-is)
# Outputs:
#   one status line on stdout
# Returns:
#   0 if the table exists, 1 if it does not or the query failed
check_table() {
  local table=$1
  local result

  # Declared and assigned separately so `local` does not mask run_query's
  # exit status; a failed query is treated like a missing table.
  result=$(run_query "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = '$table');") || result=""

  # psql -t pads the boolean with whitespace. Strip it and compare exactly,
  # so unrelated output that merely contains the letter "t" is not accepted.
  result=${result//[[:space:]]/}
  if [[ "$result" == "t" ]]; then
    echo "✅ Table '$table' exists"
    return 0
  else
    echo "❌ Table '$table' does not exist"
    return 1
  fi
}

# Navigate to the metrics-stack directory (create if needed)
if [ ! -d "metrics-stack" ]; then
  echo "Creating metrics-stack directory..."
fi
# Always ensure the sub-directories exist, not only when metrics-stack itself
# is missing: with a partially created tree the redirects into telegraf/ and
# init-scripts/ below would fail. mkdir -p is a no-op for existing paths.
mkdir -p metrics-stack/telegraf
mkdir -p metrics-stack/test-scripts
mkdir -p metrics-stack/init-scripts

cd metrics-stack

# Create .env file if it doesn't exist.
# It holds the values that docker-compose.yml references as ${...}.
if [ ! -f ".env" ]; then
  echo "Creating .env file..."
  cat > .env << 'EOF'
# PostgreSQL/TimescaleDB Configuration
POSTGRES_DB=metrics
POSTGRES_USER=postgres
POSTGRES_PASSWORD=secretpassword

# Telegraf Database User
TELEGRAF_DB_USER=postgres
TELEGRAF_DB_PASSWORD=secretpassword

# TimescaleDB Settings
TIMESCALE_TELEMETRY=off
EOF
fi

# Create the Telegraf configuration if it doesn't exist.
# The heredoc delimiter is quoted so ${...} is written literally instead of
# being expanded by this script.
if [ ! -f "telegraf/telegraf.conf" ]; then
  echo "Creating telegraf.conf..."
  cat > telegraf/telegraf.conf << 'EOF'
[agent]
  interval = "10s"
  round_interval = true
  metric_batch_size = 1000
  metric_buffer_limit = 10000
  collection_jitter = "0s"
  flush_interval = "10s"
  flush_jitter = "0s"
  precision = ""
  debug = false
  quiet = false
  hostname = "telegraf-agent"
  omit_hostname = false

[[inputs.statsd]]
  service_address = ":8125"
  protocol = "udp"
  delete_gauges = true
  delete_counters = true
  delete_sets = true
  delete_timings = true
  percentiles = [50, 90, 95, 99]
  metric_separator = "."
  allowed_pending_messages = 10000
  datadog_extensions = true
  datadog_distributions = true

[[outputs.postgresql]]
  connection = "host=timescaledb user=${TELEGRAF_DB_USER} password=${TELEGRAF_DB_PASSWORD} dbname=${POSTGRES_DB} sslmode=disable"
  schema = "public"
  create_templates = [
    '''CREATE TABLE IF NOT EXISTS {{.table}} ({{.columns}})''',
    '''SELECT create_hypertable({{.table|quoteLiteral}}, 'time', if_not_exists => TRUE)''',
  ]
  tags_as_jsonb = true
  fields_as_jsonb = false
EOF
fi

# Create docker-compose.yml if it doesn't exist.
# YAML nesting is expressed by indentation, so the heredoc body must keep it.
if [ ! -f "docker-compose.yml" ]; then
  echo "Creating docker-compose.yml..."
  cat > docker-compose.yml << 'EOF'
version: '3.8'

services:
  timescaledb:
    image: timescale/timescaledb:latest-pg17
    container_name: timescaledb
    restart: unless-stopped
    environment:
      POSTGRES_DB: ${POSTGRES_DB}
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      TIMESCALE_TELEMETRY: ${TIMESCALE_TELEMETRY}
    ports:
      - "5442:5432"
    volumes:
      - timescale_data:/home/postgres/pgdata/data
      - ./init-scripts:/docker-entrypoint-initdb.d:ro
    command:
      - postgres
      - -c
      - shared_buffers=256MB
      - -c
      - effective_cache_size=1GB
      - -c
      - maintenance_work_mem=64MB
      - -c
      - work_mem=8MB
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - metrics_network

  telegraf:
    image: telegraf:1.35
    container_name: telegraf
    restart: unless-stopped
    environment:
      TELEGRAF_DB_USER: ${TELEGRAF_DB_USER}
      TELEGRAF_DB_PASSWORD: ${TELEGRAF_DB_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    ports:
      - "8125:8125/udp"
    volumes:
      - ./telegraf/telegraf.conf:/etc/telegraf/telegraf.conf:ro
    depends_on:
      timescaledb:
        condition: service_healthy
    networks:
      - metrics_network
    command: ["telegraf", "--config", "/etc/telegraf/telegraf.conf"]

networks:
  metrics_network:
    driver: bridge

volumes:
  timescale_data:
EOF
fi

# Create init script (mounted into the timescaledb container's
# /docker-entrypoint-initdb.d by docker-compose.yml)
if [ ! -f "init-scripts/01-init.sql" ]; then
  echo "Creating init script..."
  cat > init-scripts/01-init.sql << 'EOF'
-- Enable TimescaleDB extension
CREATE EXTENSION IF NOT EXISTS timescaledb;
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
EOF
fi

# Send a single StatsD datagram over UDP to localhost:8125.
#   $1 - StatsD line, e.g. "name:value|type|#tag:value"
send_metric() {
  printf '%s\n' "$1" | nc -u -w0 localhost 8125
}

echo ""
echo "Step 1: Starting Docker services..."
echo "========================================="
# Start from a clean slate. Teardown is best-effort (nothing may be running
# yet), so its errors are silenced and its status ignored on purpose.
docker-compose down -v 2>/dev/null || true
docker-compose up -d

echo ""
echo "Step 2: Waiting for services to be healthy..."
echo "========================================="
wait_for_service timescaledb
sleep 5  # Extra time for Telegraf to connect

echo ""
echo "Step 3: Sending test metrics..."
echo "========================================="

# Send various types of metrics
echo "Sending counter metrics..."
for _ in {1..5}; do
  send_metric "quickdid.http.request.count:1|c|#method:GET,path:/resolve,status:200"
  send_metric "quickdid.http.request.count:1|c|#method:POST,path:/api,status:201"
done

echo "Sending gauge metrics..."
send_metric "quickdid.resolver.rate_limit.available_permits:10|g"
sleep 1
send_metric "quickdid.resolver.rate_limit.available_permits:5|g"

echo "Sending timing metrics..."
for _ in {1..10}; do
  # Random duration in the range 10..109
  duration=$((RANDOM % 100 + 10))
  send_metric "quickdid.http.request.duration_ms:${duration}|ms|#method:GET,path:/resolve,status:200"
done

echo "Sending histogram metrics..."
for _ in {1..5}; do
  # Random resolution time in the range 50..549
  resolution_time=$((RANDOM % 500 + 50))
  send_metric "quickdid.resolver.resolution_time:${resolution_time}|h|#resolver:redis"
done

# The generated telegraf.conf uses a 10s flush_interval; wait a little longer.
echo "Waiting 15 seconds for Telegraf to flush metrics..."
sleep 15

# Running tally of check outcomes, reported in the Test Summary below.
failures=0
successes=0

# Print a ✅ line for the message in $1 and count one passed check.
record_pass() {
  echo "✅ $1"
  successes=$((successes + 1))
}

# Print a ❌ line for the message in $1 and count one failed check.
record_fail() {
  echo "❌ $1"
  failures=$((failures + 1))
}

# Run the single-value query in $1 via run_query and print the result as a
# bare non-negative integer. Prints 0 when the query fails or yields anything
# that is not a number, so callers can compare numerically without
# "integer expression expected" errors.
query_count() {
  local value
  value=$(run_query "$1" | tr -d '[:space:]')
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    echo "$value"
  else
    echo 0
  fi
}

# Tables checked below, one per metric name sent in Step 3.
metric_tables=(
  "quickdid.http.request.count"
  "quickdid.http.request.duration_ms"
  "quickdid.resolver.rate_limit.available_permits"
  "quickdid.resolver.resolution_time"
)

echo ""
echo "Step 4: Verifying table creation..."
echo "========================================="

# Check if tables were created. check_table prints its own ✅/❌ line. It is
# called inside `if` so a missing table is counted instead of tripping
# `set -e` and aborting before the remaining checks and the summary.
for table in "${metric_tables[@]}"; do
  if check_table "$table"; then
    successes=$((successes + 1))
  else
    failures=$((failures + 1))
  fi
done

echo ""
echo "Step 5: Verifying data insertion..."
echo "========================================="

# Check row counts
for table in "${metric_tables[@]}"; do
  count=$(query_count "SELECT COUNT(*) FROM \"$table\";")
  if [ "$count" -gt 0 ]; then
    record_pass "Table '$table' has $count rows"
  else
    record_fail "Table '$table' is empty"
  fi
done

echo ""
echo "Step 6: Testing JSONB tag queries..."
echo "========================================="

# Test JSONB tag filtering
result=$(query_count "SELECT COUNT(*) FROM \"quickdid.http.request.count\" WHERE tags->>'method' = 'GET';")
if [ "$result" -gt 0 ]; then
  record_pass "JSONB tag filtering works (found $result GET requests)"
else
  record_fail "JSONB tag filtering failed"
fi

echo ""
echo "Step 7: Testing TimescaleDB functions..."
echo "========================================="

# Test time_bucket function
result=$(query_count "SELECT COUNT(*) FROM (SELECT time_bucket('1 minute', time) FROM \"quickdid.http.request.count\" GROUP BY 1) t;")
if [ "$result" -gt 0 ]; then
  record_pass "time_bucket function works"
else
  record_fail "time_bucket function failed"
fi

# Check if hypertables were created
hypertable_count=$(query_count "SELECT COUNT(*) FROM timescaledb_information.hypertables WHERE hypertable_name LIKE 'quickdid%';")
if [ "$hypertable_count" -gt 0 ]; then
  record_pass "Found $hypertable_count hypertables"
else
  record_fail "No hypertables found"
fi

echo ""
echo "Step 8: Running comprehensive query tests..."
echo "========================================="

# Run the verify-queries.sql script if it exists
if [ -f "../test-scripts/verify-queries.sql" ]; then
  echo "Running verify-queries.sql..."
  # The command is the `if` condition itself: under `set -e` a separate
  # `$?` test would never be reached after a failure. ON_ERROR_STOP makes
  # psql stop at the first SQL error and exit non-zero; without it psql
  # reports success even when statements in the script fail.
  if docker exec -i timescaledb psql -v ON_ERROR_STOP=1 -U postgres -d metrics \
    < ../test-scripts/verify-queries.sql > query_results.txt 2>&1; then
    record_pass "All queries executed successfully"
    echo " Results saved to query_results.txt"
  else
    record_fail "Some queries failed. Check query_results.txt for details"
  fi
else
  echo "⚠️ verify-queries.sql not found, skipping comprehensive query tests"
fi

echo ""
echo "========================================="
echo "Test Summary"
echo "========================================="

# The tally covers every check in steps 4-8, so the verdict below cannot
# claim success after a ❌ line was printed above.
echo "Checks passed: $successes, failed: $failures"

echo ""
if [ "$failures" -eq 0 ]; then
  echo "✅ All tests passed successfully!"
  echo ""
  echo "You can now:"
  echo "1. Connect to the database: docker exec -it timescaledb psql -U postgres -d metrics"
  echo "2. View logs: docker-compose logs -f"
  echo "3. Send more metrics: echo 'metric.name:value|type|#tag:value' | nc -u -w0 localhost 8125"
  echo "4. Stop services: docker-compose down"
else
  echo "⚠️ Some tests failed. Please check the output above for details."
  echo ""
  echo "Troubleshooting tips:"
  echo "1. Check Telegraf logs: docker-compose logs telegraf"
  echo "2. Check TimescaleDB logs: docker-compose logs timescaledb"
  echo "3. Verify connectivity: docker exec telegraf telegraf --test"
fi

echo ""
echo "Test complete!"

# Report failure through the exit status as well, so callers such as CI can
# detect it without parsing the output.
if [ "$failures" -ne 0 ]; then
  exit 1
fi