+157
PDSharp.Core/Health.fs
+157
PDSharp.Core/Health.fs
···
1
+
namespace PDSharp.Core
2
+
3
+
open System
4
+
open System.IO
5
+
open System.Runtime.InteropServices
6
+
7
+
/// Health monitoring module for guardrails and uptime checks
module Health =

  /// Health status response
  type HealthStatus = {
    /// Version of the PDS
    Version : string
    /// Uptime in seconds
    UptimeSeconds : int64
    /// Server start time in ISO8601
    StartTime : string
    /// Database status
    DatabaseStatus : DatabaseStatus
    /// Disk usage information (None when it could not be determined)
    DiskUsage : DiskUsage option
    /// Backup status
    BackupStatus : BackupStatus option
  }

  /// Database connectivity status
  and DatabaseStatus = {
    /// Whether the database is reachable
    IsHealthy : bool
    /// Optional error message
    Message : string option
  }

  /// Disk usage metrics
  and DiskUsage = {
    /// Total disk space in bytes
    TotalBytes : int64
    /// Free disk space in bytes
    FreeBytes : int64
    /// Used disk space in bytes
    UsedBytes : int64
    /// Percentage of disk used, rounded to two decimals
    UsedPercent : float
    /// Whether disk pressure is critical (>= 90%)
    IsCritical : bool
  }

  /// Backup status tracking
  and BackupStatus = {
    /// Timestamp of last successful backup
    LastBackupTime : DateTimeOffset option
    /// Age of last backup in hours
    BackupAgeHours : float option
    /// Whether backup is too old (>24 hours), or no backup has been recorded
    IsStale : bool
  }

  /// Used-space percentage at or above which disk pressure is reported as critical.
  let private criticalDiskPercent = 90.0

  /// Backup age in hours above which the latest backup is reported as stale.
  let private staleBackupHours = 24.0

  /// Connection-string keywords accepted as naming the SQLite database file.
  let private dataSourceKeys = [ "Data Source"; "DataSource"; "Filename" ]

  /// True when `key` (already trimmed) is one of the data-source keywords, ignoring case.
  let private isDataSourceKey (key : string) =
    dataSourceKeys
    |> List.exists (fun known -> String.Equals(known, key, StringComparison.OrdinalIgnoreCase))

  /// Get disk usage for a given path.
  /// Returns None when the drive is not ready or any lookup fails (best effort, never throws).
  let getDiskUsage (path : string) : DiskUsage option =
    try
      let driveInfo =
        if RuntimeInformation.IsOSPlatform OSPlatform.Windows then
          // Resolve relative paths first: the root of "." is empty, which DriveInfo rejects.
          DriveInfo(Path.GetPathRoot(Path.GetFullPath path))
        else
          DriveInfo(if Directory.Exists path then path else "/")

      if driveInfo.IsReady then
        let total = driveInfo.TotalSize
        let free = driveInfo.TotalFreeSpace
        let used = total - free

        // A zero-sized volume would otherwise produce NaN, which cannot be serialised to JSON.
        let usedPercent =
          if total > 0L then float used / float total * 100.0 else 0.0

        Some {
          TotalBytes = total
          FreeBytes = free
          UsedBytes = used
          UsedPercent = Math.Round(usedPercent, 2)
          // Compared before rounding so the flag reflects the exact ratio.
          IsCritical = usedPercent >= criticalDiskPercent
        }
      else
        None
    with _ ->
      None

  /// Check if a SQLite database file is accessible.
  /// Only checks that the file named by the connection string exists; no connection is opened.
  /// NOTE(review): non-file data sources such as ":memory:" are reported as not found.
  let checkDatabaseHealth (connectionString : string) : DatabaseStatus =
    try
      let dbPath =
        connectionString.Split ';'
        |> Array.tryPick (fun segment ->
          // Split on the first '=' only, so paths that contain '=' stay intact.
          match segment.Split([| '=' |], 2) with
          | [| key; value |] when isDataSourceKey (key.Trim()) -> Some(value.Trim())
          | _ -> None)

      match dbPath with
      | Some path when File.Exists path -> { IsHealthy = true; Message = None }
      | Some path -> {
          IsHealthy = false
          Message = Some $"Database file not found: {path}"
        }
      | None -> {
          IsHealthy = false
          Message = Some "Could not parse connection string"
        }
    with ex -> { IsHealthy = false; Message = Some ex.Message }

  /// Calculate backup status from last backup time.
  /// With no recorded backup the status is stale and carries no age.
  let getBackupStatus (lastBackupTime : DateTimeOffset option) : BackupStatus =
    match lastBackupTime with
    | Some time ->
      let ageHours = (DateTimeOffset.UtcNow - time).TotalHours

      {
        LastBackupTime = Some time
        BackupAgeHours = Some(Math.Round(ageHours, 2))
        IsStale = ageHours > staleBackupHours
      }
    | None -> {
        LastBackupTime = None
        BackupAgeHours = None
        IsStale = true
      }

  /// Mutable state for tracking server state
  /// NOTE(review): the fields are plain mutable values without synchronization — confirm
  /// that writers do not race if this instance is shared across request threads.
  type HealthState() =
    let mutable startTime = DateTimeOffset.UtcNow
    let mutable lastBackupTime : DateTimeOffset option = None

    /// Time the server started (defaults to construction time)
    member _.StartTime = startTime

    /// Override the recorded start time
    member _.SetStartTime(time : DateTimeOffset) = startTime <- time

    /// Time of the last recorded backup, if any
    member _.LastBackupTime = lastBackupTime

    /// Record a backup completed now (UTC)
    member _.RecordBackup() =
      lastBackupTime <- Some DateTimeOffset.UtcNow

    /// Record a backup completed at the given time
    member _.RecordBackup(time : DateTimeOffset) = lastBackupTime <- Some time

    /// Whole seconds elapsed since the start time
    member _.GetUptime() : int64 =
      let elapsed = DateTimeOffset.UtcNow - startTime
      int64 elapsed.TotalSeconds

  /// Build a complete health status from the version, tracked state,
  /// SQLite connection string and the path whose disk is inspected.
  let buildHealthStatus
    (version : string)
    (healthState : HealthState)
    (connectionString : string)
    (dataPath : string)
    : HealthStatus =
    {
      Version = version
      UptimeSeconds = healthState.GetUptime()
      StartTime = healthState.StartTime.ToString("o")
      DatabaseStatus = checkDatabaseHealth connectionString
      DiskUsage = getDiskUsage dataPath
      BackupStatus = Some(getBackupStatus healthState.LastBackupTime)
    }
+1
PDSharp.Core/PDSharp.Core.fsproj
+1
PDSharp.Core/PDSharp.Core.fsproj
+32
-5
PDSharp.Core/SqliteStore.fs
+32
-5
PDSharp.Core/SqliteStore.fs
···
83
83
84
84
type BlockRow = { cid : string; data : byte[] }
85
85
86
+
/// Row-shaped DTO for the accounts table; email is kept as a raw (possibly null) string.
[<CLIMutable>]
type AccountRow = {
  /// Account DID
  did : string
  /// Account handle
  handle : string
  /// Stored password hash
  password_hash : string
  /// Nullable in DB; a NULL column arrives here as a null string
  email : string
  /// Creation timestamp as stored text
  created_at : string
}
95
+
96
+
/// Converts a raw accounts row into the domain Account record.
let private toAccount (row : AccountRow) : Account = {
  Did = row.did
  Handle = row.handle
  PasswordHash = row.password_hash
  // A NULL email column surfaces as a null string; expose it as None.
  Email = Option.ofObj row.email
  // Parse with the invariant culture so the stored timestamp is never reinterpreted
  // through the current thread culture's calendar or date conventions.
  CreatedAt =
    DateTimeOffset.Parse(
      row.created_at,
      System.Globalization.CultureInfo.InvariantCulture,
      System.Globalization.DateTimeStyles.None
    )
}
103
+
86
104
type IRepoStore =
87
105
abstract member GetRepo : string -> Async<RepoRow option>
88
106
abstract member SaveRepo : RepoRow -> Async<unit>
···
146
164
use conn = new SqliteConnection(connectionString)
147
165
148
166
try
167
+
let emailValue = account.Email |> Option.toObj
168
+
let createdAtStr = account.CreatedAt.ToString "o"
169
+
149
170
let! _ =
150
171
conn.ExecuteAsync(
151
172
"""
152
173
INSERT INTO accounts (did, handle, password_hash, email, created_at)
153
174
VALUES (@Did, @Handle, @PasswordHash, @Email, @CreatedAt)
154
175
""",
155
-
account
176
+
{|
177
+
Did = account.Did
178
+
Handle = account.Handle
179
+
PasswordHash = account.PasswordHash
180
+
Email = emailValue
181
+
CreatedAt = createdAtStr
182
+
|}
156
183
)
157
184
|> Async.AwaitTask
158
185
···
167
194
use conn = new SqliteConnection(connectionString)
168
195
169
196
let! result =
170
-
conn.QuerySingleOrDefaultAsync<Account>(
197
+
conn.QuerySingleOrDefaultAsync<AccountRow>(
171
198
"SELECT * FROM accounts WHERE handle = @handle",
172
199
{| handle = handle |}
173
200
)
···
176
203
if isNull (box result) then
177
204
return None
178
205
else
179
-
return Some result
206
+
return Some(toAccount result)
180
207
}
181
208
182
209
/// Looks up an account by DID; yields None when no row matches.
member _.GetAccountByDid(did : string) = async {
  use conn = new SqliteConnection(connectionString)

  let! row =
    conn.QuerySingleOrDefaultAsync<AccountRow>("SELECT * FROM accounts WHERE did = @did", {| did = did |})
    |> Async.AwaitTask

  // QuerySingleOrDefault hands back null when there is no matching row.
  return
    match box row with
    | null -> None
    | _ -> Some(toAccount row)
}
194
221
195
222
type SqliteRepoStore(connectionString : string) =
+116
PDSharp.Tests/Health.Tests.fs
+116
PDSharp.Tests/Health.Tests.fs
···
1
+
module PDSharp.Tests.Health
2
+
3
+
open System
4
+
open Xunit
5
+
open PDSharp.Core.Health
6
+
7
+
/// A readable directory must yield disk metrics whose values are internally plausible.
[<Fact>]
let ``getDiskUsage returns disk info for valid path`` () =
  // Absolute path so the drive root can be resolved on every platform.
  let result = getDiskUsage (System.IO.Path.GetFullPath ".")

  // A missing result must fail the test instead of passing vacuously.
  Assert.True(result.IsSome, "expected disk usage for the current directory")
  let usage = result.Value
  Assert.True(usage.TotalBytes > 0L)
  Assert.True(usage.FreeBytes >= 0L)
  Assert.True(usage.UsedBytes >= 0L)
  Assert.True(usage.UsedPercent >= 0.0 && usage.UsedPercent <= 100.0)
18
+
19
+
/// UsedBytes and UsedPercent must agree with the reported total and free byte counts.
[<Fact>]
let ``getDiskUsage UsedPercent is calculated correctly`` () =
  // Absolute path so the drive root can be resolved on every platform.
  let result = getDiskUsage (System.IO.Path.GetFullPath ".")

  // A missing result must fail the test instead of passing vacuously.
  Assert.True(result.IsSome, "expected disk usage for the current directory")
  let usage = result.Value
  let expectedUsed = usage.TotalBytes - usage.FreeBytes
  Assert.Equal(expectedUsed, usage.UsedBytes)
  let expectedPercent = float usage.UsedBytes / float usage.TotalBytes * 100.0
  // UsedPercent is rounded to two decimals, so allow a small tolerance.
  Assert.True(abs (usage.UsedPercent - expectedPercent) < 0.1)
30
+
31
+
/// IsCritical must match the 90% threshold applied to the exact used/total ratio.
[<Fact>]
let ``getDiskUsage IsCritical is true when usage > 90 percent`` () =
  // Absolute path so the drive root can be resolved on every platform.
  let result = getDiskUsage (System.IO.Path.GetFullPath ".")

  // A missing result must fail the test instead of passing vacuously.
  Assert.True(result.IsSome, "expected disk usage for the current directory")
  let usage = result.Value
  // Derive the ratio from raw bytes: the rounded UsedPercent can cross the
  // threshold when the exact value sits just below it.
  let exactPercent = float usage.UsedBytes / float usage.TotalBytes * 100.0
  Assert.Equal(exactPercent >= 90.0, usage.IsCritical)
38
+
39
+
/// An existing file named by the connection string is reported healthy with no message.
[<Fact>]
let ``checkDatabaseHealth returns healthy for existing file`` () =
  let dbFile = System.IO.Path.GetTempFileName()

  try
    let status = checkDatabaseHealth $"Data Source={dbFile}"
    Assert.True status.IsHealthy
    Assert.True status.Message.IsNone
  finally
    // Always remove the temporary file, even when an assertion fails.
    System.IO.File.Delete dbFile
50
+
51
+
/// A connection string pointing at a non-existent file is unhealthy and carries a message.
[<Fact>]
let ``checkDatabaseHealth returns unhealthy for missing file`` () =
  let status = checkDatabaseHealth "Data Source=/nonexistent/path/to/database.db"
  Assert.False status.IsHealthy
  Assert.True status.Message.IsSome
57
+
58
+
/// A string without a data-source entry is unhealthy and carries a message.
[<Fact>]
let ``checkDatabaseHealth handles invalid connection string`` () =
  let status = checkDatabaseHealth "invalid"
  Assert.False status.IsHealthy
  Assert.True status.Message.IsSome
64
+
65
+
/// With no recorded backup the status is stale and has neither timestamp nor age.
[<Fact>]
let ``getBackupStatus returns stale when no backup`` () =
  let status = None |> getBackupStatus
  Assert.True status.IsStale
  Assert.True status.LastBackupTime.IsNone
  Assert.True status.BackupAgeHours.IsNone
71
+
72
+
/// A backup taken one hour ago is fresh and reports an age below 24 hours.
[<Fact>]
let ``getBackupStatus returns not stale for recent backup`` () =
  let oneHourAgo = DateTimeOffset.UtcNow.AddHours(-1.0)
  let status = Some oneHourAgo |> getBackupStatus
  Assert.False status.IsStale
  Assert.True status.LastBackupTime.IsSome
  Assert.True status.BackupAgeHours.IsSome
  Assert.True(status.BackupAgeHours.Value < 24.0)
80
+
81
+
/// A backup taken 25 hours ago is stale and reports an age above 24 hours.
[<Fact>]
let ``getBackupStatus returns stale for old backup`` () =
  let dayAndAnHourAgo = DateTimeOffset.UtcNow.AddHours(-25.0)
  let status = Some dayAndAnHourAgo |> getBackupStatus
  Assert.True status.IsStale
  Assert.True(status.BackupAgeHours.Value > 24.0)
87
+
88
+
/// Uptime is measured from the configured start time (window allows for scheduling jitter).
[<Fact>]
let ``HealthState tracks uptime correctly`` () =
  let tracker = HealthState()
  let tenSecondsAgo = DateTimeOffset.UtcNow.AddSeconds(-10.0)
  tracker.SetStartTime tenSecondsAgo
  let seconds = tracker.GetUptime()
  Assert.True(9L <= seconds && seconds <= 12L)
94
+
95
+
/// LastBackupTime starts empty and is populated once a backup is recorded.
[<Fact>]
let ``HealthState records backup time`` () =
  let tracker = HealthState()
  Assert.True tracker.LastBackupTime.IsNone

  tracker.RecordBackup()
  Assert.True tracker.LastBackupTime.IsSome
101
+
102
+
/// The aggregate status carries the version, a non-negative uptime,
/// a healthy database (the file exists) and a backup section.
[<Fact>]
let ``buildHealthStatus constructs complete status`` () =
  let tracker = HealthState()
  let dbFile = System.IO.Path.GetTempFileName()

  try
    let report = buildHealthStatus "1.0.0" tracker $"Data Source={dbFile}" "."

    Assert.Equal("1.0.0", report.Version)
    Assert.True(report.UptimeSeconds >= 0L)
    Assert.True report.DatabaseStatus.IsHealthy
    Assert.True report.BackupStatus.IsSome
  finally
    // Always remove the temporary file, even when an assertion fails.
    System.IO.File.Delete dbFile
+1
PDSharp.Tests/PDSharp.Tests.fsproj
+1
PDSharp.Tests/PDSharp.Tests.fsproj
+32
PDSharp/Handlers/Health.fs
+32
PDSharp/Handlers/Health.fs
···
1
+
namespace PDSharp.Handlers
2
+
3
+
open System.Text.Json
4
+
open Microsoft.AspNetCore.Http
5
+
open Giraffe
6
+
open PDSharp.Core.Health
7
+
open PDSharp.Core.Config
8
+
9
+
module HealthHandler =
  /// PDS version (could be read from assembly info)
  let private version = "0.1.0"

  /// JSON serialization options with camelCase naming
  let private jsonOptions =
    JsonSerializerOptions(PropertyNamingPolicy = JsonNamingPolicy.CamelCase, WriteIndented = true)

  /// Health check handler for /xrpc/_health endpoint.
  /// Responds 200 when the database check passes and 503 otherwise; the body is
  /// always the JSON-serialised health status.
  let healthHandler : HttpHandler =
    fun _next ctx -> task {
      let config = ctx.GetService<AppConfig>()
      let healthState = ctx.GetService<HealthState>()

      // Check disk of current working directory
      let status = buildHealthStatus version healthState config.SqliteConnectionString "."

      ctx.SetStatusCode(if status.DatabaseStatus.IsHealthy then 200 else 503)

      let json = JsonSerializer.Serialize(status, jsonOptions)
      ctx.SetContentType "application/json"

      // Write the body directly: the `text` handler would reset the
      // Content-Type header to text/plain.
      return! ctx.WriteStringAsync json
    }
+1
PDSharp/PDSharp.fsproj
+1
PDSharp/PDSharp.fsproj
+3
PDSharp/Program.fs
+3
PDSharp/Program.fs
···
10
10
open PDSharp.Core.SqliteStore
11
11
open PDSharp.Core.BlobStore
12
12
open PDSharp.Core.Config
13
+
open PDSharp.Core.Health
13
14
open PDSharp.Handlers
14
15
15
16
let getConfig () =
···
57
58
>=> choose [
58
59
route "/" >=> Server.indexHandler
59
60
route "/xrpc/com.atproto.server.describeServer" >=> Server.describeServerHandler
61
+
route "/xrpc/_health" >=> HealthHandler.healthHandler
60
62
]
61
63
POST
62
64
>=> route "/xrpc/com.atproto.server.createAccount"
···
106
108
services.AddSingleton<IBlobStore> blobStore |> ignore
107
109
services.AddSingleton<FirehoseState>(new FirehoseState()) |> ignore
108
110
services.AddSingleton<SigningKeyStore>(new SigningKeyStore()) |> ignore
111
+
services.AddSingleton<HealthState>(new HealthState()) |> ignore
109
112
110
113
[<EntryPoint>]
111
114
let main args =
+4
-4
roadmap.txt
+4
-4
roadmap.txt
···
73
73
- [ ] Databases (Litestream or raw copy) + /pds/actors backup
74
74
- [ ] Local disk blobs (if applicable)
75
75
- [ ] Guardrails & Monitoring:
76
-
- [ ] Uptime check endpoint: /xrpc/_health with JSON status
77
-
- [ ] Alerts: "Latest backup" too old, Disk pressure > 90%
76
+
- [x] Uptime check endpoint: /xrpc/_health with JSON status
77
+
- [x] Alerts: "Latest backup" too old, Disk pressure > 90%
78
78
- [ ] Log retention policies
79
79
DoD:
80
80
- Backups run automatically and report status
···
137
137
[x] CAR export + sync endpoints
138
138
[x] subscribeRepos firehose
139
139
[x] Authentication (createAccount, createSession)
140
-
[ ] Lexicon validation
140
+
[x] Lexicon validation
141
141
[ ] Domain + TLS configured
142
142
[ ] PDS deployed and reachable
143
143
[ ] Account created, session works
144
144
[ ] Writes + blobs verified
145
-
[ ] Backups + monitoring in place
145
+
[/] Backups + monitoring in place (health endpoint done, backup automation pending)
146
146
================================================================================
147
147
REFERENCES
148
148
================================================================================