Auto-indexing service and GraphQL API for AT Protocol Records — quickslice.slices.network/
atproto gleam graphql

add dockerfile, clean up server main, extract global variables to env vars

+23
.dockerignore
··· 1 + # Git 2 + .git 3 + .gitignore 4 + 5 + # Build artifacts 6 + **/build 7 + **/target 8 + **/*.db 9 + **/*.db-shm 10 + **/*.db-wal 11 + **/erl_crash.dump 12 + 13 + # IDE 14 + .vscode 15 + .idea 16 + **/.DS_Store 17 + 18 + # Env files 19 + **/.env 20 + **/.env.local 21 + 22 + # Claude 23 + .claude
+1
.gitignore
··· 1 1 erl_crash.dump 2 + .claude
+67
Dockerfile
··· 1 + ARG GLEAM_VERSION=v1.13.0 2 + 3 + # Build stage - compile the application 4 + FROM ghcr.io/gleam-lang/gleam:${GLEAM_VERSION}-erlang-alpine AS builder 5 + 6 + # Install build dependencies including Rust for NIFs 7 + RUN apk add --no-cache \ 8 + git \ 9 + build-base \ 10 + sqlite-dev \ 11 + rust \ 12 + cargo 13 + 14 + # Configure git for non-interactive use 15 + ENV GIT_TERMINAL_PROMPT=0 16 + 17 + # Add local dependencies first (these change less frequently) 18 + COPY ./jetstream /build/jetstream 19 + COPY ./lexicon /build/lexicon 20 + COPY ./graphql /build/graphql 21 + COPY ./lexicon_graphql /build/lexicon_graphql 22 + 23 + # Add server code 24 + COPY ./server /build/server 25 + 26 + # Build Rust NIFs for lexicon package (Linux build produces .so) 27 + RUN cd /build/lexicon/native/lexicon_nif && cargo build --release && \ 28 + mkdir -p /build/lexicon/priv && \ 29 + cp /build/lexicon/native/lexicon_nif/target/release/liblexicon_nif.so /build/lexicon/priv/liblexicon_nif.so 30 + 31 + # Install dependencies for all projects 32 + RUN cd /build/jetstream && gleam deps download 33 + RUN cd /build/lexicon && gleam deps download 34 + RUN cd /build/graphql && gleam deps download 35 + RUN cd /build/lexicon_graphql && gleam deps download 36 + RUN cd /build/server && gleam deps download 37 + 38 + # Compile the server code 39 + RUN cd /build/server \ 40 + && gleam export erlang-shipment 41 + 42 + # Runtime stage - slim image with only what's needed to run 43 + FROM ghcr.io/gleam-lang/gleam:${GLEAM_VERSION}-erlang-alpine 44 + 45 + # Install runtime dependencies 46 + RUN apk add --no-cache sqlite-libs 47 + 48 + # Copy the compiled server code from the builder stage 49 + COPY --from=builder /build/server/build/erlang-shipment /app 50 + 51 + # Copy lexicons directory to the runtime image 52 + COPY --from=builder /build/server/priv/lexicons /app/priv/lexicons 53 + 54 + # Set up the entrypoint 55 + WORKDIR /app 56 + RUN echo -e '#!/bin/sh\nexec ./entrypoint.sh "$@"' > 
./start.sh \ 57 + && chmod +x ./start.sh 58 + 59 + # Set environment variables 60 + ENV HOST=0.0.0.0 61 + ENV PORT=8000 62 + 63 + # Expose the port the server will run on 64 + EXPOSE $PORT 65 + 66 + # Run the server 67 + CMD ["./start.sh", "run"]
+18
server/.env.example
··· 1 + # AIP (AT Protocol Identity Provider) Configuration 2 + AIP_BASE_URL=https://auth.example.com 3 + 4 + # Server Configuration 5 + # HOST: The interface to bind to (use 0.0.0.0 for Docker, 127.0.0.1 for local dev) 6 + HOST=127.0.0.1 7 + # PORT: The port to listen on 8 + PORT=8000 9 + 10 + # Database Configuration 11 + DATABASE_URL=quickslice.db 12 + 13 + # Jetstream Configuration 14 + JETSTREAM_URL=wss://jetstream2.us-east.bsky.network/subscribe 15 + 16 + # Relay Configuration 17 + RELAY_URL=https://relay1.us-west.bsky.network 18 + PLC_DIRECTORY_URL=https://plc.directory
+16 -2
server/src/backfill.gleam
··· 1 1 import database 2 + import envoy 2 3 import gleam/dynamic.{type Dynamic} 3 4 import gleam/dynamic/decode 4 5 import gleam/erlang/process.{type Subject} ··· 58 59 59 60 /// Creates a default backfill configuration 60 61 pub fn default_config() -> BackfillConfig { 62 + // Get PLC directory URL from environment variable or use default 63 + let plc_url = case envoy.get("PLC_DIRECTORY_URL") { 64 + Ok(url) -> url 65 + Error(_) -> "https://plc.directory" 66 + } 67 + 61 68 BackfillConfig( 62 - plc_directory_url: "https://plc.directory", 69 + plc_directory_url: plc_url, 63 70 index_actors: True, 64 71 max_workers: 10, 65 72 ) ··· 483 490 cursor: Option(String), 484 491 acc: List(String), 485 492 ) -> Result(List(String), String) { 493 + // Get relay URL from environment variable or use default 494 + let relay_url = case envoy.get("RELAY_URL") { 495 + Ok(url) -> url 496 + Error(_) -> "https://relay1.us-west.bsky.network" 497 + } 498 + 486 499 // Build URL with large limit and cursor 487 500 let base_url = 488 - "https://relay1.us-west.bsky.network/xrpc/com.atproto.sync.listReposByCollection?collection=" 501 + relay_url 502 + <> "/xrpc/com.atproto.sync.listReposByCollection?collection=" 489 503 <> collection 490 504 <> "&limit=1000" 491 505
+1 -2
server/src/event_handler.gleam
··· 131 131 io.println( 132 132 "🗑️ delete " <> commit.collection <> " (" <> commit.rkey <> ")", 133 133 ) 134 - io.println(" URI: " <> uri) 135 134 136 135 case database.delete_record(db, uri) { 137 136 Ok(_) -> { 138 - io.println(" ✓ Deleted from database") 137 + Nil 139 138 } 140 139 Error(err) -> { 141 140 io.println_error(" ❌ Failed to delete: " <> string.inspect(err))
+8 -1
server/src/jetstream_consumer.gleam
··· 1 1 import database 2 + import envoy 2 3 import event_handler 3 4 import gleam/erlang/process 4 5 import gleam/int ··· 35 36 ) 36 37 list.each(collection_ids, fn(col) { io.println(" - " <> col) }) 37 38 39 + // Get Jetstream URL from environment variable or use default 40 + let jetstream_url = case envoy.get("JETSTREAM_URL") { 41 + Ok(url) -> url 42 + Error(_) -> "wss://jetstream2.us-east.bsky.network/subscribe" 43 + } 44 + 38 45 // Create Jetstream config 39 46 let config = 40 47 jetstream.JetstreamConfig( 41 - endpoint: "wss://jetstream2.us-east.bsky.network/subscribe", 48 + endpoint: jetstream_url, 42 49 wanted_collections: collection_ids, 43 50 wanted_dids: [], 44 51 )
+118 -167
server/src/server.gleam
··· 3 3 import database 4 4 import dotenv_gleam 5 5 import envoy 6 - import gleam/erlang/process.{type Subject} 6 + import gleam/erlang/process 7 + import gleam/http as gleam_http 7 8 import gleam/int 8 9 import gleam/io 9 10 import gleam/list 10 11 import gleam/option 11 - import gleam/otp/actor 12 12 import graphiql_handler 13 13 import graphql_handler 14 14 import importer 15 - import jetstream 16 15 import jetstream_consumer 17 16 import lustre/attribute 18 17 import lustre/element ··· 28 27 Context(db: sqlight.Connection, auth_base_url: String) 29 28 } 30 29 31 - pub type BackfillMessage { 32 - StartLexiconBackfill(reply_to: Subject(Nil)) 33 - StartCustomBackfill(collections: List(String), reply_to: Subject(Nil)) 34 - } 35 - 36 - fn handle_backfill(db: sqlight.Connection, message: BackfillMessage) { 37 - case message { 38 - StartLexiconBackfill(client) -> { 39 - io.println("🔄 Starting lexicon schema backfill...") 40 - backfill_lexicon_schemas(db) 41 - 42 - // After lexicon backfill, check which collections have lexicons 43 - io.println("") 44 - io.println("🔍 Checking collections for lexicons...") 45 - 46 - let collections_to_check = ["xyz.statusphere.status"] 47 - 48 - let collections_with_lexicons = 49 - collections_to_check 50 - |> list.filter(fn(collection) { 51 - case database.has_lexicon_for_collection(db, collection) { 52 - Ok(True) -> { 53 - io.println(" ✓ Found lexicon for: " <> collection) 54 - True 55 - } 56 - Ok(False) -> { 57 - io.println(" ✗ No lexicon for: " <> collection) 58 - False 59 - } 60 - Error(_) -> { 61 - io.println(" ⚠️ Error checking lexicon for: " <> collection) 62 - False 63 - } 64 - } 65 - }) 66 - 67 - case collections_with_lexicons { 68 - [] -> { 69 - io.println("") 70 - io.println( 71 - "⚠️ No collections with lexicons found - skipping custom backfill", 72 - ) 73 - } 74 - _ -> { 75 - io.println("") 76 - io.println( 77 - "📋 Starting custom backfill for " 78 - <> int.to_string(list.length(collections_with_lexicons)) 79 - <> " 
collections with lexicons...", 80 - ) 81 - run_custom_backfill_for_collections(db, collections_with_lexicons) 82 - } 83 - } 84 - 85 - process.send(client, Nil) 86 - actor.continue(db) 87 - } 88 - StartCustomBackfill(collections, client) -> { 89 - io.println("🔄 Starting custom backfill for specified collections...") 90 - run_custom_backfill_for_collections(db, collections) 91 - process.send(client, Nil) 92 - actor.continue(db) 93 - } 94 - } 95 - } 96 - 97 30 pub fn main() { 98 31 // Check for CLI arguments 99 32 case argv.load().arguments { ··· 134 67 io.println("🔄 Starting backfill for record-type lexicon collections") 135 68 io.println("") 136 69 70 + // Get database URL from environment variable or use default 71 + let database_url = case envoy.get("DATABASE_URL") { 72 + Ok(url) -> url 73 + Error(_) -> "atproto.db" 74 + } 75 + 137 76 // Initialize the database 138 - let assert Ok(db) = database.initialize("atproto.db") 77 + let assert Ok(db) = database.initialize(database_url) 139 78 140 79 // Get all record-type lexicons 141 80 io.println("📚 Fetching record-type lexicons from database...") ··· 173 112 // Load environment variables from .env file 174 113 let _ = dotenv_gleam.config() 175 114 115 + // Get database URL from environment variable or use default 116 + let database_url = case envoy.get("DATABASE_URL") { 117 + Ok(url) -> url 118 + Error(_) -> "atproto.db" 119 + } 120 + 176 121 // Initialize the database 177 - let assert Ok(db) = database.initialize("atproto.db") 122 + let assert Ok(db) = database.initialize(database_url) 178 123 179 124 // Auto-import lexicons from priv/lexicons if directory exists 180 125 io.println("") ··· 222 167 Error(_) -> "https://tunnel.chadtmiller.com" 223 168 } 224 169 170 + // Get HOST and PORT from environment variables or use defaults 171 + let host = case envoy.get("HOST") { 172 + Ok(h) -> h 173 + Error(_) -> "127.0.0.1" 174 + } 175 + 176 + let port = case envoy.get("PORT") { 177 + Ok(p) -> 178 + case int.parse(p) { 179 + 
Ok(port_num) -> port_num 180 + Error(_) -> 8000 181 + } 182 + Error(_) -> 8000 183 + } 184 + 225 185 io.println("🔐 Using AIP server: " <> auth_base_url) 226 186 227 187 let ctx = Context(db: db, auth_base_url: auth_base_url) ··· 231 191 let assert Ok(_) = 232 192 wisp_mist.handler(handler, secret_key_base) 233 193 |> mist.new 234 - |> mist.port(8000) 194 + |> mist.bind(host) 195 + |> mist.port(port) 235 196 |> mist.start 236 197 237 - io.println("Server started on http://localhost:8000") 198 + io.println("Server started on http://" <> host <> ":" <> int.to_string(port)) 238 199 process.sleep_forever() 239 200 } 240 201 241 - fn start_jetstream(db: sqlight.Connection) { 242 - // Create a configuration for Jetstream 243 - // Listen to commit events only (posts, likes, reposts, follows) 244 - let config = 245 - jetstream.JetstreamConfig( 246 - endpoint: "wss://jetstream2.us-west.bsky.network/subscribe", 247 - wanted_collections: [], 248 - wanted_dids: [], 249 - ) 250 - 251 - // Start the consumer with an event handler that identifies commit events 252 - jetstream.start_consumer(config, fn(event_json) { 253 - case jetstream.parse_event(event_json) { 254 - jetstream.CommitEvent(did, _time_us, commit) -> { 255 - io.println("✨ COMMIT EVENT") 256 - io.println(" DID: " <> did) 257 - io.println(" Operation: " <> commit.operation) 258 - io.println(" Collection: " <> commit.collection) 259 - io.println(" Record key: " <> commit.rkey) 260 - io.println(" Revision: " <> commit.rev) 261 - io.println("---") 262 - } 263 - jetstream.IdentityEvent(did, _time_us, _identity) -> { 264 - io.println("👤 IDENTITY EVENT: " <> did) 265 - io.println("---") 266 - } 267 - jetstream.AccountEvent(did, _time_us, _account) -> { 268 - io.println("🔐 ACCOUNT EVENT: " <> did) 269 - io.println("---") 270 - } 271 - jetstream.UnknownEvent(_raw) -> { 272 - // Ignore unknown events 273 - Nil 274 - } 275 - } 276 - }) 277 - } 278 - 279 202 fn handle_request(req: wisp.Request, ctx: Context) -> wisp.Response { 
280 203 use _req <- middleware(req) 281 204 ··· 283 206 284 207 case segments { 285 208 [] -> index_route(ctx) 209 + ["backfill"] -> handle_backfill_request(req, ctx.db) 286 210 ["graphql"] -> graphql_handler.handle_graphql_request(req, ctx.db) 287 211 ["graphiql"] -> graphiql_handler.handle_graphiql_request(req) 288 212 ["xrpc", _] -> { ··· 344 268 } 345 269 } 346 270 271 + fn handle_backfill_request( 272 + req: wisp.Request, 273 + db: sqlight.Connection, 274 + ) -> wisp.Response { 275 + case req.method { 276 + gleam_http.Post -> { 277 + // Get all record-type lexicons 278 + case database.get_record_type_lexicons(db) { 279 + Ok(lexicons) -> { 280 + case lexicons { 281 + [] -> { 282 + wisp.response(200) 283 + |> wisp.set_header("content-type", "application/json") 284 + |> wisp.set_body(wisp.Text( 285 + "{\"status\": \"no_lexicons\", \"message\": \"No record-type lexicons found\"}", 286 + )) 287 + } 288 + _ -> { 289 + let collections = list.map(lexicons, fn(lex) { lex.id }) 290 + // Run backfill in background process 291 + let config = backfill.default_config() 292 + process.spawn_unlinked(fn() { 293 + backfill.backfill_collections([], collections, [], config, db) 294 + }) 295 + 296 + wisp.response(200) 297 + |> wisp.set_header("content-type", "application/json") 298 + |> wisp.set_body(wisp.Text( 299 + "{\"status\": \"started\", \"collections\": " 300 + <> int.to_string(list.length(collections)) 301 + <> "}", 302 + )) 303 + } 304 + } 305 + } 306 + Error(_) -> { 307 + wisp.response(500) 308 + |> wisp.set_header("content-type", "application/json") 309 + |> wisp.set_body(wisp.Text( 310 + "{\"error\": \"database_error\", \"message\": \"Failed to fetch lexicons\"}", 311 + )) 312 + } 313 + } 314 + } 315 + _ -> { 316 + wisp.response(405) 317 + |> wisp.set_header("content-type", "application/json") 318 + |> wisp.set_body(wisp.Text( 319 + "{\"error\": \"method_not_allowed\", \"message\": \"Use POST to trigger backfill\"}", 320 + )) 321 + } 322 + } 323 + } 324 + 347 325 fn 
index_route(ctx: Context) -> wisp.Response { 348 326 // Query database stats 349 327 let collection_stats = case database.get_collection_stats(ctx.db) { ··· 430 408 html.h1([attribute.class("text-4xl font-bold text-gray-900")], [ 431 409 element.text("quickslice"), 432 410 ]), 433 - html.a( 434 - [ 435 - attribute.href("/graphiql"), 436 - attribute.class( 437 - "bg-purple-600 hover:bg-purple-700 text-white font-semibold py-2 px-4 rounded-lg transition-colors shadow-sm", 438 - ), 439 - ], 440 - [element.text("Open GraphiQL")], 441 - ), 411 + html.div([attribute.class("flex gap-3")], [ 412 + html.form( 413 + [ 414 + attribute.method("post"), 415 + attribute.action("/backfill"), 416 + attribute.class("inline"), 417 + ], 418 + [ 419 + html.button( 420 + [ 421 + attribute.type_("submit"), 422 + attribute.class( 423 + "bg-blue-600 hover:bg-blue-700 text-white font-semibold py-2 px-4 rounded-lg transition-colors shadow-sm", 424 + ), 425 + ], 426 + [element.text("Backfill Collections")], 427 + ), 428 + ], 429 + ), 430 + html.a( 431 + [ 432 + attribute.href("/graphiql"), 433 + attribute.class( 434 + "bg-purple-600 hover:bg-purple-700 text-white font-semibold py-2 px-4 rounded-lg transition-colors shadow-sm", 435 + ), 436 + ], 437 + [element.text("Open GraphiQL")], 438 + ), 439 + ]), 442 440 ]), 443 441 // Lexicons section 444 442 html.div([attribute.class("mb-8")], [ ··· 549 547 550 548 handle_request(req) 551 549 } 552 - 553 - /// Backfills com.atproto.lexicon.schema collections on startup. 554 - /// This function auto-discovers repositories from the relay that have lexicon schemas 555 - /// and indexes them into the local database. 556 - /// Note: Actor indexing is disabled for lexicon schemas. 
557 - fn backfill_lexicon_schemas(db: sqlight.Connection) { 558 - let repos = [] 559 - let collections = ["com.atproto.lexicon.schema"] 560 - let external_collections = [] 561 - let config = 562 - backfill.BackfillConfig( 563 - plc_directory_url: "https://plc.directory", 564 - index_actors: False, 565 - max_workers: 10, 566 - ) 567 - 568 - backfill.backfill_collections( 569 - repos, 570 - collections, 571 - external_collections, 572 - config, 573 - db, 574 - ) 575 - 576 - io.println("✅ Lexicon schema backfill complete") 577 - } 578 - 579 - /// Run a custom backfill for specific collections that have lexicons. 580 - /// This backfills actual records (posts, follows, etc.) after verifying lexicons exist. 581 - fn run_custom_backfill_for_collections( 582 - db: sqlight.Connection, 583 - collections: List(String), 584 - ) { 585 - let repos = [] 586 - let external_collections = [] 587 - let config = backfill.default_config() 588 - 589 - backfill.backfill_collections( 590 - repos, 591 - collections, 592 - external_collections, 593 - config, 594 - db, 595 - ) 596 - 597 - io.println("✅ Custom backfill complete") 598 - }