Rust AppView - highly experimental!
# Flake for Parakeet.
#
# Per-system outputs (built with flake-utils `eachDefaultSystem`):
#   - packages.<system>.{default,consumer,dataloader,did-resolver,lexica,
#     parakeet,parakeet-db,parakeet-lexgen}
#   - checks.<system>.*  (the same derivations as `packages`)
#   - devShells.<system>.default
# System-independent output:
#   - nixosModules.default (options under `services.parakeet`)
#
# The real crane builds are currently commented out ("COMMENTED OUT FOR DEV");
# every package is a small placeholder script until they are re-enabled.
{
  description = "Parakeet is a Rust-based Bluesky AppView";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
    crane.url = "github:ipetkov/crane";
    flake-utils.url = "github:numtide/flake-utils";
    rust-overlay = {
      url = "github:oxalica/rust-overlay";
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs =
    {
      self,
      nixpkgs,
      crane,
      flake-utils,
      rust-overlay,
      ...
    }:
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = import nixpkgs {
          inherit system;
          overlays = [ (import rust-overlay) ];
        };

        # crane library bound to the latest nightly toolchain, with the
        # rust-src and rust-analyzer extensions added.
        craneLib = (crane.mkLib pkgs).overrideToolchain (
          p:
          p.rust-bin.selectLatestNightlyWith (
            toolchain:
            toolchain.default.override {
              extensions = [
                "rust-src"
                "rust-analyzer"
              ];
            }
          )
        );

        inherit (pkgs) lib;

        unfilteredRoot = ./.; # The original, unfiltered source
        src = lib.fileset.toSource {
          root = unfilteredRoot;
          fileset = lib.fileset.unions [
            # Default files from crane (Rust and cargo files)
            (craneLib.fileset.commonCargoSources unfilteredRoot)
          ];
        };

        # Single LLVM release used for every clang/libclang path below, so the
        # clang binary and libclang always come from the same version.
        llvm = pkgs.llvmPackages_18;

        # Libraries and tools shared by the package builds and the dev shell.
        nativeLibs = [
          pkgs.openssl
          pkgs.postgresql
          pkgs.libpq
          pkgs.clang
          pkgs.libclang
          pkgs.lld
          pkgs.protobuf
        ];

        # Environment variables shared by the package builds and the dev shell.
        toolEnv = {
          LIBCLANG_PATH = "${llvm.libclang.lib}/lib";
          CLANG_PATH = "${llvm.clang}/bin/clang";
          PROTOC_INCLUDE = "${pkgs.protobuf}/include";
          PROTOC = "${pkgs.protobuf}/bin/protoc";
        };

        # Common arguments can be set here to avoid repeating them later
        commonArgs = toolEnv // {
          inherit src;
          strictDeps = true;
          nativeBuildInputs = with pkgs; [
            pkg-config
          ];
          buildInputs =
            # Add additional build inputs to `nativeLibs` above
            nativeLibs
            ++ lib.optionals pkgs.stdenv.isDarwin [
              # Additional darwin specific inputs can be set here.
              # `darwin.apple_sdk.frameworks.*` is gone from current nixpkgs,
              # so no framework is listed explicitly any more.
              pkgs.libiconv
            ];

          # Additional environment variables can be set directly
          # MY_CUSTOM_VAR = "some value";
        };

        # Build *just* the cargo dependencies, so we can reuse
        # all of that work (e.g. via cachix) when running in CI
        # cargoArtifacts = craneLib.buildDepsOnly commonArgs;

        # individualCrateArgs = commonArgs // {
        #   inherit cargoArtifacts;
        #   inherit (craneLib.crateNameFromCargoToml { inherit src; }) version;
        #   # NB: we disable tests since we'll run them all via cargo-nextest
        #   doCheck = false;
        # };
        # fileSetForCrate =
        #   crate:
        #   lib.fileset.toSource {
        #     root = ./.;
        #     fileset = lib.fileset.unions [
        #       ./Cargo.toml
        #       ./Cargo.lock
        #       ./migrations
        #       ./consumer/src/sources/jetstream/zstd_dictionary
        #       (craneLib.fileset.commonCargoSources ./consumer)
        #       ./consumer/src/db/sql
        #       (craneLib.fileset.commonCargoSources ./dataloader-rs)
        #       (craneLib.fileset.commonCargoSources ./did-resolver)
        #       (craneLib.fileset.commonCargoSources ./lexica)
        #       (craneLib.fileset.commonCargoSources ./parakeet)
        #       ./parakeet/src/sql
        #       (craneLib.fileset.commonCargoSources ./parakeet-db)
        #       ./parakeet-db/src/dicts/post_content_v1.dict
        #       (craneLib.fileset.commonCargoSources ./parakeet-lexgen)
        #       (craneLib.fileset.commonCargoSources crate)
        #     ];
        #   };

        # Build the actual crate itself, reusing the dependency
        # artifacts from above.
        # COMMENTED OUT FOR DEV
        # consumer = craneLib.buildPackage (
        #   individualCrateArgs
        #   // {
        #     pname = "consumer";
        #     cargoExtraArgs = "-p consumer";
        #     src = fileSetForCrate ./consumer;
        #     postInstall = ''
        #       mkdir -p $out/{bin,lib/consumer}
        #     '';
        #   }
        # );
        # dataloader = craneLib.buildPackage (
        #   individualCrateArgs
        #   // {
        #     pname = "dataloader";
        #     cargoExtraArgs = "-p dataloader";
        #     src = fileSetForCrate ./dataloader-rs;
        #   }
        # );
        # did-resolver = craneLib.buildPackage (
        #   individualCrateArgs
        #   // {
        #     pname = "did-resolver";
        #     cargoExtraArgs = "-p did-resolver";
        #     src = fileSetForCrate ./did-resolver;
        #   }
        # );
        # lexica = craneLib.buildPackage (
        #   individualCrateArgs
        #   // {
        #     pname = "lexica";
        #     cargoExtraArgs = "-p lexica";
        #     src = fileSetForCrate ./lexica;
        #   }
        # );
        # parakeet = craneLib.buildPackage (
        #   individualCrateArgs
        #   // {
        #     pname = "parakeet";
        #     cargoExtraArgs = "-p parakeet";
        #     src = fileSetForCrate ./parakeet;
        #   }
        # );
        # parakeet-db = craneLib.buildPackage (
        #   individualCrateArgs
        #   // {
        #     pname = "parakeet-db";
        #     cargoExtraArgs = "-p parakeet-db";
        #     src = fileSetForCrate ./parakeet-db;
        #   }
        # );
        # parakeet-lexgen = craneLib.buildPackage (
        #   individualCrateArgs
        #   // {
        #     pname = "parakeet-lexgen";
        #     cargoExtraArgs = "-p parakeet-lexgen";
        #     src = fileSetForCrate ./parakeet-lexgen;
        #   }
        # );

        # Placeholder for a crate that is not built in dev mode: an executable
        # `$out/bin/<name>` that prints a notice. `writeShellScriptBin` is used
        # (rather than `writeTextDir`) so the file has a shebang and the
        # executable bit and can actually be run.
        mkPlaceholder =
          name: label: pkgs.writeShellScriptBin name "echo '${label} not built in dev mode'";

        # Dummy derivations to satisfy references
        consumer = mkPlaceholder "consumer" "Consumer";
        dataloader = mkPlaceholder "dataloader" "Dataloader";
        did-resolver = mkPlaceholder "did-resolver" "DID resolver";
        lexica = mkPlaceholder "lexica" "Lexica";
        parakeet = mkPlaceholder "parakeet" "Parakeet";
        parakeet-db = mkPlaceholder "parakeet-db" "Parakeet-db";
        parakeet-lexgen = mkPlaceholder "parakeet-lexgen" "Parakeet-lexgen";
      in
      {
        checks = {
          # Build the crate as part of `nix flake check` for convenience
          inherit
            consumer
            dataloader
            did-resolver
            lexica
            parakeet
            parakeet-db
            parakeet-lexgen
            ;
        };

        packages = {
          default = parakeet;
          inherit
            consumer
            dataloader
            did-resolver
            lexica
            parakeet
            parakeet-db
            parakeet-lexgen
            ;
        };

        devShells.default = craneLib.devShell (
          toolEnv
          // {
            # Inherit inputs from checks.
            checks = self.checks.${system};

            # Manually provision devshell
            nativeBuildInputs = with pkgs; [
              pkg-config
            ];
            buildInputs = nativeLibs;

            # Additional dev-shell environment variables can be set directly
            RUST_BACKTRACE = 0;
            NIXOS_OZONE_WL = 1;

            # Extra inputs can be added here; cargo and rustc are provided by default.
            # NOTE(review): rust-analyzer, rustfmt and clippy below come from
            # nixpkgs, not from the nightly toolchain selected for craneLib —
            # confirm that having both in the shell is intended.
            packages = with pkgs; [
              openssl
              bacon
              postgresql
              rust-analyzer
              rustfmt
              clippy
              git
              nixd
              direnv
              libpq
              clang
              libclang
              diesel-cli
              tokei
              ast-grep
              scc
              lld
              protobuf
              pkg-config
            ];
          }
        );
      }
    )
    # The NixOS module does not depend on the flake's `system`, so it is merged
    # in as a plain attribute set instead of being generated once per system.
    // {
      nixosModules = {
        default =
          {
            pkgs,
            lib,
            config,
            ...
          }:
          let
            cfg = config.services.parakeet;

            inherit (lib)
              mkEnableOption
              mkForce
              mkIf
              mkOption
              mkOverride
              types
              ;
          in
          {
            options.services.parakeet = {
              enable = mkEnableOption "parakeet";

              package = mkOption {
                type = types.package;
                # `stdenv.hostPlatform.system` replaces the deprecated
                # `pkgs.system` alias.
                default = self.packages.${pkgs.stdenv.hostPlatform.system}.default;
                description = "The path to the parakeet package.";
              };

              workingDirectory = mkOption {
                type = types.str;
                default = "/var/lib/parakeet";
                description = "Working directory for parakeet services (where Config.toml is located)";
              };

              environmentFiles = mkOption {
                type = types.listOf types.path;
                default = [ "/var/lib/parakeet/config.env" ];
                description = ''
                  File to load environment variables from. Loaded variables override
                  values set in {option}`environment`.
                '';
              };
            };

            config = mkIf cfg.enable {
              # environment.systemPackages = [
              #   self.packages.${pkgs.stdenv.hostPlatform.system}.consumer
              # ];
              # systemd.services.consumer = {
              #   description = "consumer";
              #   after = [
              #     "network-online.target"
              #     "parakeet.service"
              #   ];
              #   wants = [ "network-online.target" ];
              #   requires = [ "parakeet.service" ];
              #   wantedBy = [ "multi-user.target" ];
              #   serviceConfig = {
              #     ExecStart = "${self.packages.${pkgs.stdenv.hostPlatform.system}.consumer}/bin/consumer --backfill --indexer";
              #     Type = "exec";
              #     WorkingDirectory = cfg.workingDirectory;

              #     EnvironmentFile = cfg.environmentFiles;
              #     User = "parakeet";
              #     Group = "parakeet";
              #     StateDirectory = "parakeet";
              #     StateDirectoryMode = "0755";
              #     Restart = "always";

              #     # Hardening
              #     RemoveIPC = true;
              #     CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ];
              #     NoNewPrivileges = true;
              #     PrivateDevices = true;
              #     ProtectClock = true;
              #     ProtectKernelLogs = true;
              #     ProtectControlGroups = true;
              #     ProtectKernelModules = true;
              #     PrivateMounts = true;
              #     SystemCallArchitectures = [ "native" ];
              #     MemoryDenyWriteExecute = false; # required by V8 JIT
              #     RestrictNamespaces = true;
              #     RestrictSUIDSGID = true;
              #     ProtectHostname = true;
              #     LockPersonality = true;
              #     ProtectKernelTunables = true;
              #     RestrictAddressFamilies = [
              #       "AF_UNIX"
              #       "AF_INET"
              #       "AF_INET6"
              #     ];
              #     RestrictRealtime = true;
              #     DeviceAllow = [ "" ];
              #     ProtectSystem = "full";
              #     ProtectProc = "invisible";
              #     ProcSubset = "pid";
              #     ProtectHome = true;
              #     PrivateUsers = true;
              #     PrivateTmp = true;
              #     UMask = "0077";
              #   };
              # };
              # systemd.services.parakeet = {
              #   description = "parakeet";
              #   after = [
              #     "network-online.target"
              #     "postgresql.service"
              #   ];
              #   wants = [ "network-online.target" ];
              #   requires = [ "postgresql.service" ];
              #   wantedBy = [ "multi-user.target" ];
              #   serviceConfig = {
              #     ExecStart = "${cfg.package}/bin/parakeet";
              #     Type = "exec";
              #     WorkingDirectory = cfg.workingDirectory;

              #     EnvironmentFile = cfg.environmentFiles;
              #     User = "parakeet";
              #     Group = "parakeet";
              #     StateDirectory = "parakeet";
              #     StateDirectoryMode = "0755";
              #     Restart = "always";

              #     # Hardening
              #     RemoveIPC = true;
              #     CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ];
              #     NoNewPrivileges = true;
              #     PrivateDevices = true;
              #     ProtectClock = true;
              #     ProtectKernelLogs = true;
              #     ProtectControlGroups = true;
              #     ProtectKernelModules = true;
              #     PrivateMounts = true;
              #     SystemCallArchitectures = [ "native" ];
              #     MemoryDenyWriteExecute = false; # required by V8 JIT
              #     RestrictNamespaces = true;
              #     RestrictSUIDSGID = true;
              #     ProtectHostname = true;
              #     LockPersonality = true;
              #     ProtectKernelTunables = true;
              #     RestrictAddressFamilies = [
              #       "AF_UNIX"
              #       "AF_INET"
              #       "AF_INET6"
              #     ];
              #     RestrictRealtime = true;
              #     DeviceAllow = [ "" ];
              #     ProtectSystem = "full";
              #     ProtectProc = "invisible";
              #     ProcSubset = "pid";
              #     ProtectHome = true;
              #     PrivateUsers = true;
              #     PrivateTmp = true;
              #     UMask = "0077";
              #   };
              # };

              # Dedicated system user and group named "parakeet".
              users = {
                users.parakeet = {
                  group = "parakeet";
                  isSystemUser = true;
                };
                groups.parakeet = { };
              };

              # PostgreSQL 16 with a "parakeet" role that owns a "parakeet" database.
              services.postgresql = {
                enable = true;
                ensureUsers = [
                  {
                    name = "parakeet";
                    ensureDBOwnership = true;
                  }
                ];
                ensureDatabases = [ "parakeet" ];
                # NOTE(review): `trust` lets every local user and every loopback
                # client connect as any database role without a password, and
                # priority 10 outranks `mkForce` (priority 50), so a host
                # configuration cannot override it with `mkForce`. Confirm this
                # is acceptable outside development machines.
                authentication = mkOverride 10 ''
                  #type database DBuser auth-method
                  local all all trust
                  host all all 127.0.0.1/32 trust
                  host all all ::1/128 trust
                '';
                package = mkForce pkgs.postgresql_16;
                # NOTE(review): newer NixOS releases rename this option to
                # `services.postgresql.extensions`; the old name is kept here so
                # the module still evaluates on older hosts.
                extraPlugins = with pkgs.postgresql16Packages; [
                  # Note: pg_stat_statements is built into PostgreSQL (contrib module)
                  # and doesn't need to be listed here - just enable via CREATE EXTENSION
                  pgvector # Vector similarity search (future-proofing)
                  timescaledb # Time-series optimization
                  pgrouting # Graph analysis for social graph
                  postgis # Required dependency for pgrouting
                ];
                settings = {
                  # Preload extensions that require early initialization
                  # pg_stat_statements is a contrib module (built-in, not extraPlugins)
                  shared_preload_libraries = "pg_stat_statements,timescaledb";

                  # pg_stat_statements configuration
                  "pg_stat_statements.max" = "10000"; # Track up to 10k unique queries
                  "pg_stat_statements.track" = "all"; # Track all queries (top-level + nested)
                };
              };
            };
          };
      };
    };
}