.dockerignore (+51)
···
+# Rust build artifacts
+target/
+**/target/
+services/target/
+apps/*/target/
+
+# Node.js dependencies and build artifacts
+node_modules/
+**/node_modules/
+.turbo/
+**/.turbo/
+build/
+dist/
+.next/
+
+# Development and cache files
+.gitignore
+**/.DS_Store
+*.log
+*.tmp
+*.temp
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Environment and config files
+.env
+.env.local
+.env.*.local
+
+# Database files
+*.db
+*.sqlite
+*.sqlite3
+
+# Test coverage
+coverage/
+**/coverage/
+
+# Temporary files
+tmp/
+temp/
+
+# SQLx offline query cache
+# Include workspace-level cache for monorepo builds
+# Uncomment the line below if you want to force online compilation
+# .sqlx/
.env.development (+22)
···
+# Test Database Environment Configuration
+# This file provides database credentials for testing discriminant improvements
+
+# Database Configuration
+DB_USER=postgres
+DB_PASSWORD=testpass123
+DB_NAME=teal_test
+
+# Docker Database URL (used by services in compose)
+DOCKER_DB_URL=postgres://postgres:testpass123@postgres:5432/teal_test
+
+# Local Database URL (used by migration tools and local testing)
+DATABASE_URL=postgres://postgres:testpass123@localhost:5433/teal_test
+
+# Redis Configuration (if needed)
+REDIS_URL=redis://garnet:6379
+
+# AT Protocol Configuration (placeholder for testing)
+AT_PROTOCOL_JWT_SECRET=test-jwt-secret-for-development-only
+
+# Client Configuration
+CLIENT_ADDRESS=localhost
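A quick sanity check for the credentials above, assuming the test compose stack is running and exposes Postgres on host port 5433 as `DATABASE_URL` implies (hypothetical invocation, not part of this change):

```bash
# Assumes psql is installed locally; the URL is copied verbatim from DATABASE_URL above.
psql "postgres://postgres:testpass123@localhost:5433/teal_test" -c "SELECT 1;"
```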
.env.template (+10 -7)
···
 NODE_ENV=development
 PORT=3000
 HOST=0.0.0.0
-PUBLIC_URL=A publicly accessible url for aqua
+PUBLIC_URL= # A publicly accessible url for aqua
 DB_USER=postgres
 DB_PASSWORD=supersecurepassword123987
 DB_NAME=teal
 DATABASE_URL="postgresql://${DB_USER}:${DB_PASSWORD}@localhost:5432/${DB_NAME}"
 DOCKER_DB_URL="postgresql://${DB_USER}:${DB_PASSWORD}@host.docker.internal:5432/${DB_NAME}"
-#This is not currently being used fully so can just use this default pubkey for now
+# `cargo run --bin teal gen-key` to generate a new pubkey
 DID_WEB_PUBKEY=zQ3sheEnMKhEK87PSu4P2mjAevViqHcjKmgxBWsDQPjLRM9wP
-CLIENT_ADDRESS=A publicly accessible host for amethyst like amethyst.teal.fm
-PUBLIC_DID_WEB=did:web:{aqua's PUBLIC_URL goes here after did:web:}
+CLIENT_ADDRESS= # A publicly accessible host for amethyst like amethyst.teal.fm
+PUBLIC_DID_WEB= # did:web:{aqua's PUBLIC_URL goes here after did:web:}
+
+# amethyst
+EXPO_PUBLIC_DID_WEB= # same as PUBLIC_DID_WEB
+EXPO_PUBLIC_BASE_URL= # same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm

-#amethyst
-EXPO_PUBLIC_DID_WEB=same as PUBLIC_DID_WEB
-EXPO_PUBLIC_BASE_URL=same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm
+SQLX_OFFLINE=true
+SQLX_OFFLINE_DIR="./.sqlx"
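A minimal sketch of filling in this template, assuming aqua is served at `aqua.example.com` and amethyst at `amethyst.example.com` (hypothetical hosts); the key-generation command is the one referenced in the comment added above:

```bash
# Generate a fresh DID pubkey for DID_WEB_PUBKEY (command from the template comment)
cargo run --bin teal gen-key

# Hypothetical example values for a deployment at the hosts named above
PUBLIC_URL=aqua.example.com
PUBLIC_DID_WEB=did:web:aqua.example.com
CLIENT_ADDRESS=amethyst.example.com
EXPO_PUBLIC_DID_WEB=did:web:aqua.example.com
EXPO_PUBLIC_BASE_URL=https://amethyst.example.com
```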
.github/WORKFLOWS.md (+201)
···
+# GitHub Actions Workflows Documentation
+
+This document describes the CI/CD workflows configured for the Teal project.
+
+## Overview
+
+The project uses GitHub Actions for continuous integration, deployment, and security scanning. The workflows are designed to handle a polyglot codebase with Rust services, Node.js packages, and a React Native application.
+
+## Workflows
+
+### CI (`ci.yml`)
+
+**Triggers:** Push/PR to `main` or `develop` branches
+
+**Purpose:** Primary continuous integration workflow that runs tests, linting, and type checking.
+
+**Jobs:**
+- **rust-check**: Formats, lints (clippy), and tests all Rust code in both `services/` and `apps/`
+- **node-check**: Type checking, linting, building, and testing Node.js packages
+- **lexicon-check**: Validates lexicon files and ensures generated code is up to date
+
+**Key Features:**
+- Caches Rust and Node.js dependencies for faster builds
+- Runs in parallel for optimal performance
+- Fails fast if any check fails
+
+### Aqua (`aqua.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `apps/aqua/**`
+
+**Purpose:** Builds and pushes the Aqua Rust application Docker image.
+
+**Features:**
+- Multi-platform builds (linux/amd64, linux/arm64)
+- Pushes to GitHub Container Registry (ghcr.io)
+- Only pushes on main branch (not PRs)
+- Uses GitHub Actions cache for Docker layers
+
+### Cadet (`cadet.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `services/cadet/**`
+
+**Purpose:** Builds and pushes the Cadet Rust service Docker image.
+
+**Features:**
+- Multi-platform builds (linux/amd64, linux/arm64)
+- Pushes to GitHub Container Registry (ghcr.io)
+- Only pushes on main branch (not PRs)
+- Uses GitHub Actions cache for Docker layers
+
+### Amethyst (`amethyst.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `apps/amethyst/**`
+
+**Purpose:** Builds the React Native/Expo application for different platforms.
+
+**Jobs:**
+- **build-web**: Builds web version and uploads artifacts
+- **build-ios**: Builds iOS version (only on main branch pushes, requires macOS runner)
+- **lint-and-test**: Type checking and testing
+
+**Features:**
+- Generates lexicons before building
+- Platform-specific builds
+- Artifact uploads for build assets
+
+### Services (`services.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `services/**`
+
+**Purpose:** Dynamically detects and builds all services with Dockerfiles.
+
+**Jobs:**
+- **detect-services**: Scans for services with Dockerfiles
+- **build-service**: Matrix build for each detected service
+- **test-services**: Runs tests for all services
+
+**Features:**
+- Dynamic service detection
+- Skips special directories (target, migrations, types, .sqlx)
+- Per-service Docker caching
+- Multi-platform builds
+
+### Release (`release.yml`)
+
+**Triggers:**
+- Push to tags matching `v*`
+- Manual workflow dispatch
+
+**Purpose:** Creates GitHub releases and builds production Docker images.
+
+**Jobs:**
+- **create-release**: Creates GitHub release with changelog
+- **build-and-release-aqua**: Builds and tags Aqua for release
+- **build-and-release-cadet**: Builds and tags Cadet for release
+- **release-other-services**: Builds other services (rocketman, satellite)
+- **build-and-release-amethyst**: Builds Amethyst and uploads to release
+
+**Features:**
+- Automatic changelog extraction
+- Production Docker tags (latest + version)
+- Release artifact uploads
+- Support for pre-releases (tags with `-`)
+
+### Security (`security.yml`)
+
+**Triggers:**
+- Push/PR to `main` or `develop`
+- Daily at 2 AM UTC
+- Manual dispatch
+
+**Purpose:** Comprehensive security scanning and vulnerability detection.
+
+**Jobs:**
+- **rust-security-audit**: Uses `cargo audit` for Rust dependencies
+- **node-security-audit**: Uses `pnpm audit` for Node.js dependencies
+- **codeql-analysis**: GitHub's semantic code analysis
+- **docker-security-scan**: Trivy vulnerability scanning for Docker images
+- **secrets-scan**: TruffleHog for secrets detection
+
+**Features:**
+- Fails on high/critical vulnerabilities
+- SARIF upload for security tab integration
+- Historical scanning with git history
+
+
- Historical scanning with git history
125
+
126
+
## Configuration Files
127
+
128
+
### Dependabot (`dependabot.yml`)
129
+
130
+
Automated dependency updates for:
131
+
- **npm**: Weekly updates for Node.js dependencies
132
+
- **cargo**: Weekly updates for Rust dependencies (services + apps)
133
+
- **github-actions**: Weekly updates for workflow actions
134
+
- **docker**: Weekly updates for Docker base images
135
+
136
+
**Schedule:** Monday-Tuesday mornings, staggered to avoid conflicts
137
+
138
+
## Container Registry
139
+
140
+
All Docker images are pushed to GitHub Container Registry:
141
+
- `ghcr.io/[owner]/[repo]/aqua`
142
+
- `ghcr.io/[owner]/[repo]/cadet`
143
+
- `ghcr.io/[owner]/[repo]/[service-name]`
144
+
145
+
**Tags:**
146
+
- `latest`: Latest build from main branch
147
+
- `sha-[commit]`: Specific commit builds
148
+
- `v[version]`: Release builds
149
+
- `pr-[number]`: Pull request builds (for testing)
150
+
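For example, pulling a published image by tag might look like this (the `[owner]`/`[repo]` placeholders are left as in the list above):

```bash
# Latest main-branch build of the aqua image
docker pull ghcr.io/[owner]/[repo]/aqua:latest

# A specific release of the cadet image
docker pull ghcr.io/[owner]/[repo]/cadet:v1.0.0
```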
+## Secrets and Permissions
+
+**Required secrets:**
+- `GITHUB_TOKEN`: Automatically provided (for registry access and releases)
+
+**Permissions used:**
+- `contents: read`: Read repository contents
+- `packages: write`: Push to GitHub Container Registry
+- `security-events: write`: Upload security scan results
+- `actions: read`: Access workflow information
+
+## Best Practices
+
+1. **Path-based triggers**: Workflows only run when relevant files change
+2. **Caching**: Aggressive caching for Rust, Node.js, and Docker layers
+3. **Multi-platform**: Docker images built for amd64 and arm64
+4. **Security-first**: Regular vulnerability scanning and secrets detection
+5. **Fail-fast**: Early termination on critical issues
+6. **Artifact preservation**: Build outputs stored for debugging/deployment
+
+## Usage Examples
+
+### Manual Release
+```bash
+# Tag and push for automatic release
+git tag v1.0.0
+git push origin v1.0.0
+
+# Or use workflow dispatch in GitHub UI
+```
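The workflow-dispatch path can also be driven from the GitHub CLI; a sketch, assuming `gh` is authenticated against this repository (the `tag` input matches the one declared in `release.yml`):

```bash
# Trigger the release workflow manually with a tag input
gh workflow run release.yml -f tag=v1.0.0

# Watch the run that was just started
gh run watch
```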
+
+### Local Development
+```bash
+# Run the same checks locally
+pnpm rust:fmt
+pnpm rust:clippy
+pnpm typecheck
+pnpm test
+```
+
+### Debugging Failed Builds
+1. Check the Actions tab for detailed logs
+2. Download artifacts from successful builds
+3. Use the same commands locally with cached dependencies
+
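Steps 1 and 2 are also covered by the GitHub CLI; a sketch, assuming `gh` is installed (artifact names such as `node-builds` come from the workflows in this change):

```bash
# List recent CI runs and inspect the failing one
gh run list --workflow=ci.yml
gh run view <run-id> --log-failed

# Download artifacts from a successful run
gh run download <run-id> -n node-builds
```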
+## Maintenance
+
+- **Weekly**: Review Dependabot PRs
+- **Monthly**: Update action versions if not auto-updated
+- **Quarterly**: Review and update security scanning tools
+- **As needed**: Add new services to release workflow matrix
.github/actions/setup/action.yml (+77)
···
1
+
name: "Setup Teal Environment"
2
+
description: "Sets up the common environment for Teal builds including Node.js, Rust, pnpm, and lexicons"
3
+
4
+
inputs:
5
+
setup-rust:
6
+
description: "Whether to setup Rust toolchain"
7
+
required: false
8
+
default: "false"
9
+
rust-components:
10
+
description: 'Rust components to install (e.g., "rustfmt,clippy")'
11
+
required: false
12
+
default: "rustfmt,clippy"
13
+
setup-node:
14
+
description: "Whether to setup Node.js and pnpm"
15
+
required: false
16
+
default: "true"
17
+
node-version:
18
+
description: "Node.js version to use"
19
+
required: false
20
+
default: "20"
21
+
lexicons-only-rust:
22
+
description: "Generate only Rust lexicons"
23
+
required: false
24
+
default: "false"
25
+
cache-key-suffix:
26
+
description: "Additional suffix for cache keys"
27
+
required: false
28
+
default: ""
29
+
30
+
runs:
31
+
using: "composite"
32
+
steps:
33
+
- name: Setup lexicons
34
+
shell: bash
35
+
run: ./scripts/setup-lexicons.sh
36
+
37
+
- name: Install pnpm
38
+
if: inputs.setup-node == 'true'
39
+
uses: pnpm/action-setup@v4
40
+
41
+
- name: Setup Node.js
42
+
if: inputs.setup-node == 'true'
43
+
uses: actions/setup-node@v4
44
+
with:
45
+
node-version: ${{ inputs.node-version }}
46
+
cache: "pnpm"
47
+
48
+
- name: Install Node dependencies
49
+
if: inputs.setup-node == 'true'
50
+
shell: bash
51
+
run: pnpm install --frozen-lockfile
52
+
53
+
- name: Generate lexicons
54
+
if: inputs.setup-node == 'true'
55
+
shell: bash
56
+
run: |
57
+
cd tools/lexicon-cli && pnpm i && pnpm build && cd ..
58
+
if [ "${{ inputs.lexicons-only-rust }}" = "true" ]; then
59
+
pnpm lex:gen --rust-only
60
+
else
61
+
pnpm lex:gen
62
+
fi
63
+
64
+
- name: Install Rust toolchain
65
+
if: inputs.setup-rust == 'true'
66
+
uses: dtolnay/rust-toolchain@stable
67
+
with:
68
+
components: ${{ inputs.rust-components }}
69
+
70
+
- name: Cache Rust dependencies
71
+
if: inputs.setup-rust == 'true'
72
+
uses: Swatinem/rust-cache@v2
73
+
with:
74
+
workspaces: |
75
+
services
76
+
apps/aqua
77
+
key: ${{ inputs.cache-key-suffix }}
.github/workflows/amethyst.yml (+91)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Build Amethyst
4
+
5
+
on:
6
+
push:
7
+
branches: [main]
8
+
paths:
9
+
- "apps/amethyst/**"
10
+
- "packages/**"
11
+
- "lexicons/**"
12
+
- "package.json"
13
+
- "pnpm-lock.yaml"
14
+
- ".github/workflows/amethyst.yml"
15
+
pull_request:
16
+
branches: [main]
17
+
paths:
18
+
- "apps/amethyst/**"
19
+
- "packages/**"
20
+
- "lexicons/**"
21
+
- "package.json"
22
+
- "pnpm-lock.yaml"
23
+
- ".github/workflows/amethyst.yml"
24
+
25
+
jobs:
26
+
build:
27
+
name: Build Amethyst
28
+
runs-on: ubuntu-latest
29
+
outputs:
30
+
build-cache-key: ${{ steps.cache-key.outputs.key }}
31
+
steps:
32
+
- name: Checkout repository
33
+
uses: actions/checkout@v4
34
+
35
+
- name: Setup environment
36
+
uses: ./.github/actions/setup
37
+
with:
38
+
setup-node: "true"
39
+
40
+
- name: Generate cache key
41
+
id: cache-key
42
+
run: echo "key=amethyst-build-${{ hashFiles('apps/amethyst/**', 'packages/**', 'lexicons/**') }}" >> $GITHUB_OUTPUT
43
+
44
+
- name: Build lex tool
45
+
run: cd tools/lexicon-cli && pnpm i && pnpm build
46
+
47
+
- name: Build web
48
+
run: pnpm turbo build:web --filter=@teal/amethyst
49
+
50
+
- name: Type check
51
+
run: pnpm turbo check-types --filter=@teal/amethyst
52
+
53
+
- name: Run tests
54
+
run: pnpm turbo test --filter=@teal/amethyst
55
+
56
+
- name: Upload web build artifacts
57
+
uses: actions/upload-artifact@v4
58
+
with:
59
+
name: amethyst-web-build
60
+
path: apps/amethyst/build/
61
+
retention-days: 7
62
+
63
+
build-ios:
64
+
name: Build iOS
65
+
runs-on: macos-latest
66
+
needs: build
67
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
68
+
steps:
69
+
- name: Checkout repository
70
+
uses: actions/checkout@v4
71
+
72
+
- name: Setup environment
73
+
uses: ./.github/actions/setup
74
+
with:
75
+
setup-node: "true"
76
+
77
+
- name: Build lex tool
78
+
run: cd tools/lexicon-cli && pnpm i && pnpm build
79
+
80
+
- name: Setup Expo CLI
81
+
run: npm install -g @expo/cli
82
+
83
+
- name: Build iOS
84
+
run: pnpm turbo build:ios --filter=@teal/amethyst
85
+
86
+
- name: Upload iOS build artifacts
87
+
uses: actions/upload-artifact@v4
88
+
with:
89
+
name: amethyst-ios-build
90
+
path: apps/amethyst/build/
91
+
retention-days: 7
.github/workflows/aqua.yml (+70)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Build and Push Aqua
4
+
5
+
on:
6
+
push:
7
+
branches:
8
+
- main
9
+
paths:
10
+
- "apps/aqua/**"
11
+
- "Cargo.toml"
12
+
- "Cargo.lock"
13
+
- ".github/workflows/aqua.yml"
14
+
pull_request:
15
+
branches:
16
+
- main
17
+
paths:
18
+
- "apps/aqua/**"
19
+
- "Cargo.toml"
20
+
- "Cargo.lock"
21
+
- ".github/workflows/aqua.yml"
22
+
23
+
env:
24
+
REGISTRY: ghcr.io
25
+
IMAGE_NAME: ${{ github.repository }}/aqua
26
+
27
+
jobs:
28
+
build-and-push:
29
+
runs-on: ubuntu-latest
30
+
permissions:
31
+
contents: read
32
+
packages: write
33
+
34
+
steps:
35
+
- name: Checkout repository
36
+
uses: actions/checkout@v4
37
+
38
+
- name: Log in to Container Registry
39
+
if: github.event_name != 'pull_request'
40
+
uses: docker/login-action@v3
41
+
with:
42
+
registry: ${{ env.REGISTRY }}
43
+
username: ${{ github.actor }}
44
+
password: ${{ secrets.GITHUB_TOKEN }}
45
+
46
+
- name: Extract metadata
47
+
id: meta
48
+
uses: docker/metadata-action@v5
49
+
with:
50
+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
51
+
tags: |
52
+
type=ref,event=branch
53
+
type=ref,event=pr
54
+
type=sha,prefix=sha-
55
+
type=raw,value=latest,enable={{is_default_branch}}
56
+
57
+
- name: Set up Docker Buildx
58
+
uses: docker/setup-buildx-action@v3
59
+
60
+
- name: Build and push Docker image
61
+
uses: docker/build-push-action@v5
62
+
with:
63
+
context: .
64
+
file: ./apps/aqua/Dockerfile
65
+
push: ${{ github.event_name != 'pull_request' }}
66
+
tags: ${{ steps.meta.outputs.tags }}
67
+
labels: ${{ steps.meta.outputs.labels }}
68
+
platforms: linux/amd64,linux/arm64
69
+
cache-from: type=gha
70
+
cache-to: type=gha,mode=max
.github/workflows/cadet.yml (+68)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Build and Push Cadet
4
+
5
+
on:
6
+
push:
7
+
branches: [main]
8
+
paths:
9
+
- "services/cadet/**"
10
+
- "Cargo.toml"
11
+
- "Cargo.lock"
12
+
- ".github/workflows/cadet.yml"
13
+
pull_request:
14
+
branches: [main]
15
+
paths:
16
+
- "services/cadet/**"
17
+
- "Cargo.toml"
18
+
- "Cargo.lock"
19
+
- ".github/workflows/cadet.yml"
20
+
21
+
env:
22
+
REGISTRY: ghcr.io
23
+
IMAGE_NAME: ${{ github.repository }}/cadet
24
+
25
+
jobs:
26
+
build-and-push:
27
+
runs-on: ubuntu-latest
28
+
permissions:
29
+
contents: read
30
+
packages: write
31
+
32
+
steps:
33
+
- name: Checkout repository
34
+
uses: actions/checkout@v4
35
+
36
+
- name: Log in to Container Registry
37
+
if: github.event_name != 'pull_request'
38
+
uses: docker/login-action@v3
39
+
with:
40
+
registry: ${{ env.REGISTRY }}
41
+
username: ${{ github.actor }}
42
+
password: ${{ secrets.GITHUB_TOKEN }}
43
+
44
+
- name: Extract metadata
45
+
id: meta
46
+
uses: docker/metadata-action@v5
47
+
with:
48
+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
49
+
tags: |
50
+
type=ref,event=branch
51
+
type=ref,event=pr
52
+
type=sha,prefix=sha-
53
+
type=raw,value=latest,enable={{is_default_branch}}
54
+
55
+
- name: Set up Docker Buildx
56
+
uses: docker/setup-buildx-action@v3
57
+
58
+
- name: Build and push Docker image
59
+
uses: docker/build-push-action@v5
60
+
with:
61
+
context: .
62
+
file: ./services/cadet/Dockerfile
63
+
push: ${{ github.event_name != 'pull_request' }}
64
+
tags: ${{ steps.meta.outputs.tags }}
65
+
labels: ${{ steps.meta.outputs.labels }}
66
+
platforms: linux/amd64,linux/arm64
67
+
cache-from: type=gha
68
+
cache-to: type=gha,mode=max
.github/workflows/ci.yml (+217)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: CI
4
+
5
+
on:
6
+
push:
7
+
branches: [main, develop]
8
+
pull_request:
9
+
branches: [main, develop]
10
+
11
+
env:
12
+
CARGO_TERM_COLOR: always
13
+
SQLX_OFFLINE: true
14
+
SQLX_OFFLINE_DIR: "./.sqlx"
15
+
16
+
jobs:
17
+
setup-and-build:
18
+
name: Setup and Build All
19
+
runs-on: ubuntu-latest
20
+
outputs:
21
+
rust-cache-key: ${{ steps.rust-cache.outputs.cache-hit }}
22
+
node-cache-key: ${{ steps.node-cache.outputs.cache-hit }}
23
+
steps:
24
+
- name: Checkout repository
25
+
uses: actions/checkout@v4
26
+
27
+
- name: Setup environment
28
+
uses: ./.github/actions/setup
29
+
with:
30
+
setup-rust: "true"
31
+
setup-node: "true"
32
+
cache-key-suffix: "ci-build"
33
+
34
+
- name: Setup SQLx offline files
35
+
run: ./scripts/setup-sqlx-offline.sh
36
+
37
+
- name: Build Node packages
38
+
run: pnpm build
39
+
40
+
- name: Build Rust services (x86_64)
41
+
run: |
42
+
cargo build --release --all-features
43
+
44
+
- name: Build Rust apps (x86_64)
45
+
run: |
46
+
cd apps/aqua
47
+
cargo build --release --all-features
48
+
49
+
- name: Collect executables (x86_64)
50
+
run: |
51
+
mkdir -p artifacts/x86_64
52
+
# Copy service executables
53
+
if [ -d "services/target/release" ]; then
54
+
find services/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
55
+
fi
56
+
# Copy app executables
57
+
if [ -d "apps/aqua/target/release" ]; then
58
+
find apps/aqua/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
59
+
fi
60
+
echo "x86_64 executables:"
61
+
ls -la artifacts/x86_64/ || echo "No executables found"
62
+
63
+
- name: Upload Node build artifacts
64
+
uses: actions/upload-artifact@v4
65
+
with:
66
+
name: node-builds
67
+
path: |
68
+
packages/*/dist/
69
+
apps/amethyst/build/
70
+
retention-days: 1
71
+
72
+
- name: Upload Rust build artifacts
73
+
uses: actions/upload-artifact@v4
74
+
with:
75
+
name: rust-builds-x86_64
76
+
path: |
77
+
artifacts/x86_64/
78
+
retention-days: 1
79
+
80
+
rust-cross-compile:
81
+
name: Cross-compile Rust
82
+
runs-on: ubuntu-latest
83
+
needs: setup-and-build
84
+
strategy:
85
+
matrix:
86
+
target: [aarch64-unknown-linux-gnu]
87
+
steps:
88
+
- name: Checkout repository
89
+
uses: actions/checkout@v4
90
+
91
+
- name: Setup environment
92
+
uses: ./.github/actions/setup
93
+
with:
94
+
setup-rust: "true"
95
+
setup-node: "true"
96
+
lexicons-only-rust: "true"
97
+
cache-key-suffix: "cross-${{ matrix.target }}"
98
+
99
+
- name: Setup SQLx offline files
100
+
run: ./scripts/setup-sqlx-offline.sh
101
+
102
+
- name: Install cross-compilation tools
103
+
run: |
104
+
cargo install cross --git https://github.com/cross-rs/cross
105
+
rustup target add ${{ matrix.target }}
106
+
# Set up environment for cross-compilation
107
+
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
108
+
echo "CROSS_NO_WARNINGS=0" >> $GITHUB_ENV
109
+
110
+
- name: Cross-compile services
111
+
run: |
112
+
cross build --release --all-features --target ${{ matrix.target }}
113
+
114
+
- name: Collect cross-compiled executables
115
+
run: |
116
+
mkdir -p artifacts/${{ matrix.target }}
117
+
# Copy service executables
118
+
if [ -d "services/target/${{ matrix.target }}/release" ]; then
119
+
find services/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
120
+
fi
121
+
# Copy app executables
122
+
if [ -d "apps/aqua/target/${{ matrix.target }}/release" ]; then
123
+
find apps/aqua/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
124
+
fi
125
+
echo "Cross-compiled executables for ${{ matrix.target }}:"
126
+
ls -la artifacts/${{ matrix.target }}/ || echo "No executables found"
127
+
128
+
- name: Upload cross-compiled artifacts
129
+
uses: actions/upload-artifact@v4
130
+
with:
131
+
name: rust-builds-${{ matrix.target }}
132
+
path: |
133
+
artifacts/${{ matrix.target }}/
134
+
retention-days: 1
135
+
136
+
# disabled b/c it's triggered on autogenerated content
137
+
# and can't find a way around it rn
138
+
139
+
# rust-quality:
140
+
# name: Rust Quality Checks
141
+
# runs-on: ubuntu-latest
142
+
# needs: setup-and-build
143
+
# steps:
144
+
# - name: Checkout repository
145
+
# uses: actions/checkout@v4
146
+
147
+
# - name: Setup environment
148
+
# uses: ./.github/actions/setup
149
+
# with:
150
+
# setup-rust: "true"
151
+
# setup-node: "true"
152
+
# lexicons-only-rust: "true"
153
+
# cache-key-suffix: "ci-build"
154
+
155
+
# - name: Setup SQLx offline files
156
+
# run: ./scripts/setup-sqlx-offline.sh
157
+
158
+
# # - name: Check Rust formatting
159
+
# # run: |
160
+
# # cargo fmt --all -- --check
161
+
162
+
# - name: Run Clippy
163
+
# run: |
164
+
# cargo clippy --all-targets --all-features --workspace --exclude types -- -D warnings
165
+
166
+
# - name: Run Rust tests
167
+
# run: |
168
+
# cargo test --all-features
169
+
170
+
# node-quality:
171
+
# name: Node.js Quality Checks
172
+
# runs-on: ubuntu-latest
173
+
# needs: setup-and-build
174
+
# steps:
175
+
# - name: Checkout repository
176
+
# uses: actions/checkout@v4
177
+
178
+
# - name: Setup environment
179
+
# uses: ./.github/actions/setup
180
+
# with:
181
+
# setup-node: "true"
182
+
# cache-key-suffix: "ci-build"
183
+
184
+
# - name: Download Node build artifacts
185
+
# uses: actions/download-artifact@v4
186
+
# with:
187
+
# name: node-builds
188
+
# path: .
189
+
190
+
# # - name: Type check
191
+
# # run: pnpm typecheck
192
+
193
+
# - name: Lint and format check
194
+
# run: pnpm fix --check
195
+
196
+
# - name: Run tests
197
+
# run: pnpm test
198
+
199
+
lexicon-validation:
200
+
name: Lexicon Validation
201
+
runs-on: ubuntu-latest
202
+
steps:
203
+
- name: Checkout repository
204
+
uses: actions/checkout@v4
205
+
206
+
- name: Setup environment
207
+
uses: ./.github/actions/setup
208
+
with:
209
+
setup-node: "true"
210
+
211
+
- name: Validate lexicons
212
+
run: pnpm lex:validate
213
+
214
+
- name: Check lexicon generation consistency
215
+
run: |
216
+
pnpm lex:gen
217
+
git diff --exit-code || (echo "Lexicon files are out of sync. Run 'pnpm lex:gen' locally." && exit 1)
.github/workflows/release.yml (+236)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Release
4
+
5
+
on:
6
+
push:
7
+
tags:
8
+
- "v*"
9
+
workflow_dispatch:
10
+
inputs:
11
+
tag:
12
+
description: "Release tag"
13
+
required: true
14
+
type: string
15
+
16
+
env:
17
+
REGISTRY: ghcr.io
18
+
CARGO_TERM_COLOR: always
19
+
SQLX_OFFLINE: true
20
+
21
+
jobs:
22
+
create-release:
23
+
name: Create Release
24
+
runs-on: ubuntu-latest
25
+
outputs:
26
+
release_id: ${{ steps.create_release.outputs.id }}
27
+
upload_url: ${{ steps.create_release.outputs.upload_url }}
28
+
tag: ${{ steps.tag.outputs.tag }}
29
+
steps:
30
+
- name: Checkout repository
31
+
uses: actions/checkout@v4
32
+
33
+
- name: Get tag name
34
+
id: tag
35
+
run: |
36
+
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
37
+
echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
38
+
else
39
+
echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
40
+
fi
41
+
42
+
- name: Generate changelog
43
+
id: changelog
44
+
run: |
45
+
if [ -f "CHANGELOG.md" ]; then
46
+
# Extract changelog for this version
47
+
awk '/^## \[${{ steps.tag.outputs.tag }}\]/{flag=1; next} /^## \[/{flag=0} flag' CHANGELOG.md > release_notes.md
48
+
else
49
+
echo "Release ${{ steps.tag.outputs.tag }}" > release_notes.md
50
+
fi
51
+
52
+
- name: Create Release
53
+
id: create_release
54
+
uses: actions/create-release@v1
55
+
env:
56
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
57
+
with:
58
+
tag_name: ${{ steps.tag.outputs.tag }}
59
+
release_name: Release ${{ steps.tag.outputs.tag }}
60
+
body_path: release_notes.md
61
+
draft: false
62
+
prerelease: ${{ contains(steps.tag.outputs.tag, '-') }}
63
+
64
+
build-all:
65
+
name: Build All Artifacts
66
+
runs-on: ubuntu-latest
67
+
needs: create-release
68
+
outputs:
69
+
rust-artifacts: ${{ steps.upload-rust.outputs.artifact-id }}
70
+
node-artifacts: ${{ steps.upload-node.outputs.artifact-id }}
71
+
steps:
72
+
- name: Checkout repository
73
+
uses: actions/checkout@v4
74
+
75
+
- name: Setup environment
76
+
uses: ./.github/actions/setup
77
+
with:
78
+
setup-rust: "true"
79
+
setup-node: "true"
80
+
cache-key-suffix: "release-${{ needs.create-release.outputs.tag }}"
81
+
82
+
- name: Install cross-compilation tools
83
+
run: |
84
+
cargo install cross
85
+
rustup target add aarch64-unknown-linux-gnu
86
+
87
+
- name: Build Node.js artifacts
88
+
run: |
89
+
pnpm build
90
+
cd apps/amethyst && pnpm build
91
+
92
+
- name: Build Rust services (x86_64)
93
+
run: |
94
+
cd services
95
+
cargo build --release --all-features
96
+
97
+
- name: Build Rust services (aarch64)
98
+
run: |
99
+
cd services
100
+
cross build --release --all-features --target aarch64-unknown-linux-gnu
101
+
102
+
- name: Build Rust apps (x86_64)
103
+
run: |
104
+
cd apps/aqua
105
+
cargo build --release --all-features
106
+
107
+
- name: Build Rust apps (aarch64)
108
+
run: |
109
+
cd apps/aqua
110
+
cross build --release --all-features --target aarch64-unknown-linux-gnu
111
+
112
+
- name: Create Amethyst build archive
113
+
run: |
114
+
cd apps/amethyst
115
+
tar -czf amethyst-${{ needs.create-release.outputs.tag }}.tar.gz build/
116
+
117
+
- name: Upload Rust build artifacts
118
+
id: upload-rust
119
+
uses: actions/upload-artifact@v4
120
+
with:
121
+
name: rust-release-builds
122
+
path: |
123
+
target/release/
124
+
target/aarch64-unknown-linux-gnu/release/
125
+
apps/aqua/target/release/
126
+
apps/aqua/target/aarch64-unknown-linux-gnu/release/
127
+
retention-days: 7
128
+
129
+
- name: Upload Node build artifacts
130
+
id: upload-node
131
+
uses: actions/upload-artifact@v4
132
+
with:
133
+
name: node-release-builds
134
+
path: |
135
+
packages/*/dist/
136
+
apps/amethyst/build/
137
+
apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
138
+
retention-days: 7
139
+
140
+
- name: Upload Amethyst build to release
141
+
uses: actions/upload-release-asset@v1
142
+
env:
143
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
144
+
with:
145
+
upload_url: ${{ needs.create-release.outputs.upload_url }}
146
+
asset_path: ./apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
147
+
asset_name: amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
148
+
asset_content_type: application/gzip
149
+
150
+
release-services:
151
+
name: Release Services
152
+
runs-on: ubuntu-latest
153
+
needs: [create-release, build-all]
154
+
permissions:
155
+
contents: read
156
+
packages: write
157
+
strategy:
158
+
matrix:
159
+
service:
160
+
- name: aqua
161
+
dockerfile: apps/aqua/Dockerfile
162
+
context: .
163
+
- name: cadet
164
+
dockerfile: services/cadet/Dockerfile
165
+
context: .
166
+
- name: rocketman
167
+
dockerfile: services/rocketman/Dockerfile
168
+
context: .
169
+
- name: satellite
170
+
dockerfile: services/satellite/Dockerfile
171
+
context: .
172
+
steps:
173
+
- name: Checkout repository
174
+
uses: actions/checkout@v4
175
+
176
+
- name: Check if service has Dockerfile
177
+
id: check
178
+
run: |
179
+
if [ -f "${{ matrix.service.dockerfile }}" ]; then
180
+
echo "has_dockerfile=true" >> $GITHUB_OUTPUT
181
+
echo "Service ${{ matrix.service.name }} has Dockerfile"
182
+
else
183
+
echo "has_dockerfile=false" >> $GITHUB_OUTPUT
184
+
echo "Service ${{ matrix.service.name }} does not have Dockerfile, skipping"
185
+
fi
186
+
187
+
- name: Setup environment
188
+
if: steps.check.outputs.has_dockerfile == 'true'
189
+
uses: ./.github/actions/setup
190
+
with:
191
+
setup-node: "true"
192
+
lexicons-only-rust: "true"
193
+
194
+
- name: Download build artifacts
195
+
if: steps.check.outputs.has_dockerfile == 'true'
196
+
uses: actions/download-artifact@v4
197
+
with:
198
+
name: rust-release-builds
199
+
path: .
200
+
201
+
- name: Log in to Container Registry
202
+
if: steps.check.outputs.has_dockerfile == 'true'
203
+
uses: docker/login-action@v3
204
+
with:
205
+
registry: ${{ env.REGISTRY }}
206
+
username: ${{ github.actor }}
207
+
password: ${{ secrets.GITHUB_TOKEN }}
208
+
209
+
- name: Extract metadata
210
+
if: steps.check.outputs.has_dockerfile == 'true'
211
+
id: meta
212
+
uses: docker/metadata-action@v5
213
+
with:
214
+
images: ${{ env.REGISTRY }}/${{ github.repository }}/${{ matrix.service.name }}
215
+
tags: |
216
+
type=raw,value=latest
217
+
type=raw,value=${{ needs.create-release.outputs.tag }}
218
+
219
+
- name: Set up Docker Buildx
220
+
if: steps.check.outputs.has_dockerfile == 'true'
221
+
uses: docker/setup-buildx-action@v3
222
+
223
+
- name: Build and push Docker image
224
+
if: steps.check.outputs.has_dockerfile == 'true'
225
+
uses: docker/build-push-action@v5
226
+
with:
227
+
context: ${{ matrix.service.context }}
228
+
file: ${{ matrix.service.dockerfile }}
229
+
push: true
230
+
tags: ${{ steps.meta.outputs.tags }}
231
+
labels: ${{ steps.meta.outputs.labels }}
232
+
platforms: linux/amd64,linux/arm64
233
+
cache-from: type=gha,scope=${{ matrix.service.name }}
234
+
cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}
235
+
build-args: |
236
+
BUILDKIT_INLINE_CACHE=1
.gitignore (+4 -17)
···
 
 # generated lexicons
 # js lexicons
-*/**/lexicons
+packages/lexicons/src
 # rust lexicons (types :)))
-*/**/types
+services/types/src
 
 # vendor directory for submodules
 !vendor/
···
 vendor/**/dist/
 vendor/**/node_modules/
 
-# lexicons directory structure
-!lexicons/
-# Track our custom lexicons
-!lexicons/fm.teal.alpha/
-!lexicons/fm.teal.alpha/**/*.json
-# Track the symlinks to atproto lexicons
-!lexicons/app
-!lexicons/chat
-!lexicons/com
-!lexicons/tools
-# But ignore any generated files within lexicons
-lexicons/**/*.js
-lexicons/**/*.d.ts
-lexicons/**/dist/
-lexicons/**/node_modules/
+# claude
+.claude
.pre-commit-config.yaml (+126)
···
1
+
# Pre-commit configuration for Teal project
2
+
# Install with: pip install pre-commit && pre-commit install
3
+
# Run manually with: pre-commit run --all-files
4
+
5
+
repos:
6
+
# General file checks
7
+
- repo: https://github.com/pre-commit/pre-commit-hooks
8
+
rev: v4.6.0
9
+
hooks:
10
+
- id: trailing-whitespace
11
+
- id: end-of-file-fixer
12
+
- id: check-yaml
13
+
- id: check-json
14
+
- id: check-toml
15
+
- id: check-merge-conflict
16
+
- id: check-added-large-files
17
+
args: ["--maxkb=500"]
18
+
- id: mixed-line-ending
19
+
args: ["--fix=lf"]
20
+
21
+
# TypeScript/JavaScript formatting and linting
22
+
- repo: local
23
+
hooks:
24
+
- id: prettier
25
+
name: Prettier
26
+
entry: pnpm prettier --write
27
+
language: system
28
+
files: \.(ts|tsx|js|jsx|json|md|yaml|yml)$
29
+
pass_filenames: true
30
+
31
+
- id: biome-check
32
+
name: Biome Check
33
+
entry: pnpm biome check --apply
34
+
language: system
35
+
files: \.(ts|tsx|js|jsx)$
36
+
pass_filenames: false
37
+
38
+
# TypeScript check temporarily disabled due to vendor compilation issues
39
+
# - id: typescript-check
40
+
# name: TypeScript Check
41
+
# entry: pnpm typecheck
42
+
# language: system
43
+
# files: \.(ts|tsx)$
44
+
# pass_filenames: false
45
+
46
+
# Rust formatting and linting
47
+
- repo: local
48
+
hooks:
49
+
- id: cargo-fmt-services
50
+
name: Cargo Format (Services Workspace)
51
+
entry: bash -c 'cd services && cargo fmt'
52
+
language: system
53
+
files: services/.*\.rs$
54
+
pass_filenames: false
55
+
56
+
- id: cargo-clippy-services
57
+
name: Cargo Clippy (Services Workspace)
58
+
entry: bash -c 'cd services && cargo clippy -- -D warnings'
59
+
language: system
60
+
files: services/.*\.rs$
61
+
pass_filenames: false
62
+
63
+
- id: cargo-fmt-apps
64
+
name: Cargo Format (Apps)
65
+
entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo fmt && cd ../..; fi; done'
66
+
language: system
67
+
files: apps/.*\.rs$
68
+
pass_filenames: false
69
+
70
+
- id: cargo-clippy-apps
71
+
name: Cargo Clippy (Apps)
72
+
entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo clippy -- -D warnings && cd ../..; fi; done'
73
+
language: system
74
+
files: apps/.*\.rs$
75
+
pass_filenames: false
76
+
77
+
# Lexicon validation and generation
78
+
- repo: local
79
+
hooks:
80
+
- id: lexicon-validate
81
+
name: Validate Lexicons
82
+
entry: pnpm lex:validate
83
+
language: system
84
+
files: lexicons/.*\.json$
85
+
pass_filenames: false
86
+
87
+
- id: lexicon-generate
88
+
name: Generate Lexicons (files ignored by .gitignore)
89
+
entry: pnpm lex:gen-server
90
+
language: system
91
+
files: lexicons/.*\.json$
92
+
pass_filenames: false
93
+
always_run: false
94
+
95
+
# Optional: Additional checks
96
+
- repo: local
97
+
hooks:
98
+
- id: no-console-log
99
+
name: Check for console.log
100
+
entry: bash -c 'if grep -r "console\.log" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" .; then echo "Found console.log statements. Please remove them."; exit 1; fi'
101
+
language: system
102
+
files: \.(ts|tsx|js|jsx)$
103
+
pass_filenames: false
104
+
105
+
- id: check-todos
106
+
name: Check for TODO/FIXME
107
+
entry: bash -c 'if grep -r -i "TODO\|FIXME" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" --include="*.rs" .; then echo "Found TODO/FIXME comments. Consider addressing them."; fi'
108
+
language: system
109
+
files: \.(ts|tsx|js|jsx|rs)$
110
+
pass_filenames: false
111
+
verbose: true
112
+
113
+
# Global settings
114
+
default_language_version:
115
+
node: system
116
+
python: python3
117
+
118
+
# Skip certain hooks for specific file patterns
119
+
exclude: |
120
+
(?x)^(
121
+
vendor/.*|
122
+
node_modules/.*|
123
+
target/.*|
124
+
.git/.*|
125
+
.*\.lock$
126
+
)$
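With this config installed (see the install/run commands in the comments at the top of the file), individual hooks can also be run by id; for example, the services-workspace Rust hooks defined above:

```bash
# Run only the Rust formatting and lint hooks from this config
pre-commit run cargo-fmt-services --all-files
pre-commit run cargo-clippy-services --all-files
```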
.sqlx/query-00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75.json (+46)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n ae1.name as synthetic_name,\n ae2.name as target_name,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score,\n COUNT(ptae1.play_uri) as synthetic_plays,\n COUNT(ptae2.play_uri) as target_plays\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id\n LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score\n ORDER BY similarity_score DESC\n LIMIT 10\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "synthetic_name",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "target_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "similarity_score",
19
+
"type_info": "Float4"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "synthetic_plays",
24
+
"type_info": "Int8"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "target_plays",
29
+
"type_info": "Int8"
30
+
}
31
+
],
32
+
"parameters": {
33
+
"Left": [
34
+
"Float4"
35
+
]
36
+
},
37
+
"nullable": [
38
+
false,
39
+
false,
40
+
null,
41
+
null,
42
+
null
43
+
]
44
+
},
45
+
"hash": "00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75"
46
+
}
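These `.sqlx/query-*.json` files are sqlx's offline query cache, which is what lets `SQLX_OFFLINE=true` builds (see `.env.template` and `ci.yml` above) compile without a live database. A sketch of regenerating them after a query changes, assuming `sqlx-cli` is installed and a database matching the migrations is reachable (URL copied from `.env.development`):

```bash
# Rebuild the offline query cache; run from the workspace that owns the queries
export DATABASE_URL=postgres://postgres:testpass123@localhost:5433/teal_test
cargo sqlx prepare --workspace
```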
.sqlx/query-0d7c3ef80c20dac6efd0fe3c430d7f41b1c90368ff99ce8a09f66bca63864d1e.json (+12)
.sqlx/query-0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273.json (+35)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE p.did = $1\n AND pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $2\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "play_count",
19
+
"type_info": "Int8"
20
+
}
21
+
],
22
+
"parameters": {
23
+
"Left": [
24
+
"Text",
25
+
"Int8"
26
+
]
27
+
},
28
+
"nullable": [
29
+
false,
30
+
true,
31
+
null
32
+
]
33
+
},
34
+
"hash": "0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273"
35
+
}
.sqlx/query-0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM artists_extended WHERE id = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec"
14
+
}
.sqlx/query-0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1.json (+112)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE did = ANY($1)\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "uri",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "did",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "rkey",
19
+
"type_info": "Text"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "cid",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "isrc",
29
+
"type_info": "Text"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "duration",
34
+
"type_info": "Int4"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "track_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "played_time",
44
+
"type_info": "Timestamptz"
45
+
},
46
+
{
47
+
"ordinal": 8,
48
+
"name": "processed_time",
49
+
"type_info": "Timestamptz"
50
+
},
51
+
{
52
+
"ordinal": 9,
53
+
"name": "release_mbid",
54
+
"type_info": "Uuid"
55
+
},
56
+
{
57
+
"ordinal": 10,
58
+
"name": "release_name",
59
+
"type_info": "Text"
60
+
},
61
+
{
62
+
"ordinal": 11,
63
+
"name": "recording_mbid",
64
+
"type_info": "Uuid"
65
+
},
66
+
{
67
+
"ordinal": 12,
68
+
"name": "submission_client_agent",
69
+
"type_info": "Text"
70
+
},
71
+
{
72
+
"ordinal": 13,
73
+
"name": "music_service_base_domain",
74
+
"type_info": "Text"
75
+
},
76
+
{
77
+
"ordinal": 14,
78
+
"name": "origin_url",
79
+
"type_info": "Text"
80
+
},
81
+
{
82
+
"ordinal": 15,
83
+
"name": "artists",
84
+
"type_info": "Json"
85
+
}
86
+
],
87
+
"parameters": {
88
+
"Left": [
89
+
"TextArray"
90
+
]
91
+
},
92
+
"nullable": [
93
+
false,
94
+
false,
95
+
false,
96
+
false,
97
+
true,
98
+
true,
99
+
false,
100
+
true,
101
+
true,
102
+
true,
103
+
true,
104
+
true,
105
+
true,
106
+
true,
107
+
true,
108
+
null
109
+
]
110
+
},
111
+
"hash": "0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1"
112
+
}
.sqlx/query-193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7.json (+22)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT extract_discriminant($1)",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "extract_discriminant",
9
+
"type_info": "Text"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Text"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7"
22
+
}
.sqlx/query-1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM releases WHERE mbid = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c"
14
+
}
.sqlx/query-1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM play_to_artists WHERE play_uri = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363"
14
+
}
.sqlx/query-28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e.json (+22)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT extract_edition_discriminant($1)",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "extract_edition_discriminant",
9
+
"type_info": "Text"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Text"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e"
22
+
}
.sqlx/query-2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149.json (+52)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n r1.mbid as release1_mbid,\n r1.name as release1_name,\n r2.mbid as release2_mbid,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "release1_mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "release1_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "release2_mbid",
19
+
"type_info": "Uuid"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "release2_name",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "similarity_score",
29
+
"type_info": "Float4"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "shared_artists",
34
+
"type_info": "Int8"
35
+
}
36
+
],
37
+
"parameters": {
38
+
"Left": [
39
+
"Float4"
40
+
]
41
+
},
42
+
"nullable": [
43
+
false,
44
+
false,
45
+
false,
46
+
false,
47
+
null,
48
+
null
49
+
]
50
+
},
51
+
"hash": "2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149"
52
+
}
.sqlx/query-2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM play_to_artists_extended WHERE artist_id = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a"
14
+
}
.sqlx/query-3d84a9e1ed05846bc931eea9b90fd88cae8b636968af4bd2f9b1a9927d15379d.json (+12)
.sqlx/query-413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0.json (+22)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "count",
9
+
"type_info": "Int8"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0"
22
+
}
.sqlx/query-5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n DELETE FROM profiles WHERE did = $1\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1"
14
+
}
.sqlx/query-651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0.json (+112)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE uri = $1\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "uri",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "did",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "rkey",
19
+
"type_info": "Text"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "cid",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "isrc",
29
+
"type_info": "Text"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "duration",
34
+
"type_info": "Int4"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "track_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "played_time",
44
+
"type_info": "Timestamptz"
45
+
},
46
+
{
47
+
"ordinal": 8,
48
+
"name": "processed_time",
49
+
"type_info": "Timestamptz"
50
+
},
51
+
{
52
+
"ordinal": 9,
53
+
"name": "release_mbid",
54
+
"type_info": "Uuid"
55
+
},
56
+
{
57
+
"ordinal": 10,
58
+
"name": "release_name",
59
+
"type_info": "Text"
60
+
},
61
+
{
62
+
"ordinal": 11,
63
+
"name": "recording_mbid",
64
+
"type_info": "Uuid"
65
+
},
66
+
{
67
+
"ordinal": 12,
68
+
"name": "submission_client_agent",
69
+
"type_info": "Text"
70
+
},
71
+
{
72
+
"ordinal": 13,
73
+
"name": "music_service_base_domain",
74
+
"type_info": "Text"
75
+
},
76
+
{
77
+
"ordinal": 14,
78
+
"name": "origin_url",
79
+
"type_info": "Text"
80
+
},
81
+
{
82
+
"ordinal": 15,
83
+
"name": "artists",
84
+
"type_info": "Json"
85
+
}
86
+
],
87
+
"parameters": {
88
+
"Left": [
89
+
"Text"
90
+
]
91
+
},
92
+
"nullable": [
93
+
false,
94
+
false,
95
+
false,
96
+
false,
97
+
true,
98
+
true,
99
+
false,
100
+
true,
101
+
true,
102
+
true,
103
+
true,
104
+
true,
105
+
true,
106
+
true,
107
+
true,
108
+
null
109
+
]
110
+
},
111
+
"hash": "651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0"
112
+
}
.sqlx/query-6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48.json (+16)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n UPDATE play_to_artists_extended\n SET artist_id = $1, artist_name = $2\n WHERE artist_id = $3\n AND NOT EXISTS (\n SELECT 1 FROM play_to_artists_extended existing\n WHERE existing.play_uri = play_to_artists_extended.play_uri\n AND existing.artist_id = $1\n )\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4",
9
+
"Text",
10
+
"Int4"
11
+
]
12
+
},
13
+
"nullable": []
14
+
},
15
+
"hash": "6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48"
16
+
}
.sqlx/query-6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56.json (+52)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n ae1.id as synthetic_id,\n ae1.name as synthetic_name,\n ae2.id as target_id,\n ae2.name as target_name,\n ae2.mbid as target_mbid,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n ORDER BY similarity_score DESC\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "synthetic_id",
9
+
"type_info": "Int4"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "synthetic_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "target_id",
19
+
"type_info": "Int4"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "target_name",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "target_mbid",
29
+
"type_info": "Uuid"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "similarity_score",
34
+
"type_info": "Float4"
35
+
}
36
+
],
37
+
"parameters": {
38
+
"Left": [
39
+
"Float4"
40
+
]
41
+
},
42
+
"nullable": [
43
+
false,
44
+
false,
45
+
false,
46
+
false,
47
+
true,
48
+
null
49
+
]
50
+
},
51
+
"hash": "6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56"
52
+
}
.sqlx/query-76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0.json (+23)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "id",
9
+
"type_info": "Int4"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid",
15
+
"Text"
16
+
]
17
+
},
18
+
"nullable": [
19
+
false
20
+
]
21
+
},
22
+
"hash": "76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0"
23
+
}
.sqlx/query-7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85.json (+29)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO plays (\n uri, cid, did, rkey, isrc, duration, track_name, played_time,\n processed_time, release_mbid, release_name, recording_mbid,\n submission_client_agent, music_service_base_domain, artist_names_raw,\n track_discriminant, release_discriminant\n ) VALUES (\n $1, $2, $3, $4, $5, $6, $7, $8,\n NOW(), $9, $10, $11, $12, $13, $14, $15, $16\n ) ON CONFLICT(uri) DO UPDATE SET\n isrc = EXCLUDED.isrc,\n duration = EXCLUDED.duration,\n track_name = EXCLUDED.track_name,\n played_time = EXCLUDED.played_time,\n processed_time = EXCLUDED.processed_time,\n release_mbid = EXCLUDED.release_mbid,\n release_name = EXCLUDED.release_name,\n recording_mbid = EXCLUDED.recording_mbid,\n submission_client_agent = EXCLUDED.submission_client_agent,\n music_service_base_domain = EXCLUDED.music_service_base_domain,\n artist_names_raw = EXCLUDED.artist_names_raw,\n track_discriminant = EXCLUDED.track_discriminant,\n release_discriminant = EXCLUDED.release_discriminant;\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text",
9
+
"Text",
10
+
"Text",
11
+
"Text",
12
+
"Text",
13
+
"Int4",
14
+
"Text",
15
+
"Timestamptz",
16
+
"Uuid",
17
+
"Text",
18
+
"Uuid",
19
+
"Text",
20
+
"Text",
21
+
"Jsonb",
22
+
"Text",
23
+
"Text"
24
+
]
25
+
},
26
+
"nullable": []
27
+
},
28
+
"hash": "7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85"
29
+
}
.sqlx/query-7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10.json (+16)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid",
9
+
"Text",
10
+
"Uuid"
11
+
]
12
+
},
13
+
"nullable": []
14
+
},
15
+
"hash": "7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10"
16
+
}
+18 .sqlx/query-8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO statii (uri, did, rkey, cid, record)\n VALUES ($1, $2, $3, $4, $5)\n ON CONFLICT (uri) DO UPDATE SET\n cid = EXCLUDED.cid,\n record = EXCLUDED.record,\n indexed_at = NOW();\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Text", "Text", "Text", "Jsonb"] },
    "nullable": []
  },
  "hash": "8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946"
}
+34 .sqlx/query-97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $1\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "name", "type_info": "Text" },
      { "ordinal": 2, "name": "play_count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Int8"] },
    "nullable": [true, true, null]
  },
  "hash": "97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59"
}
+12 .sqlx/query-9af33e4329198dee7814519573b63858eaf69f08ad2959d96ffee5c8387af0ba.json
+16 .sqlx/query-9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES\n ($1, $2, $3)\n ON CONFLICT (play_uri, artist_id) DO NOTHING;\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Int4", "Text"] },
    "nullable": []
  },
  "hash": "9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3"
}
+24 .sqlx/query-9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)\n RETURNING mbid;\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" }
    ],
    "parameters": { "Left": ["Uuid", "Text", "Text"] },
    "nullable": [false]
  },
  "hash": "9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81"
}
+14 .sqlx/query-9d4e872755f90087f64f116d8fee340218e09b40ab8f94b5d9d17b9c39bf3d4f.json
+22 .sqlx/query-ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd.json
{
  "db_name": "PostgreSQL",
  "query": "SELECT generate_synthetic_mbid($1)",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "generate_synthetic_mbid", "type_info": "Uuid" }
    ],
    "parameters": { "Left": ["Text"] },
    "nullable": [null]
  },
  "hash": "ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd"
}
+35 .sqlx/query-af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.did = $1\n AND p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $2\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "name", "type_info": "Text" },
      { "ordinal": 2, "name": "play_count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Text", "Int8"] },
    "nullable": [true, true, null]
  },
  "hash": "af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3"
}
+46 .sqlx/query-b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT DISTINCT\n r1.name as recording1_name,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "recording1_name", "type_info": "Text" },
      { "ordinal": 1, "name": "recording2_name", "type_info": "Text" },
      { "ordinal": 2, "name": "similarity_score", "type_info": "Float4" },
      { "ordinal": 3, "name": "shared_artists", "type_info": "Int8" },
      { "ordinal": 4, "name": "artist_names", "type_info": "Text" }
    ],
    "parameters": { "Left": ["Float4"] },
    "nullable": [false, false, null, null, null]
  },
  "hash": "b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453"
}
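The `similarity()` calls in this and the neighbouring fuzzy-matching queries come from PostgreSQL's `pg_trgm` extension, so the target database has to have that extension enabled before these cached queries can run. A minimal sketch of the setup step follows; the helper name and the idea of calling it from application code are assumptions (in practice this is usually done once in a migration).

```rust
use sqlx::PgPool;

/// Hypothetical helper: make trigram similarity available before the
/// fuzzy-matching queries above are executed. Normally done in a migration.
async fn ensure_pg_trgm(pool: &PgPool) -> Result<(), sqlx::Error> {
    sqlx::query("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
        .execute(pool)
        .await?;
    Ok(())
}
```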
+15 .sqlx/query-b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56.json
{
  "db_name": "PostgreSQL",
  "query": "UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Int4"] },
    "nullable": []
  },
  "hash": "b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56"
}
+65 .sqlx/query-b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837.json
{
  "db_name": "PostgreSQL",
  "query": "SELECT\n p.avatar,\n p.banner,\n p.created_at,\n p.description,\n p.description_facets,\n p.did,\n p.display_name,\n s.record as status\n FROM profiles p\n LEFT JOIN statii s ON p.did = s.did AND s.rkey = 'self'\n WHERE (p.did = ANY($1))\n OR (p.handle = ANY($2))",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "avatar", "type_info": "Text" },
      { "ordinal": 1, "name": "banner", "type_info": "Text" },
      { "ordinal": 2, "name": "created_at", "type_info": "Timestamptz" },
      { "ordinal": 3, "name": "description", "type_info": "Text" },
      { "ordinal": 4, "name": "description_facets", "type_info": "Jsonb" },
      { "ordinal": 5, "name": "did", "type_info": "Text" },
      { "ordinal": 6, "name": "display_name", "type_info": "Text" },
      { "ordinal": 7, "name": "status", "type_info": "Jsonb" }
    ],
    "parameters": { "Left": ["TextArray", "TextArray"] },
    "nullable": [true, true, true, true, true, false, true, true]
  },
  "hash": "b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837"
}
+34 .sqlx/query-b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $1\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "name", "type_info": "Text" },
      { "ordinal": 2, "name": "play_count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Int8"] },
    "nullable": [false, true, null]
  },
  "hash": "b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e"
}
+21 .sqlx/query-b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO profiles (did, handle, display_name, description, description_facets, avatar, banner, created_at)\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8)\n ON CONFLICT (did) DO UPDATE SET\n display_name = EXCLUDED.display_name,\n description = EXCLUDED.description,\n description_facets = EXCLUDED.description_facets,\n avatar = EXCLUDED.avatar,\n banner = EXCLUDED.banner,\n created_at = EXCLUDED.created_at;\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Text", "Text", "Text", "Jsonb", "Text", "Text", "Timestamptz"] },
    "nullable": []
  },
  "hash": "b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded"
}
+22 .sqlx/query-bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224.json
{
  "db_name": "PostgreSQL",
  "query": "SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Uuid"] },
    "nullable": [null]
  },
  "hash": "bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224"
}
+12 .sqlx/query-bf9c6d3bf0f9594ae1c02dc85c9887b747aaa5f0c3e67d9381c3867c4f67ae6d.json
+46 .sqlx/query-cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT DISTINCT\n r1.name as release1_name,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "release1_name", "type_info": "Text" },
      { "ordinal": 1, "name": "release2_name", "type_info": "Text" },
      { "ordinal": 2, "name": "similarity_score", "type_info": "Float4" },
      { "ordinal": 3, "name": "shared_artists", "type_info": "Int8" },
      { "ordinal": 4, "name": "artist_names", "type_info": "Text" }
    ],
    "parameters": { "Left": ["Float4"] },
    "nullable": [false, false, null, null, null]
  },
  "hash": "cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c"
}
+15 .sqlx/query-cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2.json
{
  "db_name": "PostgreSQL",
  "query": "UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Uuid", "Uuid"] },
    "nullable": []
  },
  "hash": "cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2"
}
+14 .sqlx/query-d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45.json
{
  "db_name": "PostgreSQL",
  "query": "DELETE FROM recordings WHERE mbid = $1",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Uuid"] },
    "nullable": []
  },
  "hash": "d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45"
}
+14 .sqlx/query-d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961.json
{
  "db_name": "PostgreSQL",
  "query": "\n DELETE FROM statii WHERE uri = $1\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text"] },
    "nullable": []
  },
  "hash": "d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961"
}
+112 .sqlx/query-f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays p\n LEFT JOIN play_to_artists as pta ON p.uri = pta.play_uri\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time DESC\n LIMIT $1\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "uri", "type_info": "Text" },
      { "ordinal": 1, "name": "did", "type_info": "Text" },
      { "ordinal": 2, "name": "rkey", "type_info": "Text" },
      { "ordinal": 3, "name": "cid", "type_info": "Text" },
      { "ordinal": 4, "name": "isrc", "type_info": "Text" },
      { "ordinal": 5, "name": "duration", "type_info": "Int4" },
      { "ordinal": 6, "name": "track_name", "type_info": "Text" },
      { "ordinal": 7, "name": "played_time", "type_info": "Timestamptz" },
      { "ordinal": 8, "name": "processed_time", "type_info": "Timestamptz" },
      { "ordinal": 9, "name": "release_mbid", "type_info": "Uuid" },
      { "ordinal": 10, "name": "release_name", "type_info": "Text" },
      { "ordinal": 11, "name": "recording_mbid", "type_info": "Uuid" },
      { "ordinal": 12, "name": "submission_client_agent", "type_info": "Text" },
      { "ordinal": 13, "name": "music_service_base_domain", "type_info": "Text" },
      { "ordinal": 14, "name": "origin_url", "type_info": "Text" },
      { "ordinal": 15, "name": "artists", "type_info": "Json" }
    ],
    "parameters": { "Left": ["Int8"] },
    "nullable": [false, false, false, false, true, true, false, true, true, true, true, true, true, true, true, null]
  },
  "hash": "f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0"
}
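The `artists` column in the query above is assembled with `json_build_object`, so each play row carries its artists as a single JSON array rather than as typed columns. A sketch of a shape it could be deserialized into follows; the struct, field optionality, and helper are assumptions based on the keys used in the query, not code from the repository, and `Uuid` deserialization assumes the `uuid` crate's `serde` feature.

```rust
use serde::Deserialize;
use uuid::Uuid;

/// Illustrative shape for one element of the aggregated `artists` JSON array.
#[derive(Debug, Deserialize)]
struct PlayArtist {
    artist_mbid: Option<Uuid>,
    artist_name: Option<String>,
}

/// Parse the JSON column returned by the query into typed values.
fn parse_artists(raw: serde_json::Value) -> Result<Vec<PlayArtist>, serde_json::Error> {
    serde_json::from_value(raw)
}
```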
+23 .sqlx/query-f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "id", "type_info": "Int4" }
    ],
    "parameters": { "Left": ["Uuid", "Text"] },
    "nullable": [false]
  },
  "hash": "f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734"
}
+24 .sqlx/query-f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant)\n RETURNING mbid;\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" }
    ],
    "parameters": { "Left": ["Uuid", "Text", "Text"] },
    "nullable": [false]
  },
  "hash": "f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672"
}
+28 .sqlx/query-fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n ae.id,\n ae.name\n FROM artists_extended ae\n WHERE ae.mbid_type = 'musicbrainz'\n AND (\n LOWER(TRIM(ae.name)) = $1\n OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%'\n OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%'\n OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6\n )\n ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC\n LIMIT 10\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "id", "type_info": "Int4" },
      { "ordinal": 1, "name": "name", "type_info": "Text" }
    ],
    "parameters": { "Left": ["Text"] },
    "nullable": [false, false]
  },
  "hash": "fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7"
}
+52 .sqlx/query-ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT DISTINCT\n r1.mbid as recording1_mbid,\n r1.name as recording1_name,\n r2.mbid as recording2_mbid,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "recording1_mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "recording1_name", "type_info": "Text" },
      { "ordinal": 2, "name": "recording2_mbid", "type_info": "Uuid" },
      { "ordinal": 3, "name": "recording2_name", "type_info": "Text" },
      { "ordinal": 4, "name": "similarity_score", "type_info": "Float4" },
      { "ordinal": 5, "name": "shared_artists", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Float4"] },
    "nullable": [false, false, false, false, null, null]
  },
  "hash": "ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0"
}
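These cache entries are regenerated rather than hand-edited: with the `sqlx-cli` tooling, a command along the lines of `cargo sqlx prepare` run against a live database rewrites the `.sqlx/` directory so the `sqlx::query!` macros can be compiled offline (for example in CI or Docker builds, typically with `SQLX_OFFLINE=true`). The exact invocation this project uses is not shown in the diff, so treat that as the usual SQLx workflow rather than a confirmed project script.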
+616 -241 Cargo.lock
121
121
dependencies = [
122
122
"anyhow",
123
123
"async-trait",
124
+
"atmst",
124
125
"atrium-api",
125
126
"axum",
126
-
"base64",
127
+
"base64 0.22.1",
127
128
"chrono",
128
129
"clap",
129
130
"dotenvy",
···
165
166
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
166
167
167
168
[[package]]
169
+
name = "async-compression"
170
+
version = "0.4.27"
171
+
source = "registry+https://github.com/rust-lang/crates.io-index"
172
+
checksum = "ddb939d66e4ae03cee6091612804ba446b12878410cfa17f785f4dd67d4014e8"
173
+
dependencies = [
174
+
"flate2",
175
+
"futures-core",
176
+
"memchr",
177
+
"pin-project-lite",
178
+
"tokio",
179
+
]
180
+
181
+
[[package]]
168
182
name = "async-lock"
169
183
version = "3.4.0"
170
184
source = "registry+https://github.com/rust-lang/crates.io-index"
···
187
201
]
188
202
189
203
[[package]]
204
+
name = "atmst"
205
+
version = "0.0.1"
206
+
source = "registry+https://github.com/rust-lang/crates.io-index"
207
+
checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d"
208
+
dependencies = [
209
+
"async-trait",
210
+
"bytes",
211
+
"cid 0.11.1",
212
+
"dashmap",
213
+
"futures",
214
+
"ipld-core",
215
+
"iroh-car",
216
+
"log",
217
+
"multihash 0.19.3",
218
+
"serde",
219
+
"serde_ipld_dagcbor",
220
+
"serde_ipld_dagjson",
221
+
"sha2",
222
+
"thiserror 1.0.69",
223
+
"tokio",
224
+
]
225
+
226
+
[[package]]
190
227
name = "atoi"
191
228
version = "2.0.0"
192
229
source = "registry+https://github.com/rust-lang/crates.io-index"
···
210
247
"atrium-common",
211
248
"atrium-xrpc",
212
249
"chrono",
213
-
"http",
250
+
"http 1.3.1",
214
251
"ipld-core",
215
252
"langtag",
216
253
"regex",
···
243
280
source = "registry+https://github.com/rust-lang/crates.io-index"
244
281
checksum = "0216ad50ce34e9ff982e171c3659e65dedaa2ed5ac2994524debdc9a9647ffa8"
245
282
dependencies = [
246
-
"http",
283
+
"http 1.3.1",
247
284
"serde",
248
285
"serde_html_form",
249
286
"serde_json",
···
259
296
260
297
[[package]]
261
298
name = "aws-lc-rs"
262
-
version = "1.13.2"
299
+
version = "1.13.3"
263
300
source = "registry+https://github.com/rust-lang/crates.io-index"
264
-
checksum = "08b5d4e069cbc868041a64bd68dc8cb39a0d79585cd6c5a24caa8c2d622121be"
301
+
checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba"
265
302
dependencies = [
266
303
"aws-lc-sys",
267
304
"zeroize",
···
291
328
"bytes",
292
329
"form_urlencoded",
293
330
"futures-util",
294
-
"http",
331
+
"http 1.3.1",
295
332
"http-body",
296
333
"http-body-util",
297
334
"hyper",
···
324
361
dependencies = [
325
362
"bytes",
326
363
"futures-core",
327
-
"http",
364
+
"http 1.3.1",
328
365
"http-body",
329
366
"http-body-util",
330
367
"mime",
···
348
385
]
349
386
350
387
[[package]]
388
+
name = "backon"
389
+
version = "1.5.2"
390
+
source = "registry+https://github.com/rust-lang/crates.io-index"
391
+
checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d"
392
+
dependencies = [
393
+
"fastrand",
394
+
]
395
+
396
+
[[package]]
351
397
name = "backtrace"
352
398
version = "0.3.75"
353
399
source = "registry+https://github.com/rust-lang/crates.io-index"
···
369
415
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
370
416
371
417
[[package]]
418
+
name = "base16ct"
419
+
version = "0.2.0"
420
+
source = "registry+https://github.com/rust-lang/crates.io-index"
421
+
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
422
+
423
+
[[package]]
424
+
name = "base64"
425
+
version = "0.21.7"
426
+
source = "registry+https://github.com/rust-lang/crates.io-index"
427
+
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
428
+
429
+
[[package]]
372
430
name = "base64"
373
431
version = "0.22.1"
374
432
source = "registry+https://github.com/rust-lang/crates.io-index"
···
397
455
"proc-macro2",
398
456
"quote",
399
457
"regex",
400
-
"rustc-hash",
458
+
"rustc-hash 1.1.0",
401
459
"shlex",
402
460
"syn 2.0.104",
403
461
"which",
···
504
562
version = "1.10.1"
505
563
source = "registry+https://github.com/rust-lang/crates.io-index"
506
564
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
565
+
dependencies = [
566
+
"serde",
567
+
]
507
568
508
569
[[package]]
509
570
name = "cadet"
···
511
572
dependencies = [
512
573
"anyhow",
513
574
"async-trait",
575
+
"atmst",
514
576
"atrium-api",
515
-
"base64",
577
+
"base64 0.22.1",
516
578
"chrono",
517
579
"cid 0.11.1",
518
580
"dotenvy",
519
581
"flume",
582
+
"futures",
520
583
"iroh-car",
521
584
"libipld",
522
585
"metrics 0.23.1",
···
528
591
"reqwest",
529
592
"rocketman",
530
593
"serde",
594
+
"serde_ipld_dagcbor",
531
595
"serde_json",
532
596
"sqlx",
533
597
"time",
534
598
"tokio",
535
-
"tokio-tungstenite",
599
+
"tokio-tungstenite 0.24.0",
536
600
"tracing",
537
601
"tracing-subscriber",
538
602
"types",
···
583
647
584
648
[[package]]
585
649
name = "cc"
586
-
version = "1.2.30"
650
+
version = "1.2.31"
587
651
source = "registry+https://github.com/rust-lang/crates.io-index"
588
-
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
652
+
checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2"
589
653
dependencies = [
590
654
"jobserver",
591
655
"libc",
···
608
672
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
609
673
610
674
[[package]]
675
+
name = "cfg_aliases"
676
+
version = "0.2.1"
677
+
source = "registry+https://github.com/rust-lang/crates.io-index"
678
+
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
679
+
680
+
[[package]]
611
681
name = "chrono"
612
682
version = "0.4.41"
613
683
source = "registry+https://github.com/rust-lang/crates.io-index"
···
662
732
663
733
[[package]]
664
734
name = "clap"
665
-
version = "4.5.41"
735
+
version = "4.5.42"
666
736
source = "registry+https://github.com/rust-lang/crates.io-index"
667
-
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
737
+
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
668
738
dependencies = [
669
739
"clap_builder",
670
740
"clap_derive",
···
672
742
673
743
[[package]]
674
744
name = "clap_builder"
675
-
version = "4.5.41"
745
+
version = "4.5.42"
676
746
source = "registry+https://github.com/rust-lang/crates.io-index"
677
-
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
747
+
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
678
748
dependencies = [
679
749
"anstream",
680
750
"anstyle",
···
716
786
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
717
787
718
788
[[package]]
789
+
name = "colored"
790
+
version = "2.2.0"
791
+
source = "registry+https://github.com/rust-lang/crates.io-index"
792
+
checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
793
+
dependencies = [
794
+
"lazy_static",
795
+
"windows-sys 0.59.0",
796
+
]
797
+
798
+
[[package]]
719
799
name = "combine"
720
800
version = "4.6.7"
721
801
source = "registry+https://github.com/rust-lang/crates.io-index"
···
810
890
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
811
891
812
892
[[package]]
893
+
name = "crc32fast"
894
+
version = "1.5.0"
895
+
source = "registry+https://github.com/rust-lang/crates.io-index"
896
+
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
897
+
dependencies = [
898
+
"cfg-if",
899
+
]
900
+
901
+
[[package]]
813
902
name = "crossbeam-channel"
814
903
version = "0.5.15"
815
904
source = "registry+https://github.com/rust-lang/crates.io-index"
···
841
930
version = "0.8.21"
842
931
source = "registry+https://github.com/rust-lang/crates.io-index"
843
932
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
933
+
934
+
[[package]]
935
+
name = "crypto-bigint"
936
+
version = "0.5.5"
937
+
source = "registry+https://github.com/rust-lang/crates.io-index"
938
+
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
939
+
dependencies = [
940
+
"generic-array",
941
+
"rand_core 0.6.4",
942
+
"subtle",
943
+
"zeroize",
944
+
]
844
945
845
946
[[package]]
846
947
name = "crypto-common"
···
1026
1127
]
1027
1128
1028
1129
[[package]]
1130
+
name = "dirs"
1131
+
version = "5.0.1"
1132
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1133
+
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
1134
+
dependencies = [
1135
+
"dirs-sys",
1136
+
]
1137
+
1138
+
[[package]]
1139
+
name = "dirs-sys"
1140
+
version = "0.4.1"
1141
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1142
+
checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
1143
+
dependencies = [
1144
+
"libc",
1145
+
"option-ext",
1146
+
"redox_users",
1147
+
"windows-sys 0.48.0",
1148
+
]
1149
+
1150
+
[[package]]
1029
1151
name = "displaydoc"
1030
1152
version = "0.2.5"
1031
1153
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1049
1171
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
1050
1172
1051
1173
[[package]]
1174
+
name = "ecdsa"
1175
+
version = "0.16.9"
1176
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1177
+
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
1178
+
dependencies = [
1179
+
"der",
1180
+
"digest",
1181
+
"elliptic-curve",
1182
+
"rfc6979",
1183
+
"signature",
1184
+
"spki",
1185
+
]
1186
+
1187
+
[[package]]
1052
1188
name = "either"
1053
1189
version = "1.15.0"
1054
1190
source = "registry+https://github.com/rust-lang/crates.io-index"
1055
1191
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
1056
1192
dependencies = [
1057
1193
"serde",
1194
+
]
1195
+
1196
+
[[package]]
1197
+
name = "elliptic-curve"
1198
+
version = "0.13.8"
1199
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1200
+
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
1201
+
dependencies = [
1202
+
"base16ct",
1203
+
"crypto-bigint",
1204
+
"digest",
1205
+
"ff",
1206
+
"generic-array",
1207
+
"group",
1208
+
"pkcs8",
1209
+
"rand_core 0.6.4",
1210
+
"sec1",
1211
+
"subtle",
1212
+
"zeroize",
1058
1213
]
1059
1214
1060
1215
[[package]]
···
1121
1276
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
1122
1277
1123
1278
[[package]]
1279
+
name = "ff"
1280
+
version = "0.13.1"
1281
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1282
+
checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
1283
+
dependencies = [
1284
+
"rand_core 0.6.4",
1285
+
"subtle",
1286
+
]
1287
+
1288
+
[[package]]
1289
+
name = "flate2"
1290
+
version = "1.1.2"
1291
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1292
+
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
1293
+
dependencies = [
1294
+
"crc32fast",
1295
+
"miniz_oxide",
1296
+
]
1297
+
1298
+
[[package]]
1124
1299
name = "flume"
1125
1300
version = "0.11.1"
1126
1301
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1145
1320
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
1146
1321
1147
1322
[[package]]
1148
-
name = "foreign-types"
1149
-
version = "0.3.2"
1150
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1151
-
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
1152
-
dependencies = [
1153
-
"foreign-types-shared",
1154
-
]
1155
-
1156
-
[[package]]
1157
-
name = "foreign-types-shared"
1158
-
version = "0.1.1"
1159
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1160
-
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
1161
-
1162
-
[[package]]
1163
1323
name = "form_urlencoded"
1164
1324
version = "1.2.1"
1165
1325
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1296
1456
dependencies = [
1297
1457
"typenum",
1298
1458
"version_check",
1459
+
"zeroize",
1299
1460
]
1300
1461
1301
1462
[[package]]
···
1318
1479
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
1319
1480
dependencies = [
1320
1481
"cfg-if",
1482
+
"js-sys",
1321
1483
"libc",
1322
1484
"r-efi",
1323
1485
"wasi 0.14.2+wasi-0.2.4",
1486
+
"wasm-bindgen",
1324
1487
]
1325
1488
1326
1489
[[package]]
···
1336
1499
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
1337
1500
1338
1501
[[package]]
1502
+
name = "group"
1503
+
version = "0.13.0"
1504
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1505
+
checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
1506
+
dependencies = [
1507
+
"ff",
1508
+
"rand_core 0.6.4",
1509
+
"subtle",
1510
+
]
1511
+
1512
+
[[package]]
1339
1513
name = "h2"
1340
1514
version = "0.4.11"
1341
1515
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1346
1520
"fnv",
1347
1521
"futures-core",
1348
1522
"futures-sink",
1349
-
"http",
1523
+
"http 1.3.1",
1350
1524
"indexmap",
1351
1525
"slab",
1352
1526
"tokio",
···
1421
1595
1422
1596
[[package]]
1423
1597
name = "http"
1598
+
version = "0.2.12"
1599
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1600
+
checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
1601
+
dependencies = [
1602
+
"bytes",
1603
+
"fnv",
1604
+
"itoa",
1605
+
]
1606
+
1607
+
[[package]]
1608
+
name = "http"
1424
1609
version = "1.3.1"
1425
1610
source = "registry+https://github.com/rust-lang/crates.io-index"
1426
1611
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
···
1437
1622
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
1438
1623
dependencies = [
1439
1624
"bytes",
1440
-
"http",
1625
+
"http 1.3.1",
1441
1626
]
1442
1627
1443
1628
[[package]]
···
1448
1633
dependencies = [
1449
1634
"bytes",
1450
1635
"futures-core",
1451
-
"http",
1636
+
"http 1.3.1",
1452
1637
"http-body",
1453
1638
"pin-project-lite",
1454
1639
]
···
1475
1660
"futures-channel",
1476
1661
"futures-util",
1477
1662
"h2",
1478
-
"http",
1663
+
"http 1.3.1",
1479
1664
"http-body",
1480
1665
"httparse",
1481
1666
"httpdate",
···
1492
1677
source = "registry+https://github.com/rust-lang/crates.io-index"
1493
1678
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
1494
1679
dependencies = [
1495
-
"http",
1680
+
"http 1.3.1",
1496
1681
"hyper",
1497
1682
"hyper-util",
1498
-
"rustls",
1499
-
"rustls-native-certs",
1683
+
"rustls 0.23.31",
1684
+
"rustls-native-certs 0.8.1",
1500
1685
"rustls-pki-types",
1501
1686
"tokio",
1502
-
"tokio-rustls",
1687
+
"tokio-rustls 0.26.2",
1503
1688
"tower-service",
1504
-
]
1505
-
1506
-
[[package]]
1507
-
name = "hyper-tls"
1508
-
version = "0.6.0"
1509
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1510
-
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
1511
-
dependencies = [
1512
-
"bytes",
1513
-
"http-body-util",
1514
-
"hyper",
1515
-
"hyper-util",
1516
-
"native-tls",
1517
-
"tokio",
1518
-
"tokio-native-tls",
1519
-
"tower-service",
1689
+
"webpki-roots 1.0.2",
1520
1690
]
1521
1691
1522
1692
[[package]]
1523
1693
name = "hyper-util"
1524
-
version = "0.1.15"
1694
+
version = "0.1.16"
1525
1695
source = "registry+https://github.com/rust-lang/crates.io-index"
1526
-
checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
1696
+
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
1527
1697
dependencies = [
1528
-
"base64",
1698
+
"base64 0.22.1",
1529
1699
"bytes",
1530
1700
"futures-channel",
1531
1701
"futures-core",
1532
1702
"futures-util",
1533
-
"http",
1703
+
"http 1.3.1",
1534
1704
"http-body",
1535
1705
"hyper",
1536
1706
"ipnet",
1537
1707
"libc",
1538
1708
"percent-encoding",
1539
1709
"pin-project-lite",
1540
-
"socket2 0.5.10",
1541
-
"system-configuration",
1710
+
"socket2 0.6.0",
1542
1711
"tokio",
1543
1712
"tower-service",
1544
1713
"tracing",
1545
-
"windows-registry",
1546
1714
]
1547
1715
1548
1716
[[package]]
···
1694
1862
1695
1863
[[package]]
1696
1864
name = "io-uring"
1697
-
version = "0.7.8"
1865
+
version = "0.7.9"
1698
1866
source = "registry+https://github.com/rust-lang/crates.io-index"
1699
-
checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
1867
+
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
1700
1868
dependencies = [
1701
1869
"bitflags 2.9.1",
1702
1870
"cfg-if",
···
1732
1900
1733
1901
[[package]]
1734
1902
name = "iroh-car"
1735
-
version = "0.4.0"
1903
+
version = "0.5.1"
1736
1904
source = "registry+https://github.com/rust-lang/crates.io-index"
1737
-
checksum = "475a6f0ebd64c87ea011021c67f10b57930f6c286e0163807066bfb83553b1b6"
1905
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
1738
1906
dependencies = [
1739
1907
"anyhow",
1740
-
"cid 0.10.1",
1908
+
"cid 0.11.1",
1741
1909
"futures",
1742
-
"libipld",
1910
+
"serde",
1911
+
"serde_ipld_dagcbor",
1743
1912
"thiserror 1.0.69",
1744
1913
"tokio",
1745
1914
"unsigned-varint 0.7.2",
···
1784
1953
dependencies = [
1785
1954
"once_cell",
1786
1955
"wasm-bindgen",
1956
+
]
1957
+
1958
+
[[package]]
1959
+
name = "k256"
1960
+
version = "0.13.4"
1961
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1962
+
checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
1963
+
dependencies = [
1964
+
"cfg-if",
1965
+
"ecdsa",
1966
+
"elliptic-curve",
1967
+
"once_cell",
1968
+
"sha2",
1969
+
"signature",
1787
1970
]
1788
1971
1789
1972
[[package]]
···
1921
2104
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
1922
2105
dependencies = [
1923
2106
"cfg-if",
1924
-
"windows-targets 0.53.2",
2107
+
"windows-targets 0.53.3",
1925
2108
]
1926
2109
1927
2110
[[package]]
···
1931
2114
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
1932
2115
1933
2116
[[package]]
2117
+
name = "libredox"
2118
+
version = "0.1.9"
2119
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2120
+
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
2121
+
dependencies = [
2122
+
"bitflags 2.9.1",
2123
+
"libc",
2124
+
]
2125
+
2126
+
[[package]]
1934
2127
name = "libsqlite3-sys"
1935
2128
version = "0.30.1"
1936
2129
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1997
2190
]
1998
2191
1999
2192
[[package]]
2193
+
name = "lru-slab"
2194
+
version = "0.1.2"
2195
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2196
+
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
2197
+
2198
+
[[package]]
2000
2199
name = "matchers"
2001
2200
version = "0.1.0"
2002
2201
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2053
2252
source = "registry+https://github.com/rust-lang/crates.io-index"
2054
2253
checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034"
2055
2254
dependencies = [
2056
-
"base64",
2255
+
"base64 0.22.1",
2057
2256
"http-body-util",
2058
2257
"hyper",
2059
2258
"hyper-rustls",
···
2079
2278
"hashbrown 0.15.4",
2080
2279
"metrics 0.24.2",
2081
2280
"quanta",
2082
-
"rand 0.9.1",
2281
+
"rand 0.9.2",
2083
2282
"rand_xoshiro",
2084
2283
"sketches-ddsketch",
2085
2284
]
···
2147
2346
"bytes",
2148
2347
"encoding_rs",
2149
2348
"futures-util",
2150
-
"http",
2349
+
"http 1.3.1",
2151
2350
"httparse",
2152
2351
"memchr",
2153
2352
"mime",
···
2262
2461
]
2263
2462
2264
2463
[[package]]
2265
-
name = "native-tls"
2266
-
version = "0.2.14"
2267
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2268
-
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
2269
-
dependencies = [
2270
-
"libc",
2271
-
"log",
2272
-
"openssl",
2273
-
"openssl-probe",
2274
-
"openssl-sys",
2275
-
"schannel",
2276
-
"security-framework 2.11.1",
2277
-
"security-framework-sys",
2278
-
"tempfile",
2279
-
]
2280
-
2281
-
[[package]]
2282
2464
name = "nom"
2283
2465
version = "7.1.3"
2284
2466
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2305
2487
dependencies = [
2306
2488
"overload",
2307
2489
"winapi",
2490
+
]
2491
+
2492
+
[[package]]
2493
+
name = "num-bigint"
2494
+
version = "0.4.6"
2495
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2496
+
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
2497
+
dependencies = [
2498
+
"num-integer",
2499
+
"num-traits",
2308
2500
]
2309
2501
2310
2502
[[package]]
···
2400
2592
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
2401
2593
2402
2594
[[package]]
2403
-
name = "openssl"
2404
-
version = "0.10.73"
2405
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2406
-
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
2407
-
dependencies = [
2408
-
"bitflags 2.9.1",
2409
-
"cfg-if",
2410
-
"foreign-types",
2411
-
"libc",
2412
-
"once_cell",
2413
-
"openssl-macros",
2414
-
"openssl-sys",
2415
-
]
2416
-
2417
-
[[package]]
2418
-
name = "openssl-macros"
2419
-
version = "0.1.1"
2420
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2421
-
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
2422
-
dependencies = [
2423
-
"proc-macro2",
2424
-
"quote",
2425
-
"syn 2.0.104",
2426
-
]
2427
-
2428
-
[[package]]
2429
2595
name = "openssl-probe"
2430
2596
version = "0.1.6"
2431
2597
source = "registry+https://github.com/rust-lang/crates.io-index"
2432
2598
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
2433
2599
2434
2600
[[package]]
2435
-
name = "openssl-sys"
2436
-
version = "0.9.109"
2601
+
name = "option-ext"
2602
+
version = "0.2.0"
2437
2603
source = "registry+https://github.com/rust-lang/crates.io-index"
2438
-
checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
2439
-
dependencies = [
2440
-
"cc",
2441
-
"libc",
2442
-
"pkg-config",
2443
-
"vcpkg",
2444
-
]
2604
+
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
2445
2605
2446
2606
[[package]]
2447
2607
name = "overload"
···
2494
2654
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
2495
2655
2496
2656
[[package]]
2497
-
name = "pin-project"
2498
-
version = "1.1.10"
2499
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2500
-
checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
2501
-
dependencies = [
2502
-
"pin-project-internal",
2503
-
]
2504
-
2505
-
[[package]]
2506
-
name = "pin-project-internal"
2507
-
version = "1.1.10"
2508
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2509
-
checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
2510
-
dependencies = [
2511
-
"proc-macro2",
2512
-
"quote",
2513
-
"syn 2.0.104",
2514
-
]
2515
-
2516
-
[[package]]
2517
2657
name = "pin-project-lite"
2518
2658
version = "0.2.16"
2519
2659
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2584
2724
2585
2725
[[package]]
2586
2726
name = "prettyplease"
2587
-
version = "0.2.35"
2727
+
version = "0.2.36"
2588
2728
source = "registry+https://github.com/rust-lang/crates.io-index"
2589
-
checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a"
2729
+
checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2"
2590
2730
dependencies = [
2591
2731
"proc-macro2",
2592
2732
"syn 2.0.104",
···
2669
2809
]
2670
2810
2671
2811
[[package]]
2812
+
name = "quinn"
2813
+
version = "0.11.8"
2814
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2815
+
checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8"
2816
+
dependencies = [
2817
+
"bytes",
2818
+
"cfg_aliases",
2819
+
"pin-project-lite",
2820
+
"quinn-proto",
2821
+
"quinn-udp",
2822
+
"rustc-hash 2.1.1",
2823
+
"rustls 0.23.31",
2824
+
"socket2 0.5.10",
2825
+
"thiserror 2.0.12",
2826
+
"tokio",
2827
+
"tracing",
2828
+
"web-time",
2829
+
]
2830
+
2831
+
[[package]]
2832
+
name = "quinn-proto"
2833
+
version = "0.11.12"
2834
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2835
+
checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e"
2836
+
dependencies = [
2837
+
"bytes",
2838
+
"getrandom 0.3.3",
2839
+
"lru-slab",
2840
+
"rand 0.9.2",
2841
+
"ring",
2842
+
"rustc-hash 2.1.1",
2843
+
"rustls 0.23.31",
2844
+
"rustls-pki-types",
2845
+
"slab",
2846
+
"thiserror 2.0.12",
2847
+
"tinyvec",
2848
+
"tracing",
2849
+
"web-time",
2850
+
]
2851
+
2852
+
[[package]]
2853
+
name = "quinn-udp"
2854
+
version = "0.5.13"
2855
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2856
+
checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970"
2857
+
dependencies = [
2858
+
"cfg_aliases",
2859
+
"libc",
2860
+
"once_cell",
2861
+
"socket2 0.5.10",
2862
+
"tracing",
2863
+
"windows-sys 0.59.0",
2864
+
]
2865
+
2866
+
[[package]]
2672
2867
name = "quote"
2673
2868
version = "1.0.40"
2674
2869
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2696
2891
2697
2892
[[package]]
2698
2893
name = "rand"
2699
-
version = "0.9.1"
2894
+
version = "0.9.2"
2700
2895
source = "registry+https://github.com/rust-lang/crates.io-index"
2701
-
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
2896
+
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
2702
2897
dependencies = [
2703
2898
"rand_chacha 0.9.0",
2704
2899
"rand_core 0.9.3",
···
2762
2957
2763
2958
[[package]]
2764
2959
name = "redis"
2765
-
version = "0.24.0"
2960
+
version = "0.32.4"
2766
2961
source = "registry+https://github.com/rust-lang/crates.io-index"
2767
-
checksum = "c580d9cbbe1d1b479e8d67cf9daf6a62c957e6846048408b80b43ac3f6af84cd"
2962
+
checksum = "e1f66bf4cac9733a23bcdf1e0e01effbaaad208567beba68be8f67e5f4af3ee1"
2768
2963
dependencies = [
2769
2964
"arc-swap",
2770
-
"async-trait",
2965
+
"backon",
2771
2966
"bytes",
2967
+
"cfg-if",
2772
2968
"combine",
2773
-
"futures",
2969
+
"futures-channel",
2774
2970
"futures-util",
2775
2971
"itoa",
2972
+
"num-bigint",
2776
2973
"percent-encoding",
2777
2974
"pin-project-lite",
2778
2975
"ryu",
2779
2976
"sha1_smol",
2780
-
"socket2 0.4.10",
2977
+
"socket2 0.6.0",
2781
2978
"tokio",
2782
-
"tokio-retry",
2783
2979
"tokio-util",
2784
2980
"url",
2785
2981
]
2786
2982
2787
2983
[[package]]
2788
2984
name = "redox_syscall"
2789
-
version = "0.5.13"
2985
+
version = "0.5.17"
2790
2986
source = "registry+https://github.com/rust-lang/crates.io-index"
2791
-
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
2987
+
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
2792
2988
dependencies = [
2793
2989
"bitflags 2.9.1",
2794
2990
]
2795
2991
2796
2992
[[package]]
2993
+
name = "redox_users"
2994
+
version = "0.4.6"
2995
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2996
+
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
2997
+
dependencies = [
2998
+
"getrandom 0.2.16",
2999
+
"libredox",
3000
+
"thiserror 1.0.69",
3001
+
]
3002
+
3003
+
[[package]]
2797
3004
name = "regex"
2798
3005
version = "1.11.1"
2799
3006
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2843
3050
source = "registry+https://github.com/rust-lang/crates.io-index"
2844
3051
checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531"
2845
3052
dependencies = [
2846
-
"base64",
3053
+
"async-compression",
3054
+
"base64 0.22.1",
2847
3055
"bytes",
2848
-
"encoding_rs",
2849
3056
"futures-core",
2850
-
"h2",
2851
-
"http",
3057
+
"futures-util",
3058
+
"http 1.3.1",
2852
3059
"http-body",
2853
3060
"http-body-util",
2854
3061
"hyper",
2855
3062
"hyper-rustls",
2856
-
"hyper-tls",
2857
3063
"hyper-util",
2858
3064
"js-sys",
2859
3065
"log",
2860
-
"mime",
2861
-
"native-tls",
2862
3066
"percent-encoding",
2863
3067
"pin-project-lite",
3068
+
"quinn",
3069
+
"rustls 0.23.31",
2864
3070
"rustls-pki-types",
2865
3071
"serde",
2866
3072
"serde_json",
2867
3073
"serde_urlencoded",
2868
3074
"sync_wrapper",
2869
3075
"tokio",
2870
-
"tokio-native-tls",
3076
+
"tokio-rustls 0.26.2",
3077
+
"tokio-util",
2871
3078
"tower",
2872
3079
"tower-http",
2873
3080
"tower-service",
2874
3081
"url",
2875
3082
"wasm-bindgen",
2876
3083
"wasm-bindgen-futures",
3084
+
"wasm-streams",
2877
3085
"web-sys",
3086
+
"webpki-roots 1.0.2",
3087
+
]
3088
+
3089
+
[[package]]
3090
+
name = "rfc6979"
3091
+
version = "0.4.0"
3092
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3093
+
checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
3094
+
dependencies = [
3095
+
"hmac",
3096
+
"subtle",
2878
3097
]
2879
3098
2880
3099
[[package]]
···
2903
3122
[[package]]
2904
3123
name = "rocketman"
2905
3124
version = "0.2.3"
3125
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3126
+
checksum = "9928fe43979c19ff1f46f7920c30b76dfcead7a4d571c9836c4d02da8587f844"
2906
3127
dependencies = [
2907
3128
"anyhow",
2908
3129
"async-trait",
···
2910
3131
"derive_builder",
2911
3132
"flume",
2912
3133
"futures-util",
2913
-
"metrics 0.23.1",
3134
+
"metrics 0.24.2",
2914
3135
"rand 0.8.5",
2915
3136
"serde",
2916
3137
"serde_json",
2917
3138
"tokio",
2918
-
"tokio-tungstenite",
3139
+
"tokio-tungstenite 0.20.1",
2919
3140
"tracing",
2920
3141
"tracing-subscriber",
2921
3142
"url",
···
2944
3165
2945
3166
[[package]]
2946
3167
name = "rustc-demangle"
2947
-
version = "0.1.25"
3168
+
version = "0.1.26"
2948
3169
source = "registry+https://github.com/rust-lang/crates.io-index"
2949
-
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
3170
+
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
2950
3171
2951
3172
[[package]]
2952
3173
name = "rustc-hash"
2953
3174
version = "1.1.0"
2954
3175
source = "registry+https://github.com/rust-lang/crates.io-index"
2955
3176
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
3177
+
3178
+
[[package]]
3179
+
name = "rustc-hash"
3180
+
version = "2.1.1"
3181
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3182
+
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
2956
3183
2957
3184
[[package]]
2958
3185
name = "rustc_version"
···
2991
3218
2992
3219
[[package]]
2993
3220
name = "rustls"
2994
-
version = "0.23.29"
3221
+
version = "0.21.12"
3222
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3223
+
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
3224
+
dependencies = [
3225
+
"log",
3226
+
"ring",
3227
+
"rustls-webpki 0.101.7",
3228
+
"sct",
3229
+
]
3230
+
3231
+
[[package]]
3232
+
name = "rustls"
3233
+
version = "0.23.31"
2995
3234
source = "registry+https://github.com/rust-lang/crates.io-index"
2996
-
checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
3235
+
checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc"
2997
3236
dependencies = [
2998
3237
"aws-lc-rs",
2999
3238
"once_cell",
3239
+
"ring",
3000
3240
"rustls-pki-types",
3001
-
"rustls-webpki",
3241
+
"rustls-webpki 0.103.4",
3002
3242
"subtle",
3003
3243
"zeroize",
3004
3244
]
3005
3245
3006
3246
[[package]]
3007
3247
name = "rustls-native-certs"
3248
+
version = "0.6.3"
3249
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3250
+
checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00"
3251
+
dependencies = [
3252
+
"openssl-probe",
3253
+
"rustls-pemfile",
3254
+
"schannel",
3255
+
"security-framework 2.11.1",
3256
+
]
3257
+
3258
+
[[package]]
3259
+
name = "rustls-native-certs"
3008
3260
version = "0.8.1"
3009
3261
source = "registry+https://github.com/rust-lang/crates.io-index"
3010
3262
checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
···
3016
3268
]
3017
3269
3018
3270
[[package]]
3271
+
name = "rustls-pemfile"
3272
+
version = "1.0.4"
3273
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3274
+
checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
3275
+
dependencies = [
3276
+
"base64 0.21.7",
3277
+
]
3278
+
3279
+
[[package]]
3019
3280
name = "rustls-pki-types"
3020
3281
version = "1.12.0"
3021
3282
source = "registry+https://github.com/rust-lang/crates.io-index"
3022
3283
checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
3023
3284
dependencies = [
3285
+
"web-time",
3024
3286
"zeroize",
3287
+
]
3288
+
3289
+
[[package]]
3290
+
name = "rustls-webpki"
3291
+
version = "0.101.7"
3292
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3293
+
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
3294
+
dependencies = [
3295
+
"ring",
3296
+
"untrusted",
3025
3297
]
3026
3298
3027
3299
[[package]]
···
3070
3342
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
3071
3343
3072
3344
[[package]]
3345
+
name = "sct"
3346
+
version = "0.7.1"
3347
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3348
+
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
3349
+
dependencies = [
3350
+
"ring",
3351
+
"untrusted",
3352
+
]
3353
+
3354
+
[[package]]
3355
+
name = "sec1"
3356
+
version = "0.7.3"
3357
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3358
+
checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
3359
+
dependencies = [
3360
+
"base16ct",
3361
+
"der",
3362
+
"generic-array",
3363
+
"pkcs8",
3364
+
"subtle",
3365
+
"zeroize",
3366
+
]
3367
+
3368
+
[[package]]
3073
3369
name = "security-framework"
3074
3370
version = "2.11.1"
3075
3371
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3169
3465
]
3170
3466
3171
3467
[[package]]
3468
+
name = "serde_ipld_dagjson"
3469
+
version = "0.2.0"
3470
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3471
+
checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36"
3472
+
dependencies = [
3473
+
"ipld-core",
3474
+
"serde",
3475
+
"serde_json",
3476
+
]
3477
+
3478
+
[[package]]
3172
3479
name = "serde_json"
3173
-
version = "1.0.141"
3480
+
version = "1.0.142"
3174
3481
source = "registry+https://github.com/rust-lang/crates.io-index"
3175
-
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
3482
+
checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7"
3176
3483
dependencies = [
3177
3484
"itoa",
3178
3485
"memchr",
···
3256
3563
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3257
3564
3258
3565
[[package]]
3566
+
name = "signal-hook-registry"
3567
+
version = "1.4.5"
3568
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3569
+
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
3570
+
dependencies = [
3571
+
"libc",
3572
+
]
3573
+
3574
+
[[package]]
3259
3575
name = "signature"
3260
3576
version = "2.2.0"
3261
3577
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3288
3604
3289
3605
[[package]]
3290
3606
name = "socket2"
3291
-
version = "0.4.10"
3607
+
version = "0.5.10"
3292
3608
source = "registry+https://github.com/rust-lang/crates.io-index"
3293
-
checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
3609
+
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
3294
3610
dependencies = [
3295
3611
"libc",
3296
-
"winapi",
3612
+
"windows-sys 0.52.0",
3297
3613
]
3298
3614
3299
3615
[[package]]
3300
3616
name = "socket2"
3301
-
version = "0.5.10"
3617
+
version = "0.6.0"
3302
3618
source = "registry+https://github.com/rust-lang/crates.io-index"
3303
-
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
3619
+
checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
3304
3620
dependencies = [
3305
3621
"libc",
3306
-
"windows-sys 0.52.0",
3622
+
"windows-sys 0.59.0",
3307
3623
]
3308
3624
3309
3625
[[package]]
···
3344
3660
source = "registry+https://github.com/rust-lang/crates.io-index"
3345
3661
checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
3346
3662
dependencies = [
3347
-
"base64",
3663
+
"base64 0.22.1",
3348
3664
"bytes",
3349
3665
"crc",
3350
3666
"crossbeam-queue",
···
3361
3677
"memchr",
3362
3678
"once_cell",
3363
3679
"percent-encoding",
3680
+
"rustls 0.23.31",
3364
3681
"serde",
3365
3682
"serde_json",
3366
3683
"sha2",
···
3372
3689
"tracing",
3373
3690
"url",
3374
3691
"uuid",
3692
+
"webpki-roots 0.26.11",
3375
3693
]
3376
3694
3377
3695
[[package]]
···
3419
3737
checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
3420
3738
dependencies = [
3421
3739
"atoi",
3422
-
"base64",
3740
+
"base64 0.22.1",
3423
3741
"bitflags 2.9.1",
3424
3742
"byteorder",
3425
3743
"bytes",
···
3463
3781
checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
3464
3782
dependencies = [
3465
3783
"atoi",
3466
-
"base64",
3784
+
"base64 0.22.1",
3467
3785
"bitflags 2.9.1",
3468
3786
"byteorder",
3469
3787
"crc",
···
3641
3959
]
3642
3960
3643
3961
[[package]]
3644
-
name = "system-configuration"
3645
-
version = "0.6.1"
3962
+
name = "tagptr"
3963
+
version = "0.2.0"
3646
3964
source = "registry+https://github.com/rust-lang/crates.io-index"
3647
-
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
3648
-
dependencies = [
3649
-
"bitflags 2.9.1",
3650
-
"core-foundation 0.9.4",
3651
-
"system-configuration-sys",
3652
-
]
3965
+
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
3653
3966
3654
3967
[[package]]
3655
-
name = "system-configuration-sys"
3656
-
version = "0.6.0"
3657
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3658
-
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
3968
+
name = "teal-cli"
3969
+
version = "0.1.0"
3659
3970
dependencies = [
3660
-
"core-foundation-sys",
3661
-
"libc",
3971
+
"anyhow",
3972
+
"chrono",
3973
+
"clap",
3974
+
"colored",
3975
+
"dirs",
3976
+
"hex",
3977
+
"k256",
3978
+
"multibase",
3979
+
"rand 0.8.5",
3980
+
"serde",
3981
+
"serde_json",
3982
+
"tempfile",
3983
+
"tokio",
3662
3984
]
3663
-
3664
-
[[package]]
3665
-
name = "tagptr"
3666
-
version = "0.2.0"
3667
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3668
-
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
3669
3985
3670
3986
[[package]]
3671
3987
name = "tempfile"
···
3789
4105
3790
4106
[[package]]
3791
4107
name = "tokio"
3792
-
version = "1.46.1"
4108
+
version = "1.47.1"
3793
4109
source = "registry+https://github.com/rust-lang/crates.io-index"
3794
-
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
4110
+
checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
3795
4111
dependencies = [
3796
4112
"backtrace",
3797
4113
"bytes",
3798
4114
"io-uring",
3799
4115
"libc",
3800
4116
"mio",
4117
+
"parking_lot",
3801
4118
"pin-project-lite",
4119
+
"signal-hook-registry",
3802
4120
"slab",
3803
-
"socket2 0.5.10",
4121
+
"socket2 0.6.0",
3804
4122
"tokio-macros",
3805
-
"windows-sys 0.52.0",
4123
+
"windows-sys 0.59.0",
3806
4124
]
3807
4125
3808
4126
[[package]]
···
3817
4135
]
3818
4136
3819
4137
[[package]]
3820
-
name = "tokio-native-tls"
3821
-
version = "0.3.1"
3822
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3823
-
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
3824
-
dependencies = [
3825
-
"native-tls",
3826
-
"tokio",
3827
-
]
3828
-
3829
-
[[package]]
3830
-
name = "tokio-retry"
3831
-
version = "0.3.0"
4138
+
name = "tokio-rustls"
4139
+
version = "0.24.1"
3832
4140
source = "registry+https://github.com/rust-lang/crates.io-index"
3833
-
checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
4141
+
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
3834
4142
dependencies = [
3835
-
"pin-project",
3836
-
"rand 0.8.5",
4143
+
"rustls 0.21.12",
3837
4144
"tokio",
3838
4145
]
3839
4146
···
3843
4150
source = "registry+https://github.com/rust-lang/crates.io-index"
3844
4151
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
3845
4152
dependencies = [
3846
-
"rustls",
4153
+
"rustls 0.23.31",
3847
4154
"tokio",
3848
4155
]
3849
4156
···
3860
4167
3861
4168
[[package]]
3862
4169
name = "tokio-tungstenite"
4170
+
version = "0.20.1"
4171
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4172
+
checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c"
4173
+
dependencies = [
4174
+
"futures-util",
4175
+
"log",
4176
+
"rustls 0.21.12",
4177
+
"rustls-native-certs 0.6.3",
4178
+
"tokio",
4179
+
"tokio-rustls 0.24.1",
4180
+
"tungstenite 0.20.1",
4181
+
"webpki-roots 0.25.4",
4182
+
]
4183
+
4184
+
[[package]]
4185
+
name = "tokio-tungstenite"
3863
4186
version = "0.24.0"
3864
4187
source = "registry+https://github.com/rust-lang/crates.io-index"
3865
4188
checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
3866
4189
dependencies = [
3867
4190
"futures-util",
3868
4191
"log",
4192
+
"rustls 0.23.31",
4193
+
"rustls-pki-types",
3869
4194
"tokio",
3870
-
"tungstenite",
4195
+
"tokio-rustls 0.26.2",
4196
+
"tungstenite 0.24.0",
4197
+
"webpki-roots 0.26.11",
3871
4198
]
3872
4199
3873
4200
[[package]]
···
3934
4261
"bitflags 2.9.1",
3935
4262
"bytes",
3936
4263
"futures-util",
3937
-
"http",
4264
+
"http 1.3.1",
3938
4265
"http-body",
3939
4266
"iri-string",
3940
4267
"pin-project-lite",
···
4036
4363
4037
4364
[[package]]
4038
4365
name = "tungstenite"
4366
+
version = "0.20.1"
4367
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4368
+
checksum = "9e3dac10fd62eaf6617d3a904ae222845979aec67c615d1c842b4002c7666fb9"
4369
+
dependencies = [
4370
+
"byteorder",
4371
+
"bytes",
4372
+
"data-encoding",
4373
+
"http 0.2.12",
4374
+
"httparse",
4375
+
"log",
4376
+
"rand 0.8.5",
4377
+
"rustls 0.21.12",
4378
+
"sha1",
4379
+
"thiserror 1.0.69",
4380
+
"url",
4381
+
"utf-8",
4382
+
]
4383
+
4384
+
[[package]]
4385
+
name = "tungstenite"
4039
4386
version = "0.24.0"
4040
4387
source = "registry+https://github.com/rust-lang/crates.io-index"
4041
4388
checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
···
4043
4390
"byteorder",
4044
4391
"bytes",
4045
4392
"data-encoding",
4046
-
"http",
4393
+
"http 1.3.1",
4047
4394
"httparse",
4048
4395
"log",
4049
4396
"rand 0.8.5",
4397
+
"rustls 0.23.31",
4398
+
"rustls-pki-types",
4050
4399
"sha1",
4051
4400
"thiserror 1.0.69",
4052
4401
"utf-8",
···
4065
4414
"atrium-api",
4066
4415
"atrium-xrpc",
4067
4416
"chrono",
4068
-
"http",
4417
+
"http 1.3.1",
4069
4418
"ipld-core",
4070
4419
"langtag",
4071
4420
"regex",
···
4074
4423
"serde_ipld_dagcbor",
4075
4424
"serde_json",
4076
4425
"thiserror 2.0.12",
4077
-
"uuid",
4078
4426
]
4079
4427
4080
4428
[[package]]
···
4331
4679
]
4332
4680
4333
4681
[[package]]
4682
+
name = "wasm-streams"
4683
+
version = "0.4.2"
4684
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4685
+
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
4686
+
dependencies = [
4687
+
"futures-util",
4688
+
"js-sys",
4689
+
"wasm-bindgen",
4690
+
"wasm-bindgen-futures",
4691
+
"web-sys",
4692
+
]
4693
+
4694
+
[[package]]
4334
4695
name = "web-sys"
4335
4696
version = "0.3.77"
4336
4697
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4348
4709
dependencies = [
4349
4710
"js-sys",
4350
4711
"wasm-bindgen",
4712
+
]
4713
+
4714
+
[[package]]
4715
+
name = "webpki-roots"
4716
+
version = "0.25.4"
4717
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4718
+
checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
4719
+
4720
+
[[package]]
4721
+
name = "webpki-roots"
4722
+
version = "0.26.11"
4723
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4724
+
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
4725
+
dependencies = [
4726
+
"webpki-roots 1.0.2",
4727
+
]
4728
+
4729
+
[[package]]
4730
+
name = "webpki-roots"
4731
+
version = "1.0.2"
4732
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4733
+
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
4734
+
dependencies = [
4735
+
"rustls-pki-types",
4351
4736
]
4352
4737
4353
4738
[[package]]
···
4523
4908
]
4524
4909
4525
4910
[[package]]
4526
-
name = "windows-registry"
4527
-
version = "0.5.3"
4528
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4529
-
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
4530
-
dependencies = [
4531
-
"windows-link",
4532
-
"windows-result 0.3.4",
4533
-
"windows-strings",
4534
-
]
4535
-
4536
-
[[package]]
4537
4911
name = "windows-result"
4538
4912
version = "0.1.2"
4539
4913
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4593
4967
source = "registry+https://github.com/rust-lang/crates.io-index"
4594
4968
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
4595
4969
dependencies = [
4596
-
"windows-targets 0.53.2",
4970
+
"windows-targets 0.53.3",
4597
4971
]
4598
4972
4599
4973
[[package]]
···
4629
5003
4630
5004
[[package]]
4631
5005
name = "windows-targets"
4632
-
version = "0.53.2"
5006
+
version = "0.53.3"
4633
5007
source = "registry+https://github.com/rust-lang/crates.io-index"
4634
-
checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
5008
+
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
4635
5009
dependencies = [
5010
+
"windows-link",
4636
5011
"windows_aarch64_gnullvm 0.53.0",
4637
5012
"windows_aarch64_msvc 0.53.0",
4638
5013
"windows_i686_gnu 0.53.0",
+30
-8
Cargo.toml
+30
-8
Cargo.toml
···
1
1
[workspace]
2
-
members = ["apps/aqua", "services/cadet", "services/rocketman"]
2
+
members = ["apps/aqua", "services/cadet", "tools/teal-cli"]
3
+
default-members = ["services/types"]
3
4
resolver = "2"
4
5
5
6
[workspace.dependencies]
6
7
# Shared dependencies
7
-
tokio = { version = "1.0", features = ["rt-multi-thread", "macros"] }
8
+
tokio = { version = "1.0", features = [
9
+
"rt-multi-thread",
10
+
"macros",
11
+
"time",
12
+
"net",
13
+
"sync",
14
+
] }
8
15
axum = { version = "0.8", features = ["macros"] }
9
16
tower-http = { version = "0.6", features = ["cors"] }
10
-
sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "uuid"] }
17
+
sqlx = { version = "0.8", features = [
18
+
"runtime-tokio",
19
+
"postgres",
20
+
"uuid",
21
+
"tls-rustls",
22
+
] }
11
23
serde = { version = "1.0", features = ["derive"] }
12
24
anyhow = "1.0"
13
25
serde_json = "1.0"
14
26
tracing = "0.1"
15
27
tracing-subscriber = "0.3"
16
28
metrics = "0.23"
17
-
reqwest = { version = "0.12", features = ["json"] }
29
+
reqwest = { version = "0.12", default-features = false, features = [
30
+
"json",
31
+
"rustls-tls",
32
+
"stream",
33
+
"gzip",
34
+
] }
18
35
url = "2.5"
19
36
rand = "0.8"
20
37
flume = "0.11"
21
38
async-trait = "0.1"
22
39
time = "0.3"
23
40
dotenvy = "0.15"
24
-
tokio-tungstenite = "0.24"
41
+
tokio-tungstenite = { version = "*", default-features = false, features = [
42
+
"rustls-tls-webpki-roots",
43
+
"connect",
44
+
"handshake",
45
+
] }
25
46
atrium-api = "0.25"
26
47
chrono = "0.4"
27
48
uuid = { version = "1.0", features = ["v4", "serde"] }
28
49
types = { path = "services/types" }
29
-
rocketman = { path = "services/rocketman" }
50
+
rocketman = "0.2.3"
30
51
31
52
# CAR and IPLD dependencies
32
-
iroh-car = "0.4"
53
+
iroh-car = "0.5"
33
54
libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] }
34
55
cid = "0.11"
35
56
base64 = "0.22"
57
+
atmst = "0.0.1"
36
58
37
59
# Redis for job queues and caching
38
-
redis = { version = "0.24", features = ["tokio-comp", "connection-manager"] }
60
+
redis = { version = "0.32", features = ["tokio-comp", "connection-manager"] }
+18
Cross.toml
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = [
14
+
"CARGO_HOME",
15
+
"CARGO_TARGET_DIR",
16
+
"SQLX_OFFLINE",
17
+
"PKG_CONFIG_ALLOW_CROSS",
18
+
]
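A minimal sketch of how this Cross.toml might be exercised locally, assuming `cross` and `sqlx-cli` are installed; the passthrough entries above are what let `SQLX_OFFLINE` reach the build container:

```bash
# Regenerate the offline query cache so SQLX_OFFLINE builds can use .sqlx
cargo sqlx prepare --workspace

# Cross-compile for ARM64 using the cross-rs image configured above
SQLX_OFFLINE=true cross build --release --target aarch64-unknown-linux-gnu
```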
+14
-9
README.md
···
29
29
# Install all dependencies (Node.js and Rust)
30
30
pnpm install
31
31
32
+
# clone submodules
33
+
git submodule update --init --recursive
34
+
32
35
# Set up environment configuration
33
36
cp apps/aqua/.env.example apps/aqua/.env
34
37
···
90
93
- **Format**: `YYYYMMDDHHMMSS_description.sql` (timestamped SQL files)
91
94
- **Type**: Forward-only SQL migrations managed by SQLx
92
95
93
-
#### Database Schema
94
-
95
-
The database includes tables for:
96
-
- **Music data**: `artists`, `releases`, `recordings`, `plays`
97
-
- **User data**: `profiles`, `statii` (status records), `featured_items`
98
-
- **CAR imports**: `car_import_requests`, `car_blocks`, `car_extracted_records`
99
-
- **Analytics**: Materialized views for play counts and top charts
100
-
101
96
## Development
102
97
103
98
To start the development server run:
···
106
101
turbo dev --filter=@teal/aqua
107
102
```
108
103
109
-
Open http://localhost:3000/ with your browser to see the home page. You will need to login with Bluesky to test the posting functionality of the app. Note: if the redirect back to the app after you login isn't working correctly, you may need to replace the `127.0.0.1` with `localhost`.
104
+
Open http://localhost:3000/ with your browser to see the home page. Note: if the redirect back to the app after you log in isn't working correctly, you may need to replace `127.0.0.1` with `localhost`, or you may need to set up a publicly accessible endpoint for the app to post to (see below).
110
105
111
106
### Running the full stack in docker for development
112
107
···
153
148
154
149
# Show lexicon change impact
155
150
pnpm lex:diff
151
+
```
152
+
153
+
# Updating Vendored Lexicons
154
+
To update vendored lexicons (anything that's not under fm.teal), follow these steps:
155
+
```bash
156
+
cd vendor/atproto
157
+
git pull origin main
158
+
cd ../..
159
+
git add vendor/atproto
160
+
git commit -m "Update atproto lexicons to latest"
156
161
```
157
162
158
163
See [`tools/lexicon-cli/README.md`](tools/lexicon-cli/README.md) for detailed documentation.
+24
-14
apps/amethyst/Dockerfile
+24
-14
apps/amethyst/Dockerfile
···
18
18
COPY packages/lexicons/ ./packages/lexicons/
19
19
COPY packages/tsconfig/ ./packages/tsconfig/
20
20
21
+
# Copy lexicons source data
22
+
COPY lexicons/ ./lexicons/
23
+
21
24
# Copy the aqua app
22
25
COPY apps/amethyst/ ./apps/amethyst/
23
26
24
27
# Copy .env
25
28
COPY ../../.env ./apps/amethyst/.env
26
29
27
-
# Build the aqua app
30
+
# Install dependencies and generate lexicons
31
+
RUN cd tools/lexicon-cli && pnpm build
32
+
33
+
# Generate lexicons before building amethyst
34
+
RUN pnpm lex:gen-server
35
+
36
+
RUN pnpm install
37
+
38
+
# Build the amethyst app
28
39
WORKDIR /app/apps/amethyst
29
-
RUN pnpm install
30
40
RUN pnpm run build:web
31
41
32
42
#create the client-json
33
43
RUN echo '{ \
34
-
"redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \
35
-
"response_types": ["code"], \
36
-
"grant_types": ["authorization_code", "refresh_token"], \
37
-
"scope": "atproto transition:generic", \
38
-
"token_endpoint_auth_method": "none", \
39
-
"application_type": "web", \
40
-
"client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \
41
-
"client_name": "teal", \
42
-
"client_uri": "https://'"${CLIENT_ADDRESS}"'", \
43
-
"dpop_bound_access_tokens": true \
44
-
}' > /app/client-metadata.json
44
+
"redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \
45
+
"response_types": ["code"], \
46
+
"grant_types": ["authorization_code", "refresh_token"], \
47
+
"scope": "atproto transition:generic", \
48
+
"token_endpoint_auth_method": "none", \
49
+
"application_type": "web", \
50
+
"client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \
51
+
"client_name": "teal", \
52
+
"client_uri": "https://'"${CLIENT_ADDRESS}"'", \
53
+
"dpop_bound_access_tokens": true \
54
+
}' > /app/client-metadata.json
45
55
46
56
47
57
FROM caddy:2.1.0-alpine AS caddy
···
50
60
EXPOSE 443/udp
51
61
COPY /apps/amethyst/Caddyfile /etc/caddy/Caddyfile
52
62
COPY --from=builder /app/apps/amethyst/build /srv
53
-
COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
63
+
COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
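Because the builder stage copies the monorepo `packages/` and `lexicons/` directories and builds `tools/lexicon-cli`, the image has to be built with the repository root as the context; a hedged invocation (the tag is a placeholder):

```bash
# Build from the repo root so the monorepo paths in the COPY steps resolve
docker build -f apps/amethyst/Dockerfile -t teal-amethyst:dev .
```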
+2
-2
apps/aqua/Cargo.toml
+2
-2
apps/aqua/Cargo.toml
···
19
19
tracing-subscriber.workspace = true
20
20
sqlx = { workspace = true, features = ["time"] }
21
21
dotenvy.workspace = true
22
-
23
22
types.workspace = true
24
-
chrono = "0.4.41"
23
+
chrono.workspace = true
25
24
26
25
# CAR import functionality
27
26
iroh-car.workspace = true
···
29
28
reqwest.workspace = true
30
29
url.workspace = true
31
30
clap = { version = "4.0", features = ["derive"] }
31
+
atmst.workspace = true
32
32
33
33
# Redis for job queues
34
34
redis.workspace = true
+20
apps/aqua/Cross.toml
+20
apps/aqua/Cross.toml
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"]
14
+
# Allow cross-compilation of native dependencies
15
+
PKG_CONFIG_ALLOW_CROSS = "1"
16
+
# Use static linking to reduce runtime dependencies
17
+
RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s"
18
+
# Disable problematic features that might require OpenSSL
19
+
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
20
+
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
+90
apps/aqua/Dockerfile
+90
apps/aqua/Dockerfile
···
1
+
# Docker build args for cross-platform builds (must be at the top)
2
+
ARG TARGETPLATFORM
3
+
ARG BUILDPLATFORM
4
+
ARG TARGETARCH
5
+
ARG TARGETOS
6
+
7
+
FROM --platform=${BUILDPLATFORM} rust:latest AS buildah
8
+
9
+
# Create appuser
10
+
ENV USER=app
11
+
ENV UID=10001
12
+
13
+
RUN adduser \
14
+
--disabled-password \
15
+
--gecos "" \
16
+
--home "/nonexistent" \
17
+
--shell "/sbin/nologin" \
18
+
--no-create-home \
19
+
--uid "${UID}" \
20
+
"${USER}"
21
+
22
+
WORKDIR /buildah
23
+
24
+
# Re-declare ARGs after FROM (Docker requirement)
25
+
ARG TARGETPLATFORM
26
+
ARG BUILDPLATFORM
27
+
ARG TARGETARCH
28
+
ARG TARGETOS
29
+
30
+
# Debug platform detection before copying files
31
+
RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM"
32
+
33
+
COPY ./ .
34
+
35
+
# Setup lexicons and install dependencies
36
+
RUN ./scripts/setup-lexicons.sh
37
+
38
+
# Install Node.js and pnpm for lexicon generation
39
+
RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/*
40
+
RUN npm install -g pnpm
41
+
42
+
# Install dependencies and generate lexicons
43
+
RUN pnpm install
44
+
RUN cd tools/lexicon-cli && pnpm build
45
+
RUN pnpm lex:gen --rust-only
46
+
47
+
# Install cross-compilation toolchains
48
+
RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu
49
+
50
+
# Enable ARM64 architecture and install cross-compilation tools
51
+
RUN dpkg --add-architecture arm64 && \
52
+
apt-get update && \
53
+
apt-get install -y \
54
+
gcc-aarch64-linux-gnu \
55
+
libssl-dev:arm64 \
56
+
libssl-dev \
57
+
pkg-config \
58
+
&& rm -rf /var/lib/apt/lists/*
59
+
60
+
# Set up cross-compilation environment
61
+
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
62
+
ENV PKG_CONFIG_ALLOW_CROSS=1
63
+
ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig
64
+
ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr
65
+
ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu
66
+
ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl
67
+
68
+
69
+
# Debug platform detection and run build
70
+
RUN . ./target.sh && \
71
+
touch apps/aqua/src/main.rs && \
72
+
echo "Building for $TARGET_ARCH" && \
73
+
cargo build --release --target $RUST_TARGET --package aqua && \
74
+
cp target/$RUST_TARGET/release/aqua target/aqua
75
+
76
+
FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc
77
+
78
+
# Import from builder.
79
+
COPY --from=buildah /etc/passwd /etc/passwd
80
+
COPY --from=buildah /etc/group /etc/group
81
+
82
+
WORKDIR /app
83
+
84
+
# Copy our build
85
+
COPY --from=buildah /buildah/target/aqua ./
86
+
87
+
# Use an unprivileged user.
88
+
USER app:app
89
+
90
+
CMD ["/app/aqua"]
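The builder stage runs on `${BUILDPLATFORM}` and picks the Rust target from `TARGETARCH`, so this Dockerfile is intended to be driven by Docker Buildx; a hedged multi-arch invocation (builder name and tag are placeholders):

```bash
# One-time builder setup (assumed), then a multi-arch build of the aqua image
docker buildx create --use --name teal-builder
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  -f apps/aqua/Dockerfile \
  -t teal-aqua:dev \
  .
```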
+62
apps/aqua/examples/did_demo.rs
+62
apps/aqua/examples/did_demo.rs
···
1
+
use serde_json::json;
2
+
3
+
/// Generate a DID document for did:web
4
+
fn generate_did_document(host: &str) -> serde_json::Value {
5
+
json!({
6
+
"@context": [
7
+
"https://www.w3.org/ns/did/v1",
8
+
"https://w3id.org/security/multikey/v1",
9
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
10
+
],
11
+
"id": format!("did:web:{}", host),
12
+
"alsoKnownAs": [
13
+
format!("at://{}", host)
14
+
],
15
+
"service": [
16
+
{
17
+
"id": "#bsky_fg",
18
+
"type": "BskyFeedGenerator",
19
+
"serviceEndpoint": format!("https://{}", host)
20
+
},
21
+
{
22
+
"id": "#atproto_pds",
23
+
"type": "AtprotoPersonalDataServer",
24
+
"serviceEndpoint": format!("https://{}", host)
25
+
}
26
+
],
27
+
"verificationMethod": [
28
+
{
29
+
"id": format!("did:web:{}#atproto", host),
30
+
"type": "Multikey",
31
+
"controller": format!("did:web:{}", host),
32
+
"publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK"
33
+
}
34
+
]
35
+
})
36
+
}
37
+
38
+
fn main() {
39
+
println!("DID Document Generation Demo");
40
+
println!("===========================\n");
41
+
42
+
let test_hosts = vec![
43
+
"localhost:3000",
44
+
"bsky.social",
45
+
"my-atproto-service.com",
46
+
"example.org:8080",
47
+
];
48
+
49
+
for host in test_hosts {
50
+
println!("DID Document for host: {}", host);
51
+
println!("URL: https://{}/.well-known/did.json", host);
52
+
println!("DID: did:web:{}", host);
53
+
println!();
54
+
55
+
let did_doc = generate_did_document(host);
56
+
println!("{}", serde_json::to_string_pretty(&did_doc).unwrap());
57
+
println!("\n{}\n", "=".repeat(80));
58
+
}
59
+
60
+
println!("The well-known endpoint /.well-known/did.json will serve this JSON structure");
61
+
println!("when accessed via HTTP GET request to your Aqua server.");
62
+
}
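The demo only prints to stdout, so it can be run straight from the workspace; a hedged invocation, assuming the crate keeps the `aqua` package name used elsewhere in this change:

```bash
# Print sample DID documents for a few hosts without starting the server
cargo run --package aqua --example did_demo
```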
+275
-82
apps/aqua/src/api/mod.rs
+275
-82
apps/aqua/src/api/mod.rs
···
1
+
use anyhow::Result;
1
2
use axum::{Extension, Json, extract::Multipart, extract::Path, http::StatusCode};
2
3
use serde::{Deserialize, Serialize};
3
-
use tracing::{info, error};
4
-
use anyhow::Result;
5
-
use uuid;
6
-
7
-
use sys_info;
4
+
use serde_json::{Value, json};
5
+
use tracing::{error, info};
8
6
9
7
use crate::ctx::Context;
10
8
use crate::redis_client::RedisClient;
9
+
use crate::types::CarImportJobStatus;
11
10
12
11
#[derive(Debug, Serialize, Deserialize)]
13
12
pub struct MetaOsInfo {
···
61
60
/// Get CAR import job status
62
61
pub async fn get_car_import_job_status(
63
62
Path(job_id): Path<String>,
64
-
) -> Result<Json<types::jobs::CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> {
65
-
use types::jobs::queue_keys;
66
-
63
+
) -> Result<Json<CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> {
64
+
use crate::types::queue_keys;
65
+
67
66
info!("Getting status for job: {}", job_id);
68
-
67
+
69
68
// Parse job ID
70
69
let job_uuid = match uuid::Uuid::parse_str(&job_id) {
71
70
Ok(uuid) => uuid,
···
77
76
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
78
77
}
79
78
};
80
-
79
+
81
80
// Connect to Redis
82
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
81
+
let redis_url =
82
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
83
83
let redis_client = match RedisClient::new(&redis_url) {
84
84
Ok(client) => client,
85
85
Err(e) => {
···
91
91
return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)));
92
92
}
93
93
};
94
-
94
+
95
95
// Get job status
96
-
match redis_client.get_job_status(&queue_keys::job_status_key(&job_uuid)).await {
97
-
Ok(Some(status_data)) => {
98
-
match serde_json::from_str::<types::jobs::CarImportJobStatus>(&status_data) {
99
-
Ok(status) => Ok(Json(status)),
100
-
Err(e) => {
101
-
error!("Failed to parse job status: {}", e);
102
-
let error_response = ErrorResponse {
103
-
error: "Failed to parse job status".to_string(),
104
-
details: Some(e.to_string()),
105
-
};
106
-
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)))
107
-
}
96
+
match redis_client
97
+
.get_job_status(&queue_keys::job_status_key(&job_uuid))
98
+
.await
99
+
{
100
+
Ok(Some(status_data)) => match serde_json::from_str::<CarImportJobStatus>(&status_data) {
101
+
Ok(status) => Ok(Json(status)),
102
+
Err(e) => {
103
+
error!("Failed to parse job status: {}", e);
104
+
let error_response = ErrorResponse {
105
+
error: "Failed to parse job status".to_string(),
106
+
details: Some(e.to_string()),
107
+
};
108
+
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)))
108
109
}
109
-
}
110
+
},
110
111
Ok(None) => {
111
112
let error_response = ErrorResponse {
112
113
error: "Job not found".to_string(),
···
165
166
mut multipart: Multipart,
166
167
) -> Result<Json<CarImportResponse>, StatusCode> {
167
168
info!("Received CAR file upload request");
168
-
169
+
169
170
let mut car_data: Option<Vec<u8>> = None;
170
171
let mut import_id: Option<String> = None;
171
172
let mut description: Option<String> = None;
172
-
173
+
173
174
// Process multipart form data
174
-
while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? {
175
+
while let Some(field) = multipart
176
+
.next_field()
177
+
.await
178
+
.map_err(|_| StatusCode::BAD_REQUEST)?
179
+
{
175
180
let name = field.name().unwrap_or("").to_string();
176
-
181
+
177
182
match name.as_str() {
178
183
"car_file" => {
179
184
let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?;
···
192
197
}
193
198
}
194
199
}
195
-
200
+
196
201
let car_bytes = car_data.ok_or(StatusCode::BAD_REQUEST)?;
197
202
let final_import_id = import_id.unwrap_or_else(|| {
198
203
// Generate a unique import ID
199
204
format!("car-import-{}", chrono::Utc::now().timestamp())
200
205
});
201
-
206
+
202
207
// Validate CAR file format
203
208
match validate_car_file(&car_bytes).await {
204
209
Ok(_) => {
205
-
info!("CAR file validation successful for import {}", final_import_id);
210
+
info!(
211
+
"CAR file validation successful for import {}",
212
+
final_import_id
213
+
);
206
214
}
207
215
Err(e) => {
208
216
error!("CAR file validation failed: {}", e);
209
217
return Err(StatusCode::BAD_REQUEST);
210
218
}
211
219
}
212
-
220
+
213
221
// Store CAR import request in database for processing
214
-
match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await {
222
+
match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await
223
+
{
215
224
Ok(_) => {
216
-
info!("CAR import request stored successfully: {}", final_import_id);
225
+
info!(
226
+
"CAR import request stored successfully: {}",
227
+
final_import_id
228
+
);
217
229
Ok(Json(CarImportResponse {
218
230
import_id: final_import_id,
219
231
status: "queued".to_string(),
···
232
244
axum::extract::Path(import_id): axum::extract::Path<String>,
233
245
) -> Result<Json<CarImportResponse>, StatusCode> {
234
246
match get_import_status(&ctx, &import_id).await {
235
-
Ok(Some(status)) => {
236
-
Ok(Json(CarImportResponse {
237
-
import_id,
238
-
status: status.status,
239
-
message: status.message,
240
-
}))
241
-
}
247
+
Ok(Some(status)) => Ok(Json(CarImportResponse {
248
+
import_id,
249
+
status: status.status,
250
+
message: status.message,
251
+
})),
242
252
Ok(None) => Err(StatusCode::NOT_FOUND),
243
253
Err(e) => {
244
254
error!("Failed to get import status: {}", e);
···
248
258
}
249
259
250
260
async fn validate_car_file(car_data: &[u8]) -> Result<()> {
251
-
use std::io::Cursor;
252
261
use iroh_car::CarReader;
253
-
262
+
use std::io::Cursor;
263
+
254
264
let cursor = Cursor::new(car_data);
255
265
let reader = CarReader::new(cursor).await?;
256
266
let header = reader.header();
257
-
267
+
258
268
// Basic validation - ensure we have at least one root CID
259
269
if header.roots().is_empty() {
260
270
return Err(anyhow::anyhow!("CAR file has no root CIDs"));
261
271
}
262
-
272
+
263
273
info!("CAR file validated: {} root CIDs", header.roots().len());
264
274
Ok(())
265
275
}
···
293
303
Extension(ctx): Extension<Context>,
294
304
Json(request): Json<FetchCarRequest>,
295
305
) -> Result<Json<FetchCarResponse>, (StatusCode, Json<ErrorResponse>)> {
296
-
info!("Received CAR fetch request for user: {}", request.user_identifier);
297
-
306
+
info!(
307
+
"Received CAR fetch request for user: {}",
308
+
request.user_identifier
309
+
);
310
+
298
311
// Resolve user identifier to DID and PDS
299
312
let (user_did, pds_host) = match resolve_user_to_pds(&request.user_identifier).await {
300
313
Ok(result) => result,
···
302
315
error!("Failed to resolve user {}: {}", request.user_identifier, e);
303
316
let error_response = ErrorResponse {
304
317
error: "Failed to resolve user".to_string(),
305
-
details: if request.debug.unwrap_or(false) { Some(e.to_string()) } else { None },
318
+
details: if request.debug.unwrap_or(false) {
319
+
Some(e.to_string())
320
+
} else {
321
+
None
322
+
},
306
323
};
307
324
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
308
325
}
309
326
};
310
-
311
-
info!("Resolved {} to DID {} on PDS {}", request.user_identifier, user_did, pds_host);
312
-
327
+
328
+
info!(
329
+
"Resolved {} to DID {} on PDS {}",
330
+
request.user_identifier, user_did, pds_host
331
+
);
332
+
313
333
// Generate import ID
314
-
let import_id = format!("pds-fetch-{}-{}",
315
-
user_did.replace(":", "-"),
334
+
let import_id = format!(
335
+
"pds-fetch-{}-{}",
336
+
user_did.replace(":", "-"),
316
337
chrono::Utc::now().timestamp()
317
338
);
318
-
339
+
319
340
// Fetch CAR file from PDS
320
341
match fetch_car_from_pds(&pds_host, &user_did, request.since.as_deref()).await {
321
342
Ok(car_data) => {
322
-
info!("Successfully fetched CAR file for {} ({} bytes)", user_did, car_data.len());
323
-
343
+
info!(
344
+
"Successfully fetched CAR file for {} ({} bytes)",
345
+
user_did,
346
+
car_data.len()
347
+
);
348
+
324
349
// Store the fetched CAR file for processing
325
-
let description = Some(format!("Fetched from PDS {} for user {}", pds_host, request.user_identifier));
326
-
match store_car_import_request(&ctx, &import_id, &car_data, description.as_deref()).await {
350
+
let description = Some(format!(
351
+
"Fetched from PDS {} for user {}",
352
+
pds_host, request.user_identifier
353
+
));
354
+
match store_car_import_request(&ctx, &import_id, &car_data, description.as_deref())
355
+
.await
356
+
{
327
357
Ok(_) => {
328
358
info!("CAR import request stored successfully: {}", import_id);
329
359
Ok(Json(FetchCarResponse {
···
371
401
372
402
/// Resolve a handle to a DID using com.atproto.identity.resolveHandle
373
403
async fn resolve_handle_to_did(handle: &str) -> Result<String> {
374
-
let url = format!("https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}", handle);
375
-
404
+
let url = format!(
405
+
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
406
+
handle
407
+
);
408
+
376
409
let response = reqwest::get(&url).await?;
377
410
if !response.status().is_success() {
378
-
return Err(anyhow::anyhow!("Failed to resolve handle {}: {}", handle, response.status()));
411
+
return Err(anyhow::anyhow!(
412
+
"Failed to resolve handle {}: {}",
413
+
handle,
414
+
response.status()
415
+
));
379
416
}
380
-
417
+
381
418
let json: serde_json::Value = response.json().await?;
382
-
let did = json["did"].as_str()
419
+
let did = json["did"]
420
+
.as_str()
383
421
.ok_or_else(|| anyhow::anyhow!("No DID found in response for handle {}", handle))?;
384
-
422
+
385
423
Ok(did.to_string())
386
424
}
387
425
···
390
428
// For DID:plc, use the PLC directory
391
429
if did.starts_with("did:plc:") {
392
430
let url = format!("https://plc.directory/{}", did);
393
-
431
+
394
432
let response = reqwest::get(&url).await?;
395
433
if !response.status().is_success() {
396
-
return Err(anyhow::anyhow!("Failed to resolve DID {}: {}", did, response.status()));
434
+
return Err(anyhow::anyhow!(
435
+
"Failed to resolve DID {}: {}",
436
+
did,
437
+
response.status()
438
+
));
397
439
}
398
-
440
+
399
441
let doc: serde_json::Value = response.json().await?;
400
-
442
+
401
443
// Find the PDS service endpoint
402
444
if let Some(services) = doc["service"].as_array() {
403
445
for service in services {
···
405
447
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
406
448
// Extract hostname from URL
407
449
let url = url::Url::parse(endpoint)?;
408
-
let host = url.host_str()
409
-
.ok_or_else(|| anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint))?;
450
+
let host = url.host_str().ok_or_else(|| {
451
+
anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint)
452
+
})?;
410
453
return Ok(host.to_string());
411
454
}
412
455
}
413
456
}
414
457
}
415
-
416
-
Err(anyhow::anyhow!("No PDS service found in DID document for {}", did))
458
+
459
+
Err(anyhow::anyhow!(
460
+
"No PDS service found in DID document for {}",
461
+
did
462
+
))
417
463
} else {
418
464
Err(anyhow::anyhow!("Unsupported DID method: {}", did))
419
465
}
···
421
467
422
468
/// Fetch CAR file from PDS using com.atproto.sync.getRepo
423
469
pub async fn fetch_car_from_pds(pds_host: &str, did: &str, since: Option<&str>) -> Result<Vec<u8>> {
424
-
let mut url = format!("https://{}/xrpc/com.atproto.sync.getRepo?did={}", pds_host, did);
425
-
470
+
let mut url = format!(
471
+
"https://{}/xrpc/com.atproto.sync.getRepo?did={}",
472
+
pds_host, did
473
+
);
474
+
426
475
if let Some(since_rev) = since {
427
476
url.push_str(&format!("&since={}", since_rev));
428
477
}
429
-
478
+
430
479
info!("Fetching CAR file from: {}", url);
431
-
480
+
432
481
let response = reqwest::get(&url).await?;
433
482
if !response.status().is_success() {
434
-
return Err(anyhow::anyhow!("Failed to fetch CAR from PDS {}: {}", pds_host, response.status()));
483
+
return Err(anyhow::anyhow!(
484
+
"Failed to fetch CAR from PDS {}: {}",
485
+
pds_host,
486
+
response.status()
487
+
));
435
488
}
436
-
489
+
437
490
// Verify content type
438
-
let content_type = response.headers()
491
+
let content_type = response
492
+
.headers()
439
493
.get("content-type")
440
494
.and_then(|h| h.to_str().ok())
441
495
.unwrap_or("");
442
-
496
+
443
497
if !content_type.contains("application/vnd.ipld.car") {
444
498
return Err(anyhow::anyhow!("Unexpected content type: {}", content_type));
445
499
}
446
-
500
+
447
501
let car_data = response.bytes().await?;
448
502
Ok(car_data.to_vec())
449
503
}
504
+
505
+
/// Generate a DID document for did:web
506
+
fn generate_did_document(host: &str, pubkey: &str) -> Value {
507
+
json!({
508
+
"@context": [
509
+
"https://www.w3.org/ns/did/v1",
510
+
"https://w3id.org/security/multikey/v1",
511
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
512
+
],
513
+
"id": format!("did:web:{}", host),
514
+
"alsoKnownAs": [
515
+
format!("at://{}", host)
516
+
],
517
+
"service": [
518
+
{
519
+
"id": "#bsky_fg",
520
+
"type": "BskyFeedGenerator",
521
+
"serviceEndpoint": format!("https://{}", host)
522
+
},
523
+
{
524
+
"id": "#atproto_pds",
525
+
"type": "AtprotoPersonalDataServer",
526
+
"serviceEndpoint": format!("https://{}", host)
527
+
}
528
+
],
529
+
"verificationMethod": [
530
+
{
531
+
"id": format!("did:web:{}#atproto", host),
532
+
"type": "Multikey",
533
+
"controller": format!("did:web:{}", host),
534
+
"publicKeyMultibase": pubkey
535
+
}
536
+
]
537
+
})
538
+
}
539
+
540
+
/// Handler for /.well-known/did.json endpoint
541
+
pub async fn get_did_document(
542
+
Extension(_ctx): Extension<Context>,
543
+
) -> impl axum::response::IntoResponse {
544
+
// Get the host from environment variable or use default
545
+
let host = std::env::var("APP_HOST")
546
+
.or_else(|_| std::env::var("HOST"))
547
+
.unwrap_or_else(|_| "localhost:3000".to_string());
548
+
549
+
// get pubkey from environment variable or use default
550
+
let pubkey = std::env::var("TEST_PUBKEY").unwrap_or_else(|_| {
551
+
"z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i".to_string()
552
+
});
553
+
554
+
let did_doc = generate_did_document(&host, &pubkey);
555
+
556
+
(
557
+
StatusCode::OK,
558
+
[("Content-Type", "application/json")],
559
+
Json(did_doc),
560
+
)
561
+
}
562
+
563
+
#[cfg(test)]
564
+
mod tests {
565
+
use super::*;
566
+
567
+
const TEST_PUBKEY: &str = "z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i";
568
+
569
+
#[test]
570
+
fn test_generate_did_document() {
571
+
let host = "example.com";
572
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
573
+
574
+
// Verify the structure of the generated DID document
575
+
assert_eq!(did_doc["id"], format!("did:web:{}", host));
576
+
assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host));
577
+
578
+
// Check services
579
+
let services = did_doc["service"].as_array().unwrap();
580
+
assert_eq!(services.len(), 2);
581
+
582
+
let bsky_fg = &services[0];
583
+
assert_eq!(bsky_fg["id"], "#bsky_fg");
584
+
assert_eq!(bsky_fg["type"], "BskyFeedGenerator");
585
+
assert_eq!(bsky_fg["serviceEndpoint"], format!("https://{}", host));
586
+
587
+
let atproto_pds = &services[1];
588
+
assert_eq!(atproto_pds["id"], "#atproto_pds");
589
+
assert_eq!(atproto_pds["type"], "AtprotoPersonalDataServer");
590
+
assert_eq!(atproto_pds["serviceEndpoint"], format!("https://{}", host));
591
+
592
+
// Check verification method
593
+
let verification_methods = did_doc["verificationMethod"].as_array().unwrap();
594
+
assert_eq!(verification_methods.len(), 1);
595
+
596
+
let vm = &verification_methods[0];
597
+
assert_eq!(vm["id"], format!("did:web:{}#atproto", host));
598
+
assert_eq!(vm["type"], "Multikey");
599
+
assert_eq!(vm["controller"], format!("did:web:{}", host));
600
+
assert!(vm["publicKeyMultibase"].as_str().unwrap().starts_with("z"));
601
+
}
602
+
603
+
#[test]
604
+
fn test_did_document_context() {
605
+
let host = "test.example.org";
606
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
607
+
608
+
let context = did_doc["@context"].as_array().unwrap();
609
+
assert_eq!(context.len(), 3);
610
+
assert_eq!(context[0], "https://www.w3.org/ns/did/v1");
611
+
assert_eq!(context[1], "https://w3id.org/security/multikey/v1");
612
+
assert_eq!(
613
+
context[2],
614
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
615
+
);
616
+
}
617
+
618
+
#[test]
619
+
fn test_different_hosts() {
620
+
// Test with different host formats
621
+
let hosts = vec![
622
+
"localhost:3000",
623
+
"bsky.social",
624
+
"example.org:8080",
625
+
"my-service.com",
626
+
];
627
+
628
+
for host in hosts {
629
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
630
+
631
+
// Verify basic structure for each host
632
+
assert_eq!(did_doc["id"], format!("did:web:{}", host));
633
+
assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host));
634
+
635
+
let services = did_doc["service"].as_array().unwrap();
636
+
assert_eq!(services.len(), 2);
637
+
638
+
let verification_methods = did_doc["verificationMethod"].as_array().unwrap();
639
+
assert_eq!(verification_methods.len(), 1);
640
+
}
641
+
}
642
+
}
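Together with the routes wired up in `main.rs` below, the new handlers can be exercised locally; a hedged sketch against the dev host from the README (the handle and import id are placeholders):

```bash
# Serve the generated did:web document
curl http://localhost:3000/.well-known/did.json

# Ask aqua to fetch a user's repo CAR from their PDS and store it for import
curl -X POST http://localhost:3000/api/car/fetch \
  -H 'Content-Type: application/json' \
  -d '{"user_identifier": "alice.bsky.social"}'

# Check the import using the import_id echoed back in the response
curl http://localhost:3000/api/car/status/<import-id>
```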
+50
-25
apps/aqua/src/main.rs
+50
-25
apps/aqua/src/main.rs
···
1
-
use axum::{Router, extract::Extension, routing::{get, post}};
1
+
use axum::{
2
+
Router,
3
+
extract::Extension,
4
+
routing::{get, post},
5
+
};
6
+
use chrono::Utc;
7
+
use clap::{Arg, Command};
2
8
use std::net::SocketAddr;
3
9
use tower_http::cors::CorsLayer;
4
-
use clap::{Arg, Command};
5
10
use uuid::Uuid;
6
-
use chrono::Utc;
7
11
8
12
use ctx::RawContext;
13
+
use redis_client::RedisClient;
9
14
use repos::DataSource;
10
15
use repos::pg::PgDataSource;
11
-
use redis_client::RedisClient;
12
16
13
17
mod api;
14
18
mod ctx;
15
19
mod db;
20
+
mod redis_client;
16
21
mod repos;
22
+
mod types;
17
23
mod xrpc;
18
-
mod redis_client;
19
24
20
25
#[tokio::main]
21
26
async fn main() -> Result<(), String> {
···
32
37
.long("import-identity-car")
33
38
.value_name("HANDLE_OR_DID")
34
39
.help("Import CAR file for a specific identity (handle or DID)")
35
-
.action(clap::ArgAction::Set)
40
+
.action(clap::ArgAction::Set),
36
41
)
37
42
.get_matches();
38
43
39
44
let db = db::init_pool().await.expect("failed to init db");
40
45
let pgds = PgDataSource::new(db.clone()).boxed();
41
-
let ctx = RawContext::new(pgds).build();
46
+
let ctx = RawContext::new(pgds).build(); // Arc<RawContext>
42
47
43
48
// Check if we should import a CAR file instead of starting the server
44
49
if let Some(identity) = matches.get_one::<String>("import-identity-car") {
···
50
55
51
56
let app = Router::new()
52
57
.route("/meta_info", get(api::get_meta_info))
58
+
.route("/.well-known/did.json", get(api::get_did_document))
53
59
.route("/api/car/upload", post(api::upload_car_import))
54
60
.route("/api/car/fetch", post(api::fetch_car_from_user))
55
-
.route("/api/car/status/{import_id}", get(api::get_car_import_status))
56
-
.route("/api/car/job-status/{job_id}", get(api::get_car_import_job_status))
61
+
.route(
62
+
"/api/car/status/{import_id}",
63
+
get(api::get_car_import_status),
64
+
)
65
+
.route(
66
+
"/api/car/job-status/{job_id}",
67
+
get(api::get_car_import_job_status),
68
+
)
57
69
.nest("/xrpc/", xrpc::actor::actor_routes())
58
70
.nest("/xrpc/", xrpc::feed::feed_routes())
59
71
.nest("/xrpc/", xrpc::stats::stats_routes())
···
69
81
}
70
82
71
83
async fn import_identity_car(_ctx: &ctx::Context, identity: &str) -> Result<(), String> {
72
-
use tracing::{info, error};
73
-
use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys};
74
-
84
+
use crate::types::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys};
85
+
use tracing::{error, info};
86
+
75
87
info!("Submitting CAR import job for identity: {}", identity);
76
-
88
+
77
89
// Connect to Redis
78
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
79
-
let redis_client = RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?;
80
-
90
+
let redis_url =
91
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
92
+
let redis_client =
93
+
RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?;
94
+
81
95
// Create job
82
96
let job = CarImportJob {
83
97
request_id: Uuid::new_v4(),
···
86
100
created_at: Utc::now(),
87
101
description: Some(format!("CLI import request for {}", identity)),
88
102
};
89
-
103
+
90
104
// Serialize job for queue
91
-
let job_data = serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?;
92
-
105
+
let job_data =
106
+
serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?;
107
+
93
108
// Initialize job status
94
109
let status = CarImportJobStatus {
95
110
status: JobStatus::Pending,
···
99
114
error_message: None,
100
115
progress: None,
101
116
};
102
-
let status_data = serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?;
103
-
117
+
let status_data =
118
+
serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?;
119
+
104
120
// Submit to queue and set initial status
105
-
match redis_client.queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data).await {
121
+
match redis_client
122
+
.queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data)
123
+
.await
124
+
{
106
125
Ok(_) => {
107
126
// Set initial status
108
-
if let Err(e) = redis_client.set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data).await {
127
+
if let Err(e) = redis_client
128
+
.set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data)
129
+
.await
130
+
{
109
131
error!("Failed to set job status: {}", e);
110
132
}
111
-
133
+
112
134
info!("โ
CAR import job queued successfully!");
113
135
info!("Job ID: {}", job.request_id);
114
136
info!("Identity: {}", identity);
115
-
info!("Monitor status with: curl http://localhost:3000/api/car/status/{}", job.request_id);
137
+
info!(
138
+
"Monitor status with: curl http://localhost:3000/api/car/status/{}",
139
+
job.request_id
140
+
);
116
141
Ok(())
117
142
}
118
143
Err(e) => {
+1
-1
apps/aqua/src/redis_client.rs
+1
-1
apps/aqua/src/redis_client.rs
+7
-5
apps/aqua/src/repos/actor_profile.rs
+7
-5
apps/aqua/src/repos/actor_profile.rs
···
9
9
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>>;
10
10
async fn get_multiple_actor_profiles(
11
11
&self,
12
-
identities: &Vec<String>,
12
+
identities: &[String],
13
13
) -> anyhow::Result<Vec<ProfileViewData>>;
14
14
}
15
15
···
30
30
avatar: row.avatar,
31
31
banner: row.banner,
32
32
// chrono -> atrium time
33
-
created_at: row.created_at.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
33
+
created_at: row
34
+
.created_at
35
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
34
36
description: row.description,
35
37
description_facets: row
36
38
.description_facets
37
39
.and_then(|v| serde_json::from_value(v).ok()),
38
40
did: row.did,
39
-
featured_item: None,
40
41
display_name: row.display_name,
42
+
featured_item: None,
41
43
status: row.status.and_then(|v| serde_json::from_value(v).ok()),
42
44
}
43
45
}
···
46
48
#[async_trait]
47
49
impl ActorProfileRepo for PgDataSource {
48
50
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>> {
49
-
self.get_multiple_actor_profiles(&vec![identity.to_string()])
51
+
self.get_multiple_actor_profiles(&[identity.to_string()])
50
52
.await
51
53
.map(|p| p.first().cloned())
52
54
}
53
55
async fn get_multiple_actor_profiles(
54
56
&self,
55
-
identities: &Vec<String>,
57
+
identities: &[String],
56
58
) -> anyhow::Result<Vec<ProfileViewData>> {
57
59
// split identities into dids (prefixed with "did:") and handles (not prefixed) in one iteration
58
60
let mut dids = Vec::new();
+22
-18
apps/aqua/src/repos/feed_play.rs
+22
-18
apps/aqua/src/repos/feed_play.rs
···
8
8
async fn get_feed_play(&self, identity: &str) -> anyhow::Result<Option<PlayViewData>>;
9
9
async fn get_feed_plays_for_profile(
10
10
&self,
11
-
identities: &Vec<String>,
11
+
identities: &[String],
12
12
) -> anyhow::Result<Vec<PlayViewData>>;
13
13
}
14
14
···
49
49
};
50
50
51
51
Ok(Some(PlayViewData {
52
+
track_name: row.track_name.clone(),
53
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
54
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
55
+
duration: row.duration.map(|d| d as i64),
52
56
artists,
53
-
duration: row.duration.map(|d| d as i64),
57
+
release_name: row.release_name.clone(),
58
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
54
59
isrc: row.isrc,
55
-
music_service_base_domain: row.music_service_base_domain,
56
60
origin_url: row.origin_url,
57
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
58
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
59
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
60
-
release_name: row.release_name,
61
+
music_service_base_domain: row.music_service_base_domain,
61
62
submission_client_agent: row.submission_client_agent,
62
-
track_mb_id: Some(row.rkey.clone()),
63
-
track_name: row.track_name.clone(),
63
+
played_time: row
64
+
.played_time
65
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
64
66
}))
65
67
}
66
68
67
69
async fn get_feed_plays_for_profile(
68
70
&self,
69
-
identities: &Vec<String>,
71
+
identities: &[String],
70
72
) -> anyhow::Result<Vec<PlayViewData>> {
71
73
let rows = sqlx::query!(
72
74
r#"
···
105
107
};
106
108
107
109
result.push(PlayViewData {
110
+
track_name: row.track_name.clone(),
111
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
112
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
113
+
duration: row.duration.map(|d| d as i64),
108
114
artists,
109
-
duration: row.duration.map(|d| d as i64),
115
+
release_name: row.release_name.clone(),
116
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
110
117
isrc: row.isrc,
111
-
music_service_base_domain: row.music_service_base_domain,
112
118
origin_url: row.origin_url,
113
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
114
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
115
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
116
-
release_name: row.release_name,
119
+
music_service_base_domain: row.music_service_base_domain,
117
120
submission_client_agent: row.submission_client_agent,
118
-
track_mb_id: Some(row.rkey.clone()),
119
-
track_name: row.track_name.clone(),
121
+
played_time: row
122
+
.played_time
123
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
120
124
});
121
125
}
122
126
+1
-2
apps/aqua/src/repos/mod.rs
+1
-2
apps/aqua/src/repos/mod.rs
···
27
27
}
28
28
29
29
pub fn time_to_chrono_utc(dt: time::OffsetDateTime) -> chrono::DateTime<chrono::Utc> {
30
-
chrono::DateTime::from_timestamp(dt.unix_timestamp(), dt.nanosecond())
31
-
.unwrap_or_default()
30
+
chrono::DateTime::from_timestamp(dt.unix_timestamp(), dt.nanosecond()).unwrap_or_default()
32
31
}
+10
-9
apps/aqua/src/repos/stats.rs
+10
-9
apps/aqua/src/repos/stats.rs
···
85
85
if let (Some(mbid), Some(name)) = (row.mbid, row.name) {
86
86
result.push(ReleaseViewData {
87
87
mbid: mbid.to_string(),
88
-
89
88
name,
90
89
play_count: row.play_count.unwrap_or(0),
91
90
});
···
217
216
};
218
217
219
218
result.push(PlayViewData {
219
+
track_name: row.track_name.clone(),
220
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
221
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
222
+
duration: row.duration.map(|d| d as i64),
220
223
artists,
221
-
duration: row.duration.map(|d| d as i64),
224
+
release_name: row.release_name.clone(),
225
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
222
226
isrc: row.isrc,
223
-
music_service_base_domain: row.music_service_base_domain,
224
227
origin_url: row.origin_url,
225
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
226
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
227
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
228
-
release_name: row.release_name,
228
+
music_service_base_domain: row.music_service_base_domain,
229
229
submission_client_agent: row.submission_client_agent,
230
-
track_mb_id: Some(row.rkey.clone()),
231
-
track_name: row.track_name.clone(),
230
+
played_time: row
231
+
.played_time
232
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
232
233
});
233
234
}
234
235
+51
apps/aqua/src/types/jobs.rs
+51
apps/aqua/src/types/jobs.rs
···
1
+
use chrono::{DateTime, Utc};
2
+
use serde::{Deserialize, Serialize};
3
+
use uuid::Uuid;
4
+
5
+
#[derive(Debug, Clone, Serialize, Deserialize)]
6
+
pub struct CarImportJob {
7
+
pub request_id: Uuid,
8
+
pub identity: String,
9
+
pub since: Option<DateTime<Utc>>,
10
+
pub created_at: DateTime<Utc>,
11
+
pub description: Option<String>,
12
+
}
13
+
14
+
#[derive(Debug, Clone, Serialize, Deserialize)]
15
+
pub struct CarImportJobStatus {
16
+
pub status: JobStatus,
17
+
pub created_at: DateTime<Utc>,
18
+
pub started_at: Option<DateTime<Utc>>,
19
+
pub completed_at: Option<DateTime<Utc>>,
20
+
pub error_message: Option<String>,
21
+
pub progress: Option<JobProgress>,
22
+
}
23
+
24
+
#[derive(Debug, Clone, Serialize, Deserialize)]
25
+
pub enum JobStatus {
26
+
Pending,
27
+
Processing,
28
+
Completed,
29
+
Failed,
30
+
Cancelled,
31
+
}
32
+
33
+
#[derive(Debug, Clone, Serialize, Deserialize)]
34
+
pub struct JobProgress {
35
+
step: String,
36
+
pub user_did: Option<String>,
37
+
pub pds_host: Option<String>,
38
+
pub car_size_bytes: Option<u64>,
39
+
pub blocks_processed: Option<u64>,
40
+
}
41
+
42
+
pub mod queue_keys {
43
+
use uuid::Uuid;
44
+
45
+
pub const CAR_IMPORT_JOBS: &str = "car_import_jobs";
46
+
pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status";
47
+
48
+
pub fn job_status_key(job_id: &Uuid) -> String {
49
+
format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id)
50
+
}
51
+
}
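The `queue_keys` module fixes the Redis key layout, so queued work can be inspected from the side with `redis-cli`; a hedged sketch that assumes the queue is a plain list and the status is stored as a string (the exact commands depend on `RedisClient`, which is not shown in this diff):

```bash
# Pending CAR import jobs, assuming the queue key holds a Redis list
redis-cli LRANGE car_import_jobs 0 -1

# Status blob for one job, keyed as car_import_status:<job-uuid>
redis-cli GET car_import_status:<job-uuid>
```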
+1
-1
apps/aqua/src/xrpc/actor.rs
+1
-1
apps/aqua/src/xrpc/actor.rs
+17
-11
apps/aqua/src/xrpc/stats.rs
+17
-11
apps/aqua/src/xrpc/stats.rs
···
1
1
use crate::ctx::Context;
2
2
use axum::{Extension, http::StatusCode, response::IntoResponse, routing::get};
3
3
use serde::{Deserialize, Serialize};
4
-
use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData};
5
4
use types::fm::teal::alpha::feed::defs::PlayViewData;
5
+
use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData};
6
6
7
7
// mount stats routes
8
8
pub fn stats_routes() -> axum::Router {
9
9
axum::Router::new()
10
10
.route("/fm.teal.alpha.stats.getTopArtists", get(get_top_artists))
11
11
.route("/fm.teal.alpha.stats.getTopReleases", get(get_top_releases))
12
-
.route("/fm.teal.alpha.stats.getUserTopArtists", get(get_user_top_artists))
13
-
.route("/fm.teal.alpha.stats.getUserTopReleases", get(get_user_top_releases))
12
+
.route(
13
+
"/fm.teal.alpha.stats.getUserTopArtists",
14
+
get(get_user_top_artists),
15
+
)
16
+
.route(
17
+
"/fm.teal.alpha.stats.getUserTopReleases",
18
+
get(get_user_top_releases),
19
+
)
14
20
.route("/fm.teal.alpha.stats.getLatest", get(get_latest))
15
21
}
16
22
···
29
35
axum::extract::Query(query): axum::extract::Query<GetTopArtistsQuery>,
30
36
) -> Result<impl IntoResponse, (StatusCode, String)> {
31
37
let repo = &ctx.db;
32
-
38
+
33
39
match repo.get_top_artists(query.limit).await {
34
40
Ok(artists) => Ok(axum::Json(GetTopArtistsResponse { artists })),
35
41
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
51
57
axum::extract::Query(query): axum::extract::Query<GetTopReleasesQuery>,
52
58
) -> Result<impl IntoResponse, (StatusCode, String)> {
53
59
let repo = &ctx.db;
54
-
60
+
55
61
match repo.get_top_releases(query.limit).await {
56
62
Ok(releases) => Ok(axum::Json(GetTopReleasesResponse { releases })),
57
63
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
74
80
axum::extract::Query(query): axum::extract::Query<GetUserTopArtistsQuery>,
75
81
) -> Result<impl IntoResponse, (StatusCode, String)> {
76
82
let repo = &ctx.db;
77
-
83
+
78
84
if query.actor.is_empty() {
79
85
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
80
86
}
81
-
87
+
82
88
match repo.get_user_top_artists(&query.actor, query.limit).await {
83
89
Ok(artists) => Ok(axum::Json(GetUserTopArtistsResponse { artists })),
84
90
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
101
107
axum::extract::Query(query): axum::extract::Query<GetUserTopReleasesQuery>,
102
108
) -> Result<impl IntoResponse, (StatusCode, String)> {
103
109
let repo = &ctx.db;
104
-
110
+
105
111
if query.actor.is_empty() {
106
112
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
107
113
}
108
-
114
+
109
115
match repo.get_user_top_releases(&query.actor, query.limit).await {
110
116
Ok(releases) => Ok(axum::Json(GetUserTopReleasesResponse { releases })),
111
117
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
127
133
axum::extract::Query(query): axum::extract::Query<GetLatestQuery>,
128
134
) -> Result<impl IntoResponse, (StatusCode, String)> {
129
135
let repo = &ctx.db;
130
-
136
+
131
137
match repo.get_latest(query.limit).await {
132
138
Ok(plays) => Ok(axum::Json(GetLatestResponse { plays })),
133
139
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
134
140
}
135
-
}
141
+
}
+82
-5
lexicons/README.md
+82
-5
lexicons/README.md
···
17
17
18
18
### Initial Setup
19
19
20
-
If you're cloning this repository for the first time, you'll need to initialize the submodules:
20
+
If you're cloning this repository for the first time, you'll need to initialize the submodules and create the symbolic links:
21
21
22
22
```bash
23
+
# Initialize submodules
23
24
git submodule update --init --recursive
25
+
26
+
# Create symbolic links to atproto lexicons
27
+
cd lexicons
28
+
ln -s ../vendor/atproto/lexicons/app app
29
+
ln -s ../vendor/atproto/lexicons/chat chat
30
+
ln -s ../vendor/atproto/lexicons/com com
31
+
ln -s ../vendor/atproto/lexicons/tools tools
32
+
cd ..
33
+
```
34
+
35
+
Or use the provided setup script:
36
+
37
+
```bash
38
+
./scripts/setup-lexicons.sh
24
39
```
25
40
26
41
### Updating ATProto Lexicons
27
42
28
-
To update to the latest ATProto lexicons:
43
+
To update to the latest ATProto lexicons, use the provided update script:
29
44
30
45
```bash
46
+
./scripts/update-lexicons.sh
47
+
```
48
+
49
+
This will:
50
+
1. Fetch the latest changes from the atproto repository
51
+
2. Show you what changed
52
+
3. Stage the submodule update for commit
53
+
54
+
Then commit the changes:
55
+
```bash
56
+
git commit -m "Update atproto lexicons to latest"
57
+
```
58
+
59
+
**Manual approach:**
60
+
```bash
31
61
cd vendor/atproto
32
62
git pull origin main
33
63
cd ../..
···
35
65
git commit -m "Update atproto lexicons to latest"
36
66
```
37
67
68
+
### Available Scripts
69
+
70
+
Two convenience scripts are available:
71
+
72
+
**Setup Script** - Handle the initial setup:
73
+
74
+
```bash
75
+
#!/bin/bash
76
+
# scripts/setup-lexicons.sh
77
+
78
+
echo "Setting up lexicons..."
79
+
80
+
# Initialize submodules
81
+
git submodule update --init --recursive
82
+
83
+
# Create symbolic links if they don't exist
84
+
cd lexicons
85
+
if [ ! -L app ]; then
86
+
ln -s ../vendor/atproto/lexicons/app app
87
+
echo "Created symlink: lexicons/app"
88
+
fi
89
+
if [ ! -L chat ]; then
90
+
ln -s ../vendor/atproto/lexicons/chat chat
91
+
echo "Created symlink: lexicons/chat"
92
+
fi
93
+
if [ ! -L com ]; then
94
+
ln -s ../vendor/atproto/lexicons/com com
95
+
echo "Created symlink: lexicons/com"
96
+
fi
97
+
if [ ! -L tools ]; then
98
+
ln -s ../vendor/atproto/lexicons/tools tools
99
+
echo "Created symlink: lexicons/tools"
100
+
fi
101
+
cd ..
102
+
103
+
echo "Lexicons setup complete!"
104
+
```
105
+
106
+
**Update Script** - Update ATProto lexicons:
107
+
108
+
```bash
109
+
# Usage: run from the repository root
110
+
# scripts/update-lexicons.sh
111
+
112
+
# Fetches latest changes from atproto repository
113
+
# Shows what changed and stages the update for commit
114
+
./scripts/update-lexicons.sh
115
+
```
116
+
38
117
### Adding Custom Lexicons
39
118
40
119
Custom lexicons should be added to the `fm.teal.alpha/` directory following the ATProto lexicon schema format. These files are tracked directly in our repository and not affected by submodule updates.
41
120
42
-
## Generated Files
43
-
44
-
This directory may contain generated files (`.js`, `.d.ts`, etc.) that are created by lexicon compilation tools. These are ignored by git as specified in the `.gitignore` file.
121
+
**Note**: The symbolic links (`app`, `chat`, `com`, `tools`) are not tracked in git and will be created during setup. They are ignored in `.gitignore` to avoid conflicts.
+21
-1
lexicons/fm.teal.alpha/actor/defs.json
+21
-1
lexicons/fm.teal.alpha/actor/defs.json
···
36
36
},
37
37
"status": {
38
38
"type": "ref",
39
-
"ref": "fm.teal.alpha.actor.status#main"
39
+
"ref": "#statusView"
40
40
},
41
41
"createdAt": { "type": "string", "format": "datetime" }
42
42
}
···
57
57
"avatar": {
58
58
"type": "string",
59
59
"description": "IPLD of the avatar"
60
+
}
61
+
}
62
+
},
63
+
"statusView": {
64
+
"type": "object",
65
+
"description": "A declaration of the status of the actor.",
66
+
"properties": {
67
+
"time": {
68
+
"type": "string",
69
+
"format": "datetime",
70
+
"description": "The unix timestamp of when the item was recorded"
71
+
},
72
+
"expiry": {
73
+
"type": "string",
74
+
"format": "datetime",
75
+
"description": "The unix timestamp of the expiry time of the item. If unavailable, default to 10 minutes past the start time."
76
+
},
77
+
"item": {
78
+
"type": "ref",
79
+
"ref": "fm.teal.alpha.feed.defs#playView"
60
80
}
61
81
}
62
82
}
+12
-1
lexicons/fm.teal.alpha/feed/play.json
+12
-1
lexicons/fm.teal.alpha/feed/play.json
···
19
19
},
20
20
"trackMbId": {
21
21
"type": "string",
22
-
23
22
"description": "The Musicbrainz ID of the track"
24
23
},
25
24
"recordingMbId": {
···
87
86
"type": "string",
88
87
"format": "datetime",
89
88
"description": "The unix timestamp of when the track was played"
89
+
},
90
+
"trackDiscriminant": {
91
+
"type": "string",
92
+
"maxLength": 128,
93
+
"maxGraphemes": 1280,
94
+
"description": "Distinguishing information for track variants (e.g. 'Acoustic Version', 'Live at Wembley', 'Radio Edit', 'Demo'). Used to differentiate between different versions of the same base track while maintaining grouping capabilities."
95
+
},
96
+
"releaseDiscriminant": {
97
+
"type": "string",
98
+
"maxLength": 128,
99
+
"maxGraphemes": 1280,
100
+
"description": "Distinguishing information for release variants (e.g. 'Deluxe Edition', 'Remastered', '2023 Remaster', 'Special Edition'). Used to differentiate between different versions of the same base release while maintaining grouping capabilities."
90
101
}
91
102
}
92
103
}
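The discriminant fields are meant to separate variant labels from the base names so plays can still be grouped. A minimal sketch of that read path, assuming the `track_discriminant` column that the migrations below add to `plays` (the track name is a placeholder):

```sql
-- Group one base track while keeping its variants visible.
SELECT
    track_name,
    track_discriminant,                 -- e.g. 'Acoustic Version'; NULL for the plain recording
    COUNT(*)            AS play_count,
    COUNT(DISTINCT did) AS unique_listeners
FROM plays
WHERE track_name = 'Some Track'         -- placeholder base name
GROUP BY track_name, track_discriminant
ORDER BY play_count DESC;
```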
+24
lexicons/fm.teal.alpha/richtext/facet.json
+24
lexicons/fm.teal.alpha/richtext/facet.json
···
1
+
{
2
+
"lexicon": 1,
3
+
"id": "fm.teal.alpha.richtext.facet",
4
+
"defs": {
5
+
"main": {
6
+
"type": "object",
7
+
"description": "Annotation of a sub-string within rich text.",
8
+
"required": ["index", "features"],
9
+
"properties": {
10
+
"index": { "type": "ref", "ref": "app.bsky.richtext.facet#byteSlice" },
11
+
"features": {
12
+
"type": "array",
13
+
"items": {
14
+
"type": "union",
15
+
"refs": [
16
+
"app.bsky.richtext.facet#mention",
17
+
"app.bsky.richtext.facet#link"
18
+
]
19
+
}
20
+
}
21
+
}
22
+
}
23
+
}
24
+
}
+226
migrations/20241220000001_initial_schema.sql
+226
migrations/20241220000001_initial_schema.sql
···
1
+
-- Initial comprehensive schema for Teal music platform
2
+
-- Based on services/cadet/sql/base.sql
3
+
4
+
CREATE TABLE artists (
5
+
mbid UUID PRIMARY KEY,
6
+
name TEXT NOT NULL,
7
+
play_count INTEGER DEFAULT 0
8
+
);
9
+
10
+
-- releases are analogous to 'albums'
11
+
CREATE TABLE releases (
12
+
mbid UUID PRIMARY KEY,
13
+
name TEXT NOT NULL,
14
+
play_count INTEGER DEFAULT 0
15
+
);
16
+
17
+
-- recordings are analogous to 'tracks' BUT a track can appear on multiple releases!
18
+
CREATE TABLE recordings (
19
+
mbid UUID PRIMARY KEY,
20
+
name TEXT NOT NULL,
21
+
play_count INTEGER DEFAULT 0
22
+
);
23
+
24
+
CREATE TABLE plays (
25
+
uri TEXT PRIMARY KEY,
26
+
did TEXT NOT NULL,
27
+
rkey TEXT NOT NULL,
28
+
cid TEXT NOT NULL,
29
+
isrc TEXT,
30
+
duration INTEGER,
31
+
track_name TEXT NOT NULL,
32
+
played_time TIMESTAMP WITH TIME ZONE,
33
+
processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
34
+
release_mbid UUID,
35
+
release_name TEXT,
36
+
recording_mbid UUID,
37
+
submission_client_agent TEXT,
38
+
music_service_base_domain TEXT,
39
+
origin_url TEXT,
40
+
FOREIGN KEY (release_mbid) REFERENCES releases (mbid),
41
+
FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid)
42
+
);
43
+
44
+
CREATE INDEX idx_plays_release_mbid ON plays (release_mbid);
45
+
CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid);
46
+
CREATE INDEX idx_plays_played_time ON plays (played_time);
47
+
CREATE INDEX idx_plays_did ON plays (did);
48
+
49
+
CREATE TABLE play_to_artists (
50
+
play_uri TEXT, -- references plays(uri)
51
+
artist_mbid UUID REFERENCES artists (mbid),
52
+
artist_name TEXT, -- storing here for ease of use when joining
53
+
PRIMARY KEY (play_uri, artist_mbid),
54
+
FOREIGN KEY (play_uri) REFERENCES plays (uri)
55
+
);
56
+
57
+
CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid);
58
+
59
+
-- Profiles table
60
+
CREATE TABLE profiles (
61
+
did TEXT PRIMARY KEY,
62
+
handle TEXT,
63
+
display_name TEXT,
64
+
description TEXT,
65
+
description_facets JSONB,
66
+
avatar TEXT, -- IPLD of the image, bafy...
67
+
banner TEXT,
68
+
created_at TIMESTAMP WITH TIME ZONE
69
+
);
70
+
71
+
-- User featured items table
72
+
CREATE TABLE featured_items (
73
+
did TEXT PRIMARY KEY,
74
+
mbid TEXT NOT NULL,
75
+
type TEXT NOT NULL
76
+
);
77
+
78
+
-- Statii table (status records)
79
+
CREATE TABLE statii (
80
+
uri TEXT PRIMARY KEY,
81
+
did TEXT NOT NULL,
82
+
rkey TEXT NOT NULL,
83
+
cid TEXT NOT NULL,
84
+
record JSONB NOT NULL,
85
+
indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
86
+
);
87
+
88
+
CREATE INDEX idx_statii_did_rkey ON statii (did, rkey);
89
+
90
+
-- Materialized view for artists' play counts
91
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
92
+
SELECT
93
+
a.mbid AS artist_mbid,
94
+
a.name AS artist_name,
95
+
COUNT(p.uri) AS play_count
96
+
FROM
97
+
artists a
98
+
LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
99
+
LEFT JOIN plays p ON p.uri = pta.play_uri
100
+
GROUP BY
101
+
a.mbid,
102
+
a.name;
103
+
104
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid);
105
+
106
+
-- Materialized view for releases' play counts
107
+
CREATE MATERIALIZED VIEW mv_release_play_counts AS
108
+
SELECT
109
+
r.mbid AS release_mbid,
110
+
r.name AS release_name,
111
+
COUNT(p.uri) AS play_count
112
+
FROM
113
+
releases r
114
+
LEFT JOIN plays p ON p.release_mbid = r.mbid
115
+
GROUP BY
116
+
r.mbid,
117
+
r.name;
118
+
119
+
CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid);
120
+
121
+
-- Materialized view for recordings' play counts
122
+
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
123
+
SELECT
124
+
rec.mbid AS recording_mbid,
125
+
rec.name AS recording_name,
126
+
COUNT(p.uri) AS play_count
127
+
FROM
128
+
recordings rec
129
+
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
130
+
GROUP BY
131
+
rec.mbid,
132
+
rec.name;
133
+
134
+
CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid);
135
+
136
+
-- Global play count materialized view
137
+
CREATE MATERIALIZED VIEW mv_global_play_count AS
138
+
SELECT
139
+
COUNT(uri) AS total_plays,
140
+
COUNT(DISTINCT did) AS unique_listeners
141
+
FROM plays;
142
+
143
+
CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays);
144
+
145
+
-- Top artists in the last 30 days
146
+
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
147
+
SELECT
148
+
a.mbid AS artist_mbid,
149
+
a.name AS artist_name,
150
+
COUNT(p.uri) AS play_count
151
+
FROM artists a
152
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
153
+
INNER JOIN plays p ON p.uri = pta.play_uri
154
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
155
+
GROUP BY a.mbid, a.name
156
+
ORDER BY COUNT(p.uri) DESC;
157
+
158
+
-- Top releases in the last 30 days
159
+
CREATE MATERIALIZED VIEW mv_top_releases_30days AS
160
+
SELECT
161
+
r.mbid AS release_mbid,
162
+
r.name AS release_name,
163
+
COUNT(p.uri) AS play_count
164
+
FROM releases r
165
+
INNER JOIN plays p ON p.release_mbid = r.mbid
166
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
167
+
GROUP BY r.mbid, r.name
168
+
ORDER BY COUNT(p.uri) DESC;
169
+
170
+
-- Top artists for user in the last 30 days
171
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
172
+
SELECT
173
+
prof.did,
174
+
a.mbid AS artist_mbid,
175
+
a.name AS artist_name,
176
+
COUNT(p.uri) AS play_count
177
+
FROM artists a
178
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
179
+
INNER JOIN plays p ON p.uri = pta.play_uri
180
+
INNER JOIN profiles prof ON prof.did = p.did
181
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
182
+
GROUP BY prof.did, a.mbid, a.name
183
+
ORDER BY COUNT(p.uri) DESC;
184
+
185
+
-- Top artists for user in the last 7 days
186
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
187
+
SELECT
188
+
prof.did,
189
+
a.mbid AS artist_mbid,
190
+
a.name AS artist_name,
191
+
COUNT(p.uri) AS play_count
192
+
FROM artists a
193
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
194
+
INNER JOIN plays p ON p.uri = pta.play_uri
195
+
INNER JOIN profiles prof ON prof.did = p.did
196
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
197
+
GROUP BY prof.did, a.mbid, a.name
198
+
ORDER BY COUNT(p.uri) DESC;
199
+
200
+
-- Top releases for user in the last 30 days
201
+
CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS
202
+
SELECT
203
+
prof.did,
204
+
r.mbid AS release_mbid,
205
+
r.name AS release_name,
206
+
COUNT(p.uri) AS play_count
207
+
FROM releases r
208
+
INNER JOIN plays p ON p.release_mbid = r.mbid
209
+
INNER JOIN profiles prof ON prof.did = p.did
210
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
211
+
GROUP BY prof.did, r.mbid, r.name
212
+
ORDER BY COUNT(p.uri) DESC;
213
+
214
+
-- Top releases for user in the last 7 days
215
+
CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS
216
+
SELECT
217
+
prof.did,
218
+
r.mbid AS release_mbid,
219
+
r.name AS release_name,
220
+
COUNT(p.uri) AS play_count
221
+
FROM releases r
222
+
INNER JOIN plays p ON p.release_mbid = r.mbid
223
+
INNER JOIN profiles prof ON prof.did = p.did
224
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
225
+
GROUP BY prof.did, r.mbid, r.name
226
+
ORDER BY COUNT(p.uri) DESC;
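The unique indexes on the per-entity aggregates are what allow them to be refreshed concurrently. A sketch of how a background job might refresh and query them (the DID and the 10-row limit are placeholders):

```sql
-- Refresh the aggregates that carry a unique index without blocking readers.
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_artist_play_counts;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_release_play_counts;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_recording_play_counts;

-- Example read path: one listener's top artists over the last 30 days.
SELECT artist_name, play_count
FROM mv_top_artists_for_user_30days
WHERE did = 'did:plc:example'           -- placeholder DID
ORDER BY play_count DESC
LIMIT 10;
```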
+59
migrations/20241220000002_car_import_tables.sql
+59
migrations/20241220000002_car_import_tables.sql
···
1
+
-- CAR import functionality tables
2
+
-- For handling AT Protocol CAR file imports and processing
3
+
4
+
-- Tracks uploaded CAR files that are queued for processing
5
+
CREATE TABLE IF NOT EXISTS car_import_requests (
6
+
import_id TEXT PRIMARY KEY,
7
+
car_data_base64 TEXT NOT NULL,
8
+
status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed
9
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
10
+
processed_at TIMESTAMP WITH TIME ZONE,
11
+
error_message TEXT,
12
+
file_size_bytes INTEGER,
13
+
block_count INTEGER,
14
+
extracted_records_count INTEGER DEFAULT 0
15
+
);
16
+
17
+
CREATE INDEX idx_car_import_requests_status ON car_import_requests (status);
18
+
CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at);
19
+
20
+
-- Tracks raw IPLD blocks extracted from CAR files
21
+
CREATE TABLE IF NOT EXISTS car_blocks (
22
+
cid TEXT PRIMARY KEY,
23
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
24
+
block_data BYTEA NOT NULL,
25
+
decoded_successfully BOOLEAN DEFAULT FALSE,
26
+
collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc.
27
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
28
+
);
29
+
30
+
CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id);
31
+
CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type);
32
+
33
+
-- Tracks records extracted from CAR imports that were successfully processed
34
+
CREATE TABLE IF NOT EXISTS car_extracted_records (
35
+
id SERIAL PRIMARY KEY,
36
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
37
+
cid TEXT NOT NULL REFERENCES car_blocks(cid),
38
+
collection_type TEXT NOT NULL,
39
+
record_uri TEXT, -- AT URI if applicable (e.g., for play records)
40
+
synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123')
41
+
rkey TEXT,
42
+
extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
43
+
processing_notes TEXT
44
+
);
45
+
46
+
CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id);
47
+
CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type);
48
+
CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri);
49
+
50
+
-- Tracks import metadata and commit information
51
+
CREATE TABLE IF NOT EXISTS car_import_metadata (
52
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
53
+
metadata_key TEXT NOT NULL,
54
+
metadata_value JSONB NOT NULL,
55
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
56
+
PRIMARY KEY (import_id, metadata_key)
57
+
);
58
+
59
+
CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
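A sketch of the lifecycle these tables are designed for, with placeholder identifiers: a request row starts out pending, extracted blocks and records point back at it, and the row is closed out with summary counters.

```sql
-- Queue a new import (placeholder id and payload).
INSERT INTO car_import_requests (import_id, car_data_base64, file_size_bytes)
VALUES ('import-0001', '<base64-encoded CAR bytes>', 2048);

-- After processing, record the outcome and summary counters.
UPDATE car_import_requests
SET status = 'completed',
    processed_at = NOW(),
    block_count = 17,
    extracted_records_count = 5
WHERE import_id = 'import-0001';

-- Inspect what was pulled out of a given import, by collection.
SELECT collection_type, COUNT(*) AS records
FROM car_extracted_records
WHERE import_id = 'import-0001'
GROUP BY collection_type;
```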
+112
migrations/20241220000003_artists_without_mbids.sql
+112
migrations/20241220000003_artists_without_mbids.sql
···
1
+
-- Migration to support artists without MusicBrainz IDs
2
+
-- This allows the system to comply with the Teal lexicon where only trackName is required
3
+
4
+
-- Add a field to plays table to store raw artist names for records without MBIDs
5
+
ALTER TABLE plays ADD COLUMN artist_names_raw JSONB;
6
+
7
+
-- Create a new artists table that doesn't require MBID as primary key
8
+
CREATE TABLE artists_extended (
9
+
id SERIAL PRIMARY KEY,
10
+
mbid UUID UNIQUE, -- Optional MusicBrainz ID
11
+
name TEXT NOT NULL,
12
+
name_normalized TEXT GENERATED ALWAYS AS (LOWER(TRIM(name))) STORED,
13
+
play_count INTEGER DEFAULT 0,
14
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
15
+
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
16
+
);
17
+
18
+
-- Create index for efficient lookups
19
+
CREATE INDEX idx_artists_extended_mbid ON artists_extended (mbid) WHERE mbid IS NOT NULL;
20
+
CREATE INDEX idx_artists_extended_name_normalized ON artists_extended (name_normalized);
21
+
CREATE UNIQUE INDEX idx_artists_extended_name_unique ON artists_extended (name_normalized) WHERE mbid IS NULL;
22
+
23
+
-- Create a new junction table that can handle both MBID and non-MBID artists
24
+
CREATE TABLE play_to_artists_extended (
25
+
play_uri TEXT NOT NULL REFERENCES plays(uri),
26
+
artist_id INTEGER NOT NULL REFERENCES artists_extended(id),
27
+
artist_name TEXT NOT NULL, -- Denormalized for performance
28
+
PRIMARY KEY (play_uri, artist_id)
29
+
);
30
+
31
+
CREATE INDEX idx_play_to_artists_extended_artist ON play_to_artists_extended (artist_id);
32
+
33
+
-- Migrate existing data from old tables to new structure
34
+
INSERT INTO artists_extended (mbid, name, play_count)
35
+
SELECT mbid, name, play_count FROM artists;
36
+
37
+
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name)
38
+
SELECT
39
+
pta.play_uri,
40
+
ae.id,
41
+
pta.artist_name
42
+
FROM play_to_artists pta
43
+
JOIN artists_extended ae ON ae.mbid = pta.artist_mbid;
44
+
45
+
-- Update materialized views to use new structure
46
+
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
47
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
48
+
SELECT
49
+
ae.id AS artist_id,
50
+
ae.mbid AS artist_mbid,
51
+
ae.name AS artist_name,
52
+
COUNT(p.uri) AS play_count
53
+
FROM
54
+
artists_extended ae
55
+
LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
56
+
LEFT JOIN plays p ON p.uri = ptae.play_uri
57
+
GROUP BY
58
+
ae.id, ae.mbid, ae.name;
59
+
60
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts_new ON mv_artist_play_counts (artist_id);
61
+
62
+
-- Update other materialized views that reference artists
63
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_30days;
64
+
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
65
+
SELECT
66
+
ae.id AS artist_id,
67
+
ae.mbid AS artist_mbid,
68
+
ae.name AS artist_name,
69
+
COUNT(p.uri) AS play_count
70
+
FROM artists_extended ae
71
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
72
+
INNER JOIN plays p ON p.uri = ptae.play_uri
73
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
74
+
GROUP BY ae.id, ae.mbid, ae.name
75
+
ORDER BY COUNT(p.uri) DESC;
76
+
77
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_30days;
78
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
79
+
SELECT
80
+
prof.did,
81
+
ae.id AS artist_id,
82
+
ae.mbid AS artist_mbid,
83
+
ae.name AS artist_name,
84
+
COUNT(p.uri) AS play_count
85
+
FROM artists_extended ae
86
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
87
+
INNER JOIN plays p ON p.uri = ptae.play_uri
88
+
INNER JOIN profiles prof ON prof.did = p.did
89
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
90
+
GROUP BY prof.did, ae.id, ae.mbid, ae.name
91
+
ORDER BY COUNT(p.uri) DESC;
92
+
93
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_7days;
94
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
95
+
SELECT
96
+
prof.did,
97
+
ae.id AS artist_id,
98
+
ae.mbid AS artist_mbid,
99
+
ae.name AS artist_name,
100
+
COUNT(p.uri) AS play_count
101
+
FROM artists_extended ae
102
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
103
+
INNER JOIN plays p ON p.uri = ptae.play_uri
104
+
INNER JOIN profiles prof ON prof.did = p.did
105
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
106
+
GROUP BY prof.did, ae.id, ae.mbid, ae.name
107
+
ORDER BY COUNT(p.uri) DESC;
108
+
109
+
-- Comment explaining the migration strategy
110
+
COMMENT ON TABLE artists_extended IS 'Extended artists table that supports both MusicBrainz and non-MusicBrainz artists. Uses serial ID as primary key with optional MBID.';
111
+
COMMENT ON TABLE play_to_artists_extended IS 'Junction table linking plays to artists using the new artists_extended table structure.';
112
+
COMMENT ON COLUMN plays.artist_names_raw IS 'Raw artist names as JSON array for plays without MusicBrainz data, used as fallback when artist relationships cannot be established.';
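A sketch of how a play without MusicBrainz data could be linked under this scheme (the URI and artist name are placeholders): keep the raw names on the play as a fallback, create the artist by name if needed, then attach it through the new junction table.

```sql
-- Fallback: keep the raw artist names on the play itself.
UPDATE plays
SET artist_names_raw = '["Example Artist"]'::jsonb
WHERE uri = 'at://did:plc:example/fm.teal.alpha.feed.play/3kexample';

-- Create the artist by name when no MBID is available.
INSERT INTO artists_extended (name)
VALUES ('Example Artist')
ON CONFLICT DO NOTHING;

-- Link the play to the artist through the extended junction table.
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name)
SELECT 'at://did:plc:example/fm.teal.alpha.feed.play/3kexample', ae.id, ae.name
FROM artists_extended ae
WHERE ae.name_normalized = LOWER(TRIM('Example Artist'))
ON CONFLICT DO NOTHING;
```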
+76
migrations/20241220000004_synthetic_mbids.sql
+76
migrations/20241220000004_synthetic_mbids.sql
···
1
+
-- Migration to support synthetic MBIDs for artists without MusicBrainz data
2
+
-- This ensures all artists have some form of ID while maintaining uniqueness
3
+
4
+
-- Enable UUID extension for v5 UUID generation
5
+
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
6
+
7
+
-- Add a column to track MBID type (musicbrainz, synthetic, unknown)
8
+
ALTER TABLE artists_extended ADD COLUMN mbid_type TEXT DEFAULT 'unknown' NOT NULL;
9
+
10
+
-- Add check constraint for valid MBID types
11
+
ALTER TABLE artists_extended ADD CONSTRAINT chk_mbid_type
12
+
CHECK (mbid_type IN ('musicbrainz', 'synthetic', 'unknown'));
13
+
14
+
-- Update existing records to set proper MBID type
15
+
UPDATE artists_extended SET mbid_type = 'musicbrainz' WHERE mbid IS NOT NULL;
16
+
17
+
-- Drop the unique constraint on name_normalized for null MBIDs since we'll handle duplicates differently
18
+
DROP INDEX IF EXISTS idx_artists_extended_name_unique;
19
+
20
+
-- Add index for efficient querying by MBID type
21
+
CREATE INDEX idx_artists_extended_mbid_type ON artists_extended (mbid_type);
22
+
23
+
-- Create a view to easily work with different artist types
24
+
CREATE VIEW artists_with_type AS
25
+
SELECT
26
+
id,
27
+
mbid,
28
+
name,
29
+
mbid_type,
30
+
play_count,
31
+
created_at,
32
+
updated_at,
33
+
-- For synthetic MBIDs, we can show the source name used for generation
34
+
CASE
35
+
WHEN mbid_type = 'synthetic' THEN 'Generated from: ' || name
36
+
WHEN mbid_type = 'musicbrainz' THEN 'MusicBrainz: ' || mbid::text
37
+
ELSE 'No MBID available'
38
+
END as mbid_info
39
+
FROM artists_extended;
40
+
41
+
-- Update materialized views to include MBID type information
42
+
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
43
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
44
+
SELECT
45
+
ae.id AS artist_id,
46
+
ae.mbid AS artist_mbid,
47
+
ae.name AS artist_name,
48
+
ae.mbid_type,
49
+
COUNT(p.uri) AS play_count
50
+
FROM
51
+
artists_extended ae
52
+
LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
53
+
LEFT JOIN plays p ON p.uri = ptae.play_uri
54
+
GROUP BY
55
+
ae.id, ae.mbid, ae.name, ae.mbid_type;
56
+
57
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts_with_type ON mv_artist_play_counts (artist_id);
58
+
59
+
-- Add comments explaining the synthetic MBID system
60
+
COMMENT ON COLUMN artists_extended.mbid_type IS 'Type of MBID: musicbrainz (real), synthetic (generated), or unknown (legacy data)';
61
+
COMMENT ON COLUMN artists_extended.mbid IS 'MusicBrainz ID (for musicbrainz type) or synthetic UUID (for synthetic type)';
62
+
COMMENT ON VIEW artists_with_type IS 'View that provides human-readable information about artist MBID sources';
63
+
64
+
-- Add a function to generate synthetic MBIDs
65
+
CREATE OR REPLACE FUNCTION generate_synthetic_mbid(artist_name TEXT) RETURNS UUID AS $$
66
+
DECLARE
67
+
namespace_uuid UUID := '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; -- DNS namespace
68
+
result_uuid UUID;
69
+
BEGIN
70
+
-- Generate deterministic UUID v5 based on artist name
71
+
SELECT uuid_generate_v5(namespace_uuid, artist_name) INTO result_uuid;
72
+
RETURN result_uuid;
73
+
END;
74
+
$$ LANGUAGE plpgsql IMMUTABLE;
75
+
76
+
COMMENT ON FUNCTION generate_synthetic_mbid IS 'Generates a deterministic UUID v5 for artist names without MusicBrainz IDs';
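Because the synthetic MBID is a deterministic UUID v5 of the name, the same unmatched artist always resolves to the same identifier across imports. A sketch with a placeholder name:

```sql
-- The same input always yields the same synthetic MBID.
SELECT generate_synthetic_mbid('Example Garage Band') AS synthetic_mbid;

-- Register an artist under a synthetic MBID and tag its origin.
INSERT INTO artists_extended (mbid, name, mbid_type)
VALUES (generate_synthetic_mbid('Example Garage Band'), 'Example Garage Band', 'synthetic')
ON CONFLICT (mbid) DO NOTHING;

-- Human-readable origin via the helper view.
SELECT name, mbid_type, mbid_info
FROM artists_with_type
WHERE name = 'Example Garage Band';
```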
+101
migrations/20241220000005_fuzzy_matching.sql
+101
migrations/20241220000005_fuzzy_matching.sql
···
1
+
-- Migration to add fuzzy text matching capabilities
2
+
-- This enables better artist name matching using trigram similarity
3
+
4
+
-- Enable pg_trgm extension for trigram similarity matching
5
+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
6
+
7
+
-- Create indexes for efficient trigram matching on artist names
8
+
CREATE INDEX idx_artists_extended_name_trgm ON artists_extended USING gin (name gin_trgm_ops);
9
+
CREATE INDEX idx_artists_extended_name_normalized_trgm ON artists_extended USING gin (name_normalized gin_trgm_ops);
10
+
11
+
-- Create a function to calculate comprehensive artist similarity
12
+
CREATE OR REPLACE FUNCTION calculate_artist_similarity(
13
+
input_name TEXT,
14
+
existing_name TEXT,
15
+
input_album TEXT DEFAULT NULL,
16
+
existing_album TEXT DEFAULT NULL
17
+
) RETURNS FLOAT AS $$
18
+
DECLARE
19
+
name_similarity FLOAT;
20
+
album_similarity FLOAT := 0.0;
21
+
final_score FLOAT;
22
+
BEGIN
23
+
-- Calculate trigram similarity for artist names
24
+
name_similarity := similarity(LOWER(TRIM(input_name)), LOWER(TRIM(existing_name)));
25
+
26
+
-- Boost for exact matches after normalization
27
+
IF LOWER(TRIM(regexp_replace(input_name, '[^a-zA-Z0-9\s]', '', 'g'))) =
28
+
LOWER(TRIM(regexp_replace(existing_name, '[^a-zA-Z0-9\s]', '', 'g'))) THEN
29
+
name_similarity := GREATEST(name_similarity, 0.95);
30
+
END IF;
31
+
32
+
-- Factor in album similarity if both are provided
33
+
IF input_album IS NOT NULL AND existing_album IS NOT NULL THEN
34
+
album_similarity := similarity(LOWER(TRIM(input_album)), LOWER(TRIM(existing_album)));
35
+
-- Weight: 80% name, 20% album
36
+
final_score := (name_similarity * 0.8) + (album_similarity * 0.2);
37
+
ELSE
38
+
final_score := name_similarity;
39
+
END IF;
40
+
41
+
RETURN final_score;
42
+
END;
43
+
$$ LANGUAGE plpgsql IMMUTABLE;
44
+
45
+
-- Create a view for fuzzy artist matching with confidence scores
46
+
CREATE VIEW fuzzy_artist_matches AS
47
+
SELECT DISTINCT
48
+
ae1.id as query_artist_id,
49
+
ae1.name as query_artist_name,
50
+
ae1.mbid_type as query_mbid_type,
51
+
ae2.id as match_artist_id,
52
+
ae2.name as match_artist_name,
53
+
ae2.mbid as match_mbid,
54
+
ae2.mbid_type as match_mbid_type,
55
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as name_similarity,
56
+
CASE
57
+
WHEN ae2.mbid_type = 'musicbrainz' THEN 'upgrade_to_mb'
58
+
WHEN ae1.mbid_type = 'musicbrainz' AND ae2.mbid_type = 'synthetic' THEN 'consolidate_to_mb'
59
+
ELSE 'merge_synthetic'
60
+
END as match_action
61
+
FROM artists_extended ae1
62
+
CROSS JOIN artists_extended ae2
63
+
WHERE ae1.id != ae2.id
64
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) > 0.8
65
+
AND (
66
+
ae1.mbid_type = 'synthetic' OR ae2.mbid_type = 'musicbrainz'
67
+
);
68
+
69
+
-- Add comments
70
+
COMMENT ON EXTENSION pg_trgm IS 'Trigram extension for fuzzy text matching';
71
+
COMMENT ON INDEX idx_artists_extended_name_trgm IS 'GIN index for trigram similarity on artist names';
72
+
COMMENT ON FUNCTION calculate_artist_similarity IS 'Calculates similarity score between artists considering name and optional album context';
73
+
COMMENT ON VIEW fuzzy_artist_matches IS 'Shows potential artist matches with confidence scores and recommended actions';
74
+
75
+
-- Create a function to suggest artist consolidations
76
+
CREATE OR REPLACE FUNCTION suggest_artist_consolidations(min_similarity FLOAT DEFAULT 0.9)
77
+
RETURNS TABLE(
78
+
action TEXT,
79
+
synthetic_artist TEXT,
80
+
target_artist TEXT,
81
+
similarity_score FLOAT,
82
+
synthetic_plays INTEGER,
83
+
target_plays INTEGER
84
+
) AS $$
85
+
BEGIN
86
+
RETURN QUERY
87
+
SELECT
88
+
fam.match_action as action,
89
+
fam.query_artist_name as synthetic_artist,
90
+
fam.match_artist_name as target_artist,
91
+
fam.name_similarity as similarity_score,
92
+
(SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.query_artist_id) as synthetic_plays,
93
+
(SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.match_artist_id) as target_plays
94
+
FROM fuzzy_artist_matches fam
95
+
WHERE fam.name_similarity >= min_similarity
96
+
AND fam.match_action = 'upgrade_to_mb'
97
+
ORDER BY fam.name_similarity DESC, synthetic_plays DESC;
98
+
END;
99
+
$$ LANGUAGE plpgsql;
100
+
101
+
COMMENT ON FUNCTION suggest_artist_consolidations IS 'Returns suggestions for consolidating synthetic artists with MusicBrainz artists based on similarity';
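A sketch of how these pieces fit together (the names and the 0.9 threshold are illustrative): trigram similarity scores candidate pairs, and the helper function surfaces synthetic artists that look like existing MusicBrainz entries.

```sql
-- Raw trigram similarity (0..1) between two artist names.
SELECT similarity('The Beatles', 'Beatles') AS name_similarity;

-- Combined score: 80% artist name, 20% optional album context.
SELECT calculate_artist_similarity('The Beatles', 'Beatles',
                                   'Abbey Road', 'Abbey Road (2019 Mix)') AS score;

-- Candidate consolidations of synthetic artists into MusicBrainz ones.
SELECT * FROM suggest_artist_consolidations(0.9);
```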
+138
migrations/20241220000006_discriminant_fields.sql
+138
migrations/20241220000006_discriminant_fields.sql
···
1
+
-- Migration to add discriminant fields for track and release variants
2
+
-- This enables proper handling of different versions while maintaining grouping capabilities
3
+
4
+
-- Add discriminant fields to plays table
5
+
ALTER TABLE plays ADD COLUMN track_discriminant TEXT;
6
+
ALTER TABLE plays ADD COLUMN release_discriminant TEXT;
7
+
8
+
-- Add discriminant field to releases table
9
+
ALTER TABLE releases ADD COLUMN discriminant TEXT;
10
+
11
+
-- Add discriminant field to recordings table
12
+
ALTER TABLE recordings ADD COLUMN discriminant TEXT;
13
+
14
+
-- Create indexes for efficient searching and filtering
15
+
CREATE INDEX idx_plays_track_discriminant ON plays (track_discriminant);
16
+
CREATE INDEX idx_plays_release_discriminant ON plays (release_discriminant);
17
+
CREATE INDEX idx_releases_discriminant ON releases (discriminant);
18
+
CREATE INDEX idx_recordings_discriminant ON recordings (discriminant);
19
+
20
+
-- Create composite indexes for grouping by base name + discriminant
21
+
CREATE INDEX idx_plays_track_name_discriminant ON plays (track_name, track_discriminant);
22
+
CREATE INDEX idx_plays_release_name_discriminant ON plays (release_name, release_discriminant);
23
+
24
+
-- Update materialized views to include discriminant information
25
+
DROP MATERIALIZED VIEW IF EXISTS mv_release_play_counts;
26
+
CREATE MATERIALIZED VIEW mv_release_play_counts AS
27
+
SELECT
28
+
r.mbid AS release_mbid,
29
+
r.name AS release_name,
30
+
r.discriminant AS release_discriminant,
31
+
COUNT(p.uri) AS play_count
32
+
FROM
33
+
releases r
34
+
LEFT JOIN plays p ON p.release_mbid = r.mbid
35
+
GROUP BY
36
+
r.mbid, r.name, r.discriminant;
37
+
38
+
CREATE UNIQUE INDEX idx_mv_release_play_counts_discriminant ON mv_release_play_counts (release_mbid);
39
+
40
+
DROP MATERIALIZED VIEW IF EXISTS mv_recording_play_counts;
41
+
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
42
+
SELECT
43
+
rec.mbid AS recording_mbid,
44
+
rec.name AS recording_name,
45
+
rec.discriminant AS recording_discriminant,
46
+
COUNT(p.uri) AS play_count
47
+
FROM
48
+
recordings rec
49
+
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
50
+
GROUP BY
51
+
rec.mbid, rec.name, rec.discriminant;
52
+
53
+
CREATE UNIQUE INDEX idx_mv_recording_play_counts_discriminant ON mv_recording_play_counts (recording_mbid);
54
+
55
+
-- Create views for analyzing track/release variants
56
+
CREATE VIEW track_variants AS
57
+
SELECT
58
+
track_name,
59
+
track_discriminant,
60
+
COUNT(*) AS play_count,
61
+
COUNT(DISTINCT did) AS unique_listeners,
62
+
COUNT(DISTINCT recording_mbid) AS unique_recordings
63
+
FROM plays
64
+
WHERE track_name IS NOT NULL
65
+
GROUP BY track_name, track_discriminant
66
+
ORDER BY track_name, play_count DESC;
67
+
68
+
CREATE VIEW release_variants AS
69
+
SELECT
70
+
release_name,
71
+
release_discriminant,
72
+
COUNT(*) AS play_count,
73
+
COUNT(DISTINCT did) AS unique_listeners,
74
+
COUNT(DISTINCT release_mbid) AS unique_releases
75
+
FROM plays
76
+
WHERE release_name IS NOT NULL
77
+
GROUP BY release_name, release_discriminant
78
+
ORDER BY release_name, play_count DESC;
79
+
80
+
-- Create function to extract potential discriminants from existing names
81
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
82
+
DECLARE
83
+
discriminant_patterns TEXT[] := ARRAY[
84
+
'\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\)',
85
+
'\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\]',
86
+
'\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\}'
87
+
];
88
+
pattern TEXT;
89
+
match_result TEXT;
90
+
BEGIN
91
+
-- Try each pattern to find discriminant information
92
+
FOREACH pattern IN ARRAY discriminant_patterns
93
+
LOOP
94
+
SELECT substring(name_text FROM pattern) INTO match_result;
95
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
96
+
RETURN trim(match_result);
97
+
END IF;
98
+
END LOOP;
99
+
100
+
RETURN NULL;
101
+
END;
102
+
$$ LANGUAGE plpgsql IMMUTABLE;
103
+
104
+
-- Create function to get base name without discriminant
105
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
106
+
DECLARE
107
+
cleanup_patterns TEXT[] := ARRAY[
108
+
'\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\)\s*',
109
+
'\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\]\s*',
110
+
'\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\}\s*'
111
+
];
112
+
pattern TEXT;
113
+
result_text TEXT := name_text;
114
+
BEGIN
115
+
-- Remove discriminant patterns to get base name
116
+
FOREACH pattern IN ARRAY cleanup_patterns
117
+
LOOP
118
+
result_text := regexp_replace(result_text, pattern, ' ', 'gi');
119
+
END LOOP;
120
+
121
+
-- Clean up extra whitespace
122
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
123
+
124
+
RETURN result_text;
125
+
END;
126
+
$$ LANGUAGE plpgsql IMMUTABLE;
127
+
128
+
-- Add comments explaining the discriminant system
129
+
COMMENT ON COLUMN plays.track_discriminant IS 'Distinguishing information for track variants (e.g., "Acoustic Version", "Live at Wembley", "Radio Edit")';
130
+
COMMENT ON COLUMN plays.release_discriminant IS 'Distinguishing information for release variants (e.g., "Deluxe Edition", "Remastered", "2023 Remaster")';
131
+
COMMENT ON COLUMN releases.discriminant IS 'Distinguishing information for release variants to enable proper grouping';
132
+
COMMENT ON COLUMN recordings.discriminant IS 'Distinguishing information for recording variants to enable proper grouping';
133
+
134
+
COMMENT ON VIEW track_variants IS 'Shows all variants of tracks with their play counts and unique listeners';
135
+
COMMENT ON VIEW release_variants IS 'Shows all variants of releases with their play counts and unique listeners';
136
+
137
+
COMMENT ON FUNCTION extract_discriminant IS 'Extracts discriminant information from track/release names for migration purposes';
138
+
COMMENT ON FUNCTION get_base_name IS 'Returns the base name without discriminant information for grouping purposes';
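A sketch of how the helpers are intended to be used. Note that the extraction patterns above match lowercase markers only, which is what the later case-sensitivity migration addresses; the sample name is a placeholder.

```sql
-- Pull the variant label out of a name, and the grouping key without it.
SELECT extract_discriminant('some album (2011 remaster)') AS discriminant;
SELECT get_base_name('some album (2011 remaster)')        AS base_name;

-- Compare the variants of one base track captured in plays.
SELECT * FROM track_variants WHERE track_name = 'Some Track';
```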
+276
migrations/20241220000007_enhanced_discriminant_extraction.sql
+276
migrations/20241220000007_enhanced_discriminant_extraction.sql
···
1
+
-- Enhanced discriminant extraction with comprehensive edition/version patterns
2
+
-- This migration improves the auto-population of discriminants for better metadata handling
3
+
4
+
-- Drop existing functions to replace them with enhanced versions
5
+
DROP FUNCTION IF EXISTS extract_discriminant(TEXT);
6
+
DROP FUNCTION IF EXISTS get_base_name(TEXT);
7
+
8
+
-- Enhanced function to extract discriminants with comprehensive patterns
9
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
10
+
DECLARE
11
+
-- Comprehensive patterns for discriminant extraction
12
+
discriminant_patterns TEXT[] := ARRAY[
13
+
-- Parentheses patterns
14
+
'\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)',
15
+
'\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)',
16
+
'\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)',
17
+
'\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)',
18
+
'\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)',
19
+
20
+
-- Brackets patterns
21
+
'\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]',
22
+
'\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]',
23
+
'\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]',
24
+
'\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]',
25
+
'\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]',
26
+
27
+
-- Braces patterns
28
+
'\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}',
29
+
'\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}',
30
+
'\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}',
31
+
'\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}',
32
+
'\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}',
33
+
34
+
-- Dash/hyphen patterns (common for editions)
35
+
'[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$',
36
+
'[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
37
+
38
+
-- Colon patterns (common for subtitles and versions)
39
+
':\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$',
40
+
':\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
41
+
];
42
+
43
+
pattern TEXT;
44
+
match_result TEXT;
45
+
BEGIN
46
+
-- Return early if input is null or empty
47
+
IF name_text IS NULL OR trim(name_text) = '' THEN
48
+
RETURN NULL;
49
+
END IF;
50
+
51
+
-- Try each pattern to find discriminant information
52
+
FOREACH pattern IN ARRAY discriminant_patterns
53
+
LOOP
54
+
SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
55
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
56
+
-- Clean up the match result
57
+
match_result := trim(match_result);
58
+
-- Remove leading/trailing punctuation
59
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
60
+
-- Ensure it's not just whitespace or empty after cleanup
61
+
IF length(trim(match_result)) > 0 THEN
62
+
RETURN match_result;
63
+
END IF;
64
+
END IF;
65
+
END LOOP;
66
+
67
+
RETURN NULL;
68
+
END;
69
+
$$ LANGUAGE plpgsql IMMUTABLE;
70
+
71
+
-- Enhanced function to get base name without discriminant
72
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
73
+
DECLARE
74
+
-- Comprehensive cleanup patterns matching the extraction patterns
75
+
cleanup_patterns TEXT[] := ARRAY[
76
+
-- Remove parentheses content
77
+
'\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*',
78
+
'\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*',
79
+
'\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*',
80
+
'\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*',
81
+
'\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*',
82
+
83
+
-- Remove brackets content
84
+
'\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*',
85
+
'\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*',
86
+
'\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*',
87
+
'\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*',
88
+
'\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*',
89
+
90
+
-- Remove braces content
91
+
'\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*',
92
+
'\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*',
93
+
'\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*',
94
+
'\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*',
95
+
'\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*',
96
+
97
+
-- Remove dash/hyphen patterns
98
+
'\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$',
99
+
'\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
100
+
101
+
-- Remove colon patterns
102
+
'\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$',
103
+
'\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
104
+
];
105
+
106
+
pattern TEXT;
107
+
result_text TEXT := name_text;
108
+
BEGIN
109
+
-- Return early if input is null or empty
110
+
IF name_text IS NULL OR trim(name_text) = '' THEN
111
+
RETURN name_text;
112
+
END IF;
113
+
114
+
-- Remove discriminant patterns to get base name
115
+
FOREACH pattern IN ARRAY cleanup_patterns
116
+
LOOP
117
+
result_text := regexp_replace(result_text, pattern, ' ', 'gi');
118
+
END LOOP;
119
+
120
+
-- Clean up extra whitespace and normalize
121
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
122
+
123
+
-- Remove trailing punctuation that might be left after removal
124
+
result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
125
+
result_text := trim(result_text);
126
+
127
+
-- Ensure we don't return an empty string
128
+
IF length(result_text) = 0 THEN
129
+
RETURN name_text;
130
+
END IF;
131
+
132
+
RETURN result_text;
133
+
END;
134
+
$$ LANGUAGE plpgsql IMMUTABLE;
135
+
136
+
-- Create function to extract discriminant specifically for editions and versions
137
+
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
138
+
DECLARE
139
+
-- Focused patterns for edition/version extraction
140
+
edition_patterns TEXT[] := ARRAY[
141
+
-- Edition patterns
142
+
'\(([^)]*edition[^)]*)\)',
143
+
'\[([^]]*edition[^]]*)\]',
144
+
'\{([^}]*edition[^}]*)\}',
145
+
'[-–—]\s*([^-–—]*edition[^-–—]*)$',
146
+
':\s*([^:]*edition[^:]*)$',
147
+
148
+
-- Version patterns
149
+
'\(([^)]*version[^)]*)\)',
150
+
'\[([^]]*version[^]]*)\]',
151
+
'\{([^}]*version[^}]*)\}',
152
+
'[-–—]\s*([^-–—]*version[^-–—]*)$',
153
+
':\s*([^:]*version[^:]*)$',
154
+
155
+
-- Remaster patterns
156
+
'\(([^)]*remaster[^)]*)\)',
157
+
'\[([^]]*remaster[^]]*)\]',
158
+
'\{([^}]*remaster[^}]*)\}',
159
+
'[-–—]\s*([^-–—]*remaster[^-–—]*)$',
160
+
':\s*([^:]*remaster[^:]*)$',
161
+
162
+
-- Year-based patterns
163
+
'\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
164
+
'\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
165
+
'\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
166
+
];
167
+
168
+
pattern TEXT;
169
+
match_result TEXT;
170
+
BEGIN
171
+
-- Return early if input is null or empty
172
+
IF name_text IS NULL OR trim(name_text) = '' THEN
173
+
RETURN NULL;
174
+
END IF;
175
+
176
+
-- Try edition-specific patterns first
177
+
FOREACH pattern IN ARRAY edition_patterns
178
+
LOOP
179
+
SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
180
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
181
+
match_result := trim(match_result);
182
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
183
+
IF length(trim(match_result)) > 0 THEN
184
+
RETURN match_result;
185
+
END IF;
186
+
END IF;
187
+
END LOOP;
188
+
189
+
RETURN NULL;
190
+
END;
191
+
$$ LANGUAGE plpgsql IMMUTABLE;
192
+
193
+
-- Update recordings table to populate discriminants from existing names
194
+
UPDATE recordings
195
+
SET discriminant = extract_discriminant(name)
196
+
WHERE discriminant IS NULL
197
+
AND extract_discriminant(name) IS NOT NULL;
198
+
199
+
-- Update releases table to populate discriminants from existing names
200
+
UPDATE releases
201
+
SET discriminant = extract_discriminant(name)
202
+
WHERE discriminant IS NULL
203
+
AND extract_discriminant(name) IS NOT NULL;
204
+
205
+
-- Update plays table to populate discriminants from existing names where not already set
206
+
UPDATE plays
207
+
SET track_discriminant = extract_discriminant(track_name)
208
+
WHERE track_discriminant IS NULL
209
+
AND extract_discriminant(track_name) IS NOT NULL;
210
+
211
+
UPDATE plays
212
+
SET release_discriminant = extract_discriminant(release_name)
213
+
WHERE release_discriminant IS NULL
214
+
AND release_name IS NOT NULL
215
+
AND extract_discriminant(release_name) IS NOT NULL;
216
+
217
+
-- Create indexes for efficient discriminant queries
218
+
CREATE INDEX IF NOT EXISTS idx_recordings_name_discriminant ON recordings (name, discriminant);
219
+
CREATE INDEX IF NOT EXISTS idx_releases_name_discriminant ON releases (name, discriminant);
220
+
221
+
-- Add comments for the new function
222
+
COMMENT ON FUNCTION extract_discriminant IS 'Enhanced discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
223
+
COMMENT ON FUNCTION get_base_name IS 'Enhanced base name extraction removing comprehensive discriminant patterns to enable proper grouping';
224
+
COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized function for extracting edition and version discriminants with focused patterns';
225
+
226
+
-- Create a view to show discriminant extraction results for analysis
227
+
CREATE OR REPLACE VIEW discriminant_analysis AS
228
+
SELECT
229
+
'recordings' as table_name,
230
+
name as original_name,
231
+
discriminant,
232
+
get_base_name(name) as base_name,
233
+
extract_discriminant(name) as extracted_discriminant,
234
+
extract_edition_discriminant(name) as edition_discriminant
235
+
FROM recordings
236
+
WHERE name IS NOT NULL
237
+
UNION ALL
238
+
SELECT
239
+
'releases' as table_name,
240
+
name as original_name,
241
+
discriminant,
242
+
get_base_name(name) as base_name,
243
+
extract_discriminant(name) as extracted_discriminant,
244
+
extract_edition_discriminant(name) as edition_discriminant
245
+
FROM releases
246
+
WHERE name IS NOT NULL;
247
+
248
+
COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing discriminant extraction results for quality assessment and debugging';
249
+
250
+
-- Refresh materialized views to include discriminant information
251
+
REFRESH MATERIALIZED VIEW mv_release_play_counts;
252
+
REFRESH MATERIALIZED VIEW mv_recording_play_counts;
253
+
254
+
-- Create summary statistics for discriminant usage
255
+
CREATE OR REPLACE VIEW discriminant_stats AS
256
+
SELECT
257
+
'recordings' as entity_type,
258
+
COUNT(*) as total_count,
259
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
260
+
COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
261
+
ROUND(
262
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2
263
+
) as discriminant_percentage
264
+
FROM recordings
265
+
UNION ALL
266
+
SELECT
267
+
'releases' as entity_type,
268
+
COUNT(*) as total_count,
269
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
270
+
COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
271
+
ROUND(
272
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2
273
+
) as discriminant_percentage
274
+
FROM releases;
275
+
276
+
COMMENT ON VIEW discriminant_stats IS 'Statistics showing discriminant usage and extraction potential across entity types';
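The two views give a quick way to audit extraction quality before and after the backfill above; a sketch of the intended checks:

```sql
-- Coverage summary: how many recordings/releases carry a discriminant,
-- and how many more could still be extracted from their names.
SELECT * FROM discriminant_stats;

-- Spot-check rows where the stored value disagrees with what the
-- current patterns would extract.
SELECT table_name, original_name, discriminant, extracted_discriminant
FROM discriminant_analysis
WHERE discriminant IS DISTINCT FROM extracted_discriminant
LIMIT 20;
```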
+252
migrations/20241220000008_fix_discriminant_case_sensitivity.sql
+252
migrations/20241220000008_fix_discriminant_case_sensitivity.sql
···
1
+
-- Fix case sensitivity in discriminant extraction patterns
2
+
-- This migration updates the discriminant extraction functions to properly handle case-insensitive matching
3
+
4
+
-- Drop dependent views first, then functions, then recreate everything
5
+
DROP VIEW IF EXISTS discriminant_analysis CASCADE;
6
+
DROP VIEW IF EXISTS discriminant_stats CASCADE;
7
+
8
+
-- Drop existing functions to replace with case-insensitive versions
9
+
DROP FUNCTION IF EXISTS extract_discriminant(TEXT) CASCADE;
10
+
DROP FUNCTION IF EXISTS get_base_name(TEXT) CASCADE;
11
+
DROP FUNCTION IF EXISTS extract_edition_discriminant(TEXT) CASCADE;
12
+
13
+
-- Enhanced function to extract discriminants with case-insensitive matching
14
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
15
+
DECLARE
16
+
-- Comprehensive patterns for discriminant extraction with case-insensitive flags
17
+
discriminant_patterns TEXT[] := ARRAY[
18
+
-- Parentheses patterns
19
+
'(?i)\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)',
20
+
'(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)',
21
+
'(?i)\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)',
22
+
'(?i)\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)',
23
+
'(?i)\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)',
24
+
25
+
-- Brackets patterns
26
+
'(?i)\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]',
27
+
'(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]',
28
+
'(?i)\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]',
29
+
'(?i)\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]',
30
+
'(?i)\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]',
31
+
32
+
-- Braces patterns
33
+
'(?i)\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}',
34
+
'(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}',
35
+
'(?i)\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}',
36
+
'(?i)\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}',
37
+
'(?i)\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}',
38
+
39
+
-- Dash/hyphen patterns (common for editions)
40
+
'(?i)[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$',
41
+
'(?i)[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
42
+
43
+
-- Colon patterns (common for subtitles and versions)
44
+
'(?i):\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$',
45
+
'(?i):\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
46
+
];
47
+
48
+
pattern TEXT;
49
+
match_result TEXT;
50
+
BEGIN
51
+
-- Return early if input is null or empty
52
+
IF name_text IS NULL OR trim(name_text) = '' THEN
53
+
RETURN NULL;
54
+
END IF;
55
+
56
+
-- Try each pattern to find discriminant information
57
+
FOREACH pattern IN ARRAY discriminant_patterns
58
+
LOOP
59
+
SELECT substring(name_text FROM pattern) INTO match_result;
60
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
61
+
-- Clean up the match result
62
+
match_result := trim(match_result);
63
+
-- Remove leading/trailing punctuation
64
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
65
+
-- Ensure it's not just whitespace or empty after cleanup
66
+
IF length(trim(match_result)) > 0 THEN
67
+
RETURN match_result;
68
+
END IF;
69
+
END IF;
70
+
END LOOP;
71
+
72
+
RETURN NULL;
73
+
END;
74
+
$$ LANGUAGE plpgsql IMMUTABLE;
75
+
76
+
-- Enhanced function to get base name without discriminant with case-insensitive matching
77
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
78
+
DECLARE
79
+
-- Comprehensive cleanup patterns matching the extraction patterns
80
+
cleanup_patterns TEXT[] := ARRAY[
81
+
-- Remove parentheses content
82
+
'(?i)\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*',
83
+
'(?i)\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*',
84
+
'(?i)\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*',
85
+
'(?i)\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*',
86
+
'(?i)\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*',
87
+
88
+
-- Remove brackets content
89
+
'(?i)\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*',
90
+
'(?i)\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*',
91
+
'(?i)\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*',
92
+
'(?i)\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*',
93
+
'(?i)\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*',
94
+
95
+
-- Remove braces content
96
+
'(?i)\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*',
97
+
'(?i)\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*',
98
+
'(?i)\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*',
99
+
'(?i)\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*',
100
+
'(?i)\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*',
101
+
102
+
-- Remove dash/hyphen patterns
103
+
'(?i)\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$',
104
+
'(?i)\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
105
+
106
+
-- Remove colon patterns
107
+
'(?i)\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$',
108
+
'(?i)\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
109
+
];
110
+
111
+
pattern TEXT;
112
+
result_text TEXT := name_text;
113
+
BEGIN
114
+
-- Return early if input is null or empty
115
+
IF name_text IS NULL OR trim(name_text) = '' THEN
116
+
RETURN name_text;
117
+
END IF;
118
+
119
+
-- Remove discriminant patterns to get base name
120
+
FOREACH pattern IN ARRAY cleanup_patterns
121
+
LOOP
122
+
result_text := regexp_replace(result_text, pattern, ' ', 'g');
123
+
END LOOP;
124
+
125
+
-- Clean up extra whitespace and normalize
126
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
127
+
128
+
-- Remove trailing punctuation that might be left after removal
129
+
result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
130
+
result_text := trim(result_text);
131
+
132
+
-- Ensure we don't return an empty string
133
+
IF length(result_text) = 0 THEN
134
+
RETURN name_text;
135
+
END IF;
136
+
137
+
RETURN result_text;
138
+
END;
139
+
$$ LANGUAGE plpgsql IMMUTABLE;
140
+
141
+
-- Enhanced function to extract discriminant specifically for editions and versions with case-insensitive matching
142
+
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
143
+
DECLARE
144
+
-- Focused patterns for edition/version extraction with case-insensitive flags
145
+
edition_patterns TEXT[] := ARRAY[
146
+
-- Edition patterns
147
+
'(?i)\(([^)]*edition[^)]*)\)',
148
+
'(?i)\[([^]]*edition[^]]*)\]',
149
+
'(?i)\{([^}]*edition[^}]*)\}',
150
+
'(?i)[-–—]\s*([^-–—]*edition[^-–—]*)$',
151
+
'(?i):\s*([^:]*edition[^:]*)$',
152
+
153
+
-- Version patterns
154
+
'(?i)\(([^)]*version[^)]*)\)',
155
+
'(?i)\[([^]]*version[^]]*)\]',
156
+
'(?i)\{([^}]*version[^}]*)\}',
157
+
'(?i)[-–—]\s*([^-–—]*version[^-–—]*)$',
158
+
'(?i):\s*([^:]*version[^:]*)$',
159
+
160
+
-- Remaster patterns
161
+
'(?i)\(([^)]*remaster[^)]*)\)',
162
+
'(?i)\[([^]]*remaster[^]]*)\]',
163
+
'(?i)\{([^}]*remaster[^}]*)\}',
164
+
'(?i)[-–—]\s*([^-–—]*remaster[^-–—]*)$',
165
+
'(?i):\s*([^:]*remaster[^:]*)$',
166
+
167
+
-- Year-based patterns
168
+
'(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
169
+
'(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
170
+
'(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
171
+
];
172
+
173
+
pattern TEXT;
174
+
match_result TEXT;
175
+
BEGIN
176
+
-- Return early if input is null or empty
177
+
IF name_text IS NULL OR trim(name_text) = '' THEN
178
+
RETURN NULL;
179
+
END IF;
180
+
181
+
-- Try edition-specific patterns first
182
+
FOREACH pattern IN ARRAY edition_patterns
183
+
LOOP
184
+
SELECT substring(name_text FROM pattern) INTO match_result;
185
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
186
+
match_result := trim(match_result);
187
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
188
+
IF length(trim(match_result)) > 0 THEN
189
+
RETURN match_result;
190
+
END IF;
191
+
END IF;
192
+
END LOOP;
193
+
194
+
RETURN NULL;
195
+
END;
196
+
$$ LANGUAGE plpgsql IMMUTABLE;
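As a quick sanity check of the three functions defined above, the calls below were traced by hand through the patterns; the expected results are illustrative, not output captured from a live database:

```sql
-- Illustrative calls against the functions defined in this migration
SELECT extract_discriminant('Abbey Road (2019 Remaster)');          -- '2019 Remaster'
SELECT get_base_name('Abbey Road (2019 Remaster)');                 -- 'Abbey Road'
SELECT extract_edition_discriminant('Hybrid Theory [20th Anniversary Edition]');
                                                                     -- '20th Anniversary Edition'
SELECT extract_discriminant('Plain Title');                          -- NULL (no pattern matches)
```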
197
+
198
+
-- Update existing records with newly extracted discriminants (case-insensitive)
199
+
UPDATE recordings
200
+
SET discriminant = extract_discriminant(name)
201
+
WHERE discriminant IS NULL
202
+
AND extract_discriminant(name) IS NOT NULL;
203
+
204
+
UPDATE releases
205
+
SET discriminant = extract_discriminant(name)
206
+
WHERE discriminant IS NULL
207
+
AND extract_discriminant(name) IS NOT NULL;
208
+
209
+
UPDATE plays
210
+
SET track_discriminant = extract_discriminant(track_name)
211
+
WHERE track_discriminant IS NULL
212
+
AND extract_discriminant(track_name) IS NOT NULL;
213
+
214
+
UPDATE plays
215
+
SET release_discriminant = extract_discriminant(release_name)
216
+
WHERE release_discriminant IS NULL
217
+
AND release_name IS NOT NULL
218
+
AND extract_discriminant(release_name) IS NOT NULL;
219
+
220
+
-- Update comments for the enhanced functions
221
+
COMMENT ON FUNCTION extract_discriminant IS 'Enhanced case-insensitive discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
222
+
COMMENT ON FUNCTION get_base_name IS 'Enhanced case-insensitive base name extraction removing comprehensive discriminant patterns to enable proper grouping';
223
+
COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized case-insensitive function for extracting edition and version discriminants with focused patterns';
224
+
225
+
-- Refresh materialized views to reflect the case-insensitive improvements
226
+
REFRESH MATERIALIZED VIEW mv_release_play_counts;
227
+
REFRESH MATERIALIZED VIEW mv_recording_play_counts;
228
+
229
+
-- Update discriminant analysis view to include case-insensitive results
230
+
DROP VIEW IF EXISTS discriminant_analysis;
231
+
CREATE OR REPLACE VIEW discriminant_analysis AS
232
+
SELECT
233
+
'recordings' as table_name,
234
+
name as original_name,
235
+
discriminant,
236
+
get_base_name(name) as base_name,
237
+
extract_discriminant(name) as extracted_discriminant,
238
+
extract_edition_discriminant(name) as edition_discriminant
239
+
FROM recordings
240
+
WHERE name IS NOT NULL
241
+
UNION ALL
242
+
SELECT
243
+
'releases' as table_name,
244
+
name as original_name,
245
+
discriminant,
246
+
get_base_name(name) as base_name,
247
+
extract_discriminant(name) as extracted_discriminant,
248
+
extract_edition_discriminant(name) as edition_discriminant
249
+
FROM releases
250
+
WHERE name IS NOT NULL;
251
+
252
+
COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing case-insensitive discriminant extraction results for quality assessment and debugging';
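Since the view above exists for quality assessment, a spot-check query like the following (illustrative, using only the columns defined in the view) highlights rows where the stored discriminant disagrees with what the new case-insensitive extractor would produce:

```sql
-- Illustrative: rows where stored and freshly extracted discriminants differ
SELECT table_name, original_name, discriminant, extracted_discriminant
FROM discriminant_analysis
WHERE extracted_discriminant IS DISTINCT FROM discriminant
LIMIT 20;
```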
+12
-3
package.json
+12
-3
package.json
···
7
7
"dev": "turbo dev",
8
8
"build": "pnpm turbo run build --filter='./packages/*' --filter='./apps/*'",
9
9
"build:rust": "turbo run build:rust",
10
-
"typecheck": "pnpm -r exec tsc --noEmit",
10
+
"typecheck": "pnpm -r --filter='!./vendor/*' exec tsc --noEmit",
11
11
"test": "turbo run test test:rust",
12
-
"rust:fmt": "cd services && cargo fmt",
13
-
"rust:clippy": "cd services && cargo clippy",
12
+
"rust:fmt": "pnpm rust:fmt:services && pnpm rust:fmt:apps",
13
+
"rust:clippy": "pnpm rust:clippy:services && pnpm rust:clippy:apps",
14
+
"rust:fmt:services": "cd services && cargo fmt",
15
+
"rust:clippy:services": "cd services && cargo clippy -- -D warnings",
16
+
"rust:fmt:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Formatting $dir\" && cd \"$dir\" && cargo fmt && cd ../..; fi; done",
17
+
"rust:clippy:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Linting $dir\" && cd \"$dir\" && cargo clippy -- -D warnings && cd ../..; fi; done",
14
18
"fix": "biome lint --apply . && biome format --write . && biome check . --apply",
19
+
"hooks:install": "./scripts/install-git-hooks.sh",
20
+
"hooks:install-precommit": "pre-commit install",
21
+
"postinstall": "pnpm lex:gen-server",
15
22
"nuke": "rimraf node_modules */*/node_modules",
16
23
"lex:gen-server": "turbo lex:gen-server",
17
24
"format": "prettier --write .",
···
19
26
"lex:watch": "cd tools/lexicon-cli && node dist/index.js watch",
20
27
"lex:validate": "cd tools/lexicon-cli && node dist/index.js validate",
21
28
"lex:diff": "cd tools/lexicon-cli && node dist/index.js diff",
29
+
"lex:build-amethyst": "pnpm lex:gen-server && pnpm turbo build --filter=@teal/amethyst",
30
+
"lex:dev": "pnpm lex:gen-server && pnpm turbo dev --filter=@teal/amethyst",
22
31
"db:migrate": "cd services && sqlx migrate run",
23
32
"db:migrate:revert": "cd services && sqlx migrate revert",
24
33
"db:create": "cd services && sqlx database create",
+25
packages/lexicons/lex-gen.sh
+25
packages/lexicons/lex-gen.sh
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Navigate to the lexicons directory and find all .json files
5
+
cd ../../lexicons
6
+
json_files=$(find . -name "*.json" -type f)
7
+
8
+
# Go back to the lexicons package directory
9
+
cd ../packages/lexicons
10
+
11
+
# Check if we found any lexicon files
12
+
if [ -z "$json_files" ]; then
13
+
echo "No lexicon files found in ../../lexicons/"
14
+
exit 1
15
+
fi
16
+
17
+
# Convert the file list to absolute paths
18
+
lexicon_paths=""
19
+
for file in $json_files; do
20
+
lexicon_paths="$lexicon_paths ../../lexicons/$file"
21
+
done
22
+
23
+
# Generate lexicons
24
+
echo "Generating lexicons from: $lexicon_paths"
25
+
lex gen-server ./src $lexicon_paths --yes
+14
packages/lexicons/package.json
+14
packages/lexicons/package.json
···
1
+
{
2
+
"name": "@teal/lexicons",
3
+
"type": "module",
4
+
"main": "./index.ts",
5
+
"dependencies": {
6
+
"@atproto/lex-cli": "^0.5.4",
7
+
"@atproto/lexicon": "^0.4.2",
8
+
"@atproto/xrpc-server": "^0.7.4",
9
+
"@teal/tsconfig": "workspace:*"
10
+
},
11
+
"scripts": {
12
+
"lex:gen-server": "bash ./lex-gen.sh"
13
+
}
14
+
}
-4
pnpm-lock.yaml
-4
pnpm-lock.yaml
+100
scripts/install-git-hooks.sh
+100
scripts/install-git-hooks.sh
···
1
+
#!/bin/bash
2
+
3
+
# Install git hooks for the Teal project
4
+
# This script sets up pre-commit hooks for code formatting and linting
5
+
6
+
set -e
7
+
8
+
# Colors for output
9
+
RED='\033[0;31m'
10
+
GREEN='\033[0;32m'
11
+
YELLOW='\033[1;33m'
12
+
BLUE='\033[0;34m'
13
+
NC='\033[0m' # No Color
14
+
15
+
print_status() {
16
+
echo -e "${BLUE}[INFO]${NC} $1"
17
+
}
18
+
19
+
print_success() {
20
+
echo -e "${GREEN}[SUCCESS]${NC} $1"
21
+
}
22
+
23
+
print_error() {
24
+
echo -e "${RED}[ERROR]${NC} $1"
25
+
}
26
+
27
+
print_warning() {
28
+
echo -e "${YELLOW}[WARNING]${NC} $1"
29
+
}
30
+
31
+
# Check if we're in a git repository
32
+
if [ ! -d ".git" ]; then
33
+
print_error "This script must be run from the root of a git repository"
34
+
exit 1
35
+
fi
36
+
37
+
print_status "Installing git hooks for Teal project..."
38
+
39
+
# Create hooks directory if it doesn't exist
40
+
mkdir -p .git/hooks
41
+
42
+
# Install pre-commit hook
43
+
if [ -f "scripts/pre-commit-hook.sh" ]; then
44
+
print_status "Installing pre-commit hook..."
45
+
cp scripts/pre-commit-hook.sh .git/hooks/pre-commit
46
+
chmod +x .git/hooks/pre-commit
47
+
print_success "Pre-commit hook installed"
48
+
else
49
+
print_error "Pre-commit hook script not found at scripts/pre-commit-hook.sh"
50
+
exit 1
51
+
fi
52
+
53
+
# Optional: Install other hooks
54
+
# You can add more hooks here if needed
55
+
56
+
print_status "Testing hook installation..."
57
+
58
+
# Test if the hook is executable
59
+
if [ -x ".git/hooks/pre-commit" ]; then
60
+
print_success "Pre-commit hook is executable"
61
+
else
62
+
print_error "Pre-commit hook is not executable"
63
+
exit 1
64
+
fi
65
+
66
+
# Check if required tools are available
67
+
print_status "Checking required tools..."
68
+
69
+
MISSING_TOOLS=""
70
+
71
+
if ! command -v pnpm >/dev/null 2>&1; then
72
+
MISSING_TOOLS="$MISSING_TOOLS pnpm"
73
+
fi
74
+
75
+
if ! command -v node >/dev/null 2>&1; then
76
+
MISSING_TOOLS="$MISSING_TOOLS node"
77
+
fi
78
+
79
+
if ! command -v cargo >/dev/null 2>&1; then
80
+
MISSING_TOOLS="$MISSING_TOOLS cargo"
81
+
fi
82
+
83
+
if [ -n "$MISSING_TOOLS" ]; then
84
+
print_warning "Some tools are missing:$MISSING_TOOLS"
85
+
print_warning "The git hooks may not work properly without these tools"
86
+
else
87
+
print_success "All required tools are available"
88
+
fi
89
+
90
+
print_success "Git hooks installation complete! ๐"
91
+
print_status "The following hooks have been installed:"
92
+
echo " - pre-commit: Runs formatting and linting checks before commits"
93
+
94
+
print_status "To test the pre-commit hook, try making a commit with staged files"
95
+
print_status "To temporarily skip hooks, use: git commit --no-verify"
96
+
97
+
# Optional: Show hook status
98
+
echo ""
99
+
print_status "Installed hooks:"
100
+
ls -la .git/hooks/ | grep -v sample | grep -v "^d" | sed 's/^/ /'
+213
scripts/pre-commit-hook.sh
+213
scripts/pre-commit-hook.sh
···
1
+
#!/bin/bash
2
+
3
+
# Pre-commit hook for Teal project
4
+
# This script runs code formatting and linting checks before allowing commits
5
+
6
+
set -e
7
+
8
+
echo "๐ Running pre-commit checks..."
9
+
10
+
# Colors for output
11
+
RED='\033[0;31m'
12
+
GREEN='\033[0;32m'
13
+
YELLOW='\033[1;33m'
14
+
BLUE='\033[0;34m'
15
+
NC='\033[0m' # No Color
16
+
17
+
# Function to print colored output
18
+
print_status() {
19
+
echo -e "${BLUE}[INFO]${NC} $1"
20
+
}
21
+
22
+
print_success() {
23
+
echo -e "${GREEN}[SUCCESS]${NC} $1"
24
+
}
25
+
26
+
print_warning() {
27
+
echo -e "${YELLOW}[WARNING]${NC} $1"
28
+
}
29
+
30
+
print_error() {
31
+
echo -e "${RED}[ERROR]${NC} $1"
32
+
}
33
+
34
+
# Get list of staged files
35
+
STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM)
36
+
37
+
if [ -z "$STAGED_FILES" ]; then
38
+
print_warning "No staged files found"
39
+
exit 0
40
+
fi
41
+
42
+
# Check if we have TypeScript/JavaScript files
43
+
TS_JS_FILES=$(echo "$STAGED_FILES" | grep -E '\.(ts|tsx|js|jsx)$' || true)
44
+
# Check if we have Rust files
45
+
RUST_FILES=$(echo "$STAGED_FILES" | grep -E '\.rs$' || true)
46
+
# Check if we have lexicon files
47
+
LEXICON_FILES=$(echo "$STAGED_FILES" | grep -E 'lexicons/.*\.json$' || true)
48
+
49
+
print_status "Staged files to check:"
50
+
echo "$STAGED_FILES" | sed 's/^/ - /'
51
+
52
+
# 1. TypeScript/JavaScript checks
53
+
if [ -n "$TS_JS_FILES" ]; then
54
+
print_status "Running TypeScript/JavaScript checks..."
55
+
56
+
# Check if biome is available and run it
57
+
if command -v pnpm >/dev/null 2>&1; then
58
+
print_status "Running Biome formatting and linting..."
59
+
if ! pnpm biome check . --apply --no-errors-on-unmatched 2>/dev/null; then
60
+
print_error "Biome check failed. Please fix the issues and try again."
61
+
exit 1
62
+
fi
63
+
64
+
print_status "Running Prettier formatting..."
65
+
if ! pnpm prettier --write $TS_JS_FILES 2>/dev/null; then
66
+
print_error "Prettier formatting failed. Please fix the issues and try again."
67
+
exit 1
68
+
fi
69
+
70
+
# TypeScript checking temporarily disabled due to vendor compilation issues
71
+
# Re-enable once vendor code is fixed
72
+
else
73
+
print_warning "pnpm not found, skipping JS/TS checks"
74
+
fi
75
+
fi
76
+
77
+
# 2. Rust checks
78
+
if [ -n "$RUST_FILES" ]; then
79
+
print_status "Running Rust checks..."
80
+
81
+
if command -v cargo >/dev/null 2>&1; then
82
+
RUST_ERRORS=0
83
+
84
+
# Check services workspace
85
+
if [ -f "services/Cargo.toml" ]; then
86
+
print_status "Running cargo fmt on services workspace..."
87
+
if ! (cd services && cargo fmt --check) 2>/dev/null; then
88
+
print_status "Auto-formatting Rust code in services..."
89
+
(cd services && cargo fmt) 2>/dev/null || true
90
+
fi
91
+
92
+
print_status "Running cargo clippy on services workspace..."
93
+
if (cd services && cargo check); then
94
+
if ! (cd services && cargo clippy -- -D warnings); then
95
+
print_warning "Cargo clippy found issues in services workspace. Please fix the warnings."
96
+
print_warning "Run 'pnpm rust:clippy:services' to see detailed errors."
97
+
# Don't fail the commit for clippy warnings, just warn
98
+
fi
99
+
else
100
+
print_warning "Services workspace has compilation errors. Skipping clippy."
101
+
print_warning "Run 'pnpm rust:clippy:services' to see detailed errors."
102
+
fi
103
+
fi
104
+
105
+
# Check individual Rust projects outside services
106
+
CHECKED_DIRS=""
107
+
for rust_file in $RUST_FILES; do
108
+
rust_dir=$(dirname "$rust_file")
109
+
# Find the nearest Cargo.toml going up the directory tree
110
+
check_dir="$rust_dir"
111
+
while [ "$check_dir" != "." ] && [ "$check_dir" != "/" ]; do
112
+
if [ -f "$check_dir/Cargo.toml" ] && [ "$check_dir" != "services" ]; then
113
+
# Skip if we already checked this directory
114
+
if echo "$CHECKED_DIRS" | grep -q "$check_dir"; then
115
+
break
116
+
fi
117
+
CHECKED_DIRS="$CHECKED_DIRS $check_dir"
118
+
119
+
# Found a Cargo.toml outside services workspace
120
+
print_status "Running cargo fmt on $check_dir..."
121
+
if ! (cd "$check_dir" && cargo fmt --check) 2>/dev/null; then
122
+
print_status "Auto-formatting Rust code in $check_dir..."
123
+
(cd "$check_dir" && cargo fmt) 2>/dev/null || true
124
+
fi
125
+
126
+
print_status "Running cargo clippy on $check_dir..."
127
+
if (cd "$check_dir" && cargo check); then
128
+
if ! (cd "$check_dir" && cargo clippy -- -D warnings); then
129
+
print_error "Cargo clippy found issues in $check_dir. Please fix the warnings and try again."
130
+
RUST_ERRORS=1
131
+
fi
132
+
else
133
+
print_warning "Project $check_dir has compilation errors. Skipping clippy."
134
+
print_warning "Run 'cd $check_dir && cargo check' to see detailed errors."
135
+
fi
136
+
break
137
+
fi
138
+
check_dir=$(dirname "$check_dir")
139
+
done
140
+
done
141
+
142
+
if [ $RUST_ERRORS -eq 1 ]; then
143
+
exit 1
144
+
fi
145
+
else
146
+
print_warning "Cargo not found, skipping Rust checks"
147
+
fi
148
+
fi
149
+
150
+
# 3. Lexicon checks
151
+
if [ -n "$LEXICON_FILES" ]; then
152
+
print_status "Lexicon files changed, validating and regenerating..."
153
+
154
+
if command -v pnpm >/dev/null 2>&1; then
155
+
print_status "Validating lexicons..."
156
+
if ! pnpm lex:validate 2>/dev/null; then
157
+
print_error "Lexicon validation failed. Please fix the lexicon files and try again."
158
+
exit 1
159
+
fi
160
+
161
+
print_status "Regenerating lexicons..."
162
+
if ! pnpm lex:gen-server 2>/dev/null; then
163
+
print_error "Lexicon generation failed. Please check the lexicon files and try again."
164
+
exit 1
165
+
fi
166
+
167
+
# Note: Generated lexicon files are ignored by .gitignore and not added to staging
168
+
print_status "Generated lexicon files are ignored by .gitignore (as intended)"
169
+
else
170
+
print_warning "pnpm not found, skipping lexicon checks"
171
+
fi
172
+
fi
173
+
174
+
# 4. Re-add files that might have been formatted
175
+
FORMATTED_FILES=""
176
+
for file in $STAGED_FILES; do
177
+
if [ -f "$file" ]; then
178
+
# Check if file was modified by formatters
179
+
if [ -n "$(git diff "$file")" ]; then
180
+
FORMATTED_FILES="$FORMATTED_FILES $file"
181
+
git add "$file"
182
+
fi
183
+
fi
184
+
done
185
+
186
+
if [ -n "$FORMATTED_FILES" ]; then
187
+
print_success "Auto-formatted files have been re-staged:"
188
+
echo "$FORMATTED_FILES" | tr ' ' '\n' | sed 's/^/ - /'
189
+
fi
190
+
191
+
# 5. Final validation - ensure no syntax errors in staged files
192
+
print_status "Running final validation..."
193
+
194
+
# Check for common issues
195
+
for file in $TS_JS_FILES; do
196
+
if [ -f "$file" ]; then
197
+
# Check for console.log statements (optional - remove if you want to allow them)
198
+
if grep -n "console\.log" "$file" >/dev/null 2>&1; then
199
+
print_warning "Found console.log statements in $file! yooo!!!"
200
+
# Uncomment the next two lines if you want to block commits with console.log
201
+
# print_error "Please remove console.log statements before committing"
202
+
# exit 1
203
+
fi
204
+
205
+
# Check for TODO/FIXME comments in committed code (optional)
206
+
if grep -n -i "TODO\|FIXME" "$file" >/dev/null 2>&1; then
207
+
print_warning "Found TODO/FIXME comments in $file"
208
+
fi
209
+
fi
210
+
done
211
+
212
+
print_success "All pre-commit checks passed! ๐"
213
+
exit 0
+66
scripts/setup-lexicons.sh
+66
scripts/setup-lexicons.sh
···
1
+
#!/bin/bash
2
+
# scripts/setup-lexicons.sh
3
+
# Setup script for ATProto lexicons submodule and symbolic links
4
+
5
+
set -e
6
+
7
+
echo "Setting up lexicons..."
8
+
9
+
# Check if we're in the right directory
10
+
if [ ! -f "package.json" ] || [ ! -d "lexicons" ]; then
11
+
echo "Error: This script must be run from the project root directory"
12
+
exit 1
13
+
fi
14
+
15
+
# Initialize submodules
16
+
echo "Initializing submodules..."
17
+
git submodule update --init --recursive
18
+
19
+
# Check if vendor/atproto exists
20
+
if [ ! -d "vendor/atproto" ]; then
21
+
echo "Error: vendor/atproto submodule not found"
22
+
exit 1
23
+
fi
24
+
25
+
# Create symbolic links if they don't exist
26
+
echo "Creating symbolic links..."
27
+
cd lexicons
28
+
29
+
if [ ! -L app ]; then
30
+
ln -s ../vendor/atproto/lexicons/app app
31
+
echo "Created symlink: lexicons/app"
32
+
else
33
+
echo "Symlink already exists: lexicons/app"
34
+
fi
35
+
36
+
if [ ! -L chat ]; then
37
+
ln -s ../vendor/atproto/lexicons/chat chat
38
+
echo "Created symlink: lexicons/chat"
39
+
else
40
+
echo "Symlink already exists: lexicons/chat"
41
+
fi
42
+
43
+
if [ ! -L com ]; then
44
+
ln -s ../vendor/atproto/lexicons/com com
45
+
echo "Created symlink: lexicons/com"
46
+
else
47
+
echo "Symlink already exists: lexicons/com"
48
+
fi
49
+
50
+
if [ ! -L tools ]; then
51
+
ln -s ../vendor/atproto/lexicons/tools tools
52
+
echo "Created symlink: lexicons/tools"
53
+
else
54
+
echo "Symlink already exists: lexicons/tools"
55
+
fi
56
+
57
+
cd ..
58
+
59
+
echo "Lexicons setup complete!"
60
+
echo ""
61
+
echo "You should now have access to:"
62
+
echo " - lexicons/app -> ATProto app lexicons"
63
+
echo " - lexicons/chat -> ATProto chat lexicons"
64
+
echo " - lexicons/com -> ATProto protocol lexicons"
65
+
echo " - lexicons/tools -> ATProto tools lexicons"
66
+
echo " - lexicons/fm.teal.alpha -> Custom Teal lexicons"
+69
scripts/setup-sqlx-offline.sh
+69
scripts/setup-sqlx-offline.sh
···
1
+
#!/bin/bash
2
+
3
+
# Script to copy .sqlx files to all Rust projects that use SQLx
4
+
# This is needed for offline SQLx builds (SQLX_OFFLINE=true)
5
+
6
+
set -e
7
+
8
+
# Get the script directory (should be in teal/scripts/)
9
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10
+
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
11
+
12
+
# Source .sqlx directory
13
+
SQLX_SOURCE="$PROJECT_ROOT/.sqlx"
14
+
15
+
# List of projects that use SQLx (relative to project root)
16
+
SQLX_PROJECTS=(
17
+
"apps/aqua"
18
+
"services/cadet"
19
+
"services/satellite"
20
+
)
21
+
22
+
echo "๐ง Setting up SQLx offline files..."
23
+
24
+
# Check if source .sqlx directory exists
25
+
if [ ! -d "$SQLX_SOURCE" ]; then
26
+
echo "โ Source .sqlx directory not found at: $SQLX_SOURCE"
27
+
echo " Make sure you've run 'cargo sqlx prepare' from the services directory first."
28
+
exit 1
29
+
fi
30
+
31
+
# Copy .sqlx files to each project that needs them
32
+
for project in "${SQLX_PROJECTS[@]}"; do
33
+
project_path="$PROJECT_ROOT/$project"
34
+
target_sqlx="$project_path/.sqlx"
35
+
36
+
if [ ! -d "$project_path" ]; then
37
+
echo "โ ๏ธ Project directory not found: $project_path (skipping)"
38
+
continue
39
+
fi
40
+
41
+
# Check if project actually uses SQLx
42
+
if [ ! -f "$project_path/Cargo.toml" ]; then
43
+
echo "โ ๏ธ No Cargo.toml found in $project (skipping)"
44
+
continue
45
+
fi
46
+
47
+
if ! grep -q "sqlx" "$project_path/Cargo.toml"; then
48
+
echo "โ ๏ธ Project $project doesn't appear to use SQLx (skipping)"
49
+
continue
50
+
fi
51
+
52
+
echo "๐ฆ Copying .sqlx files to $project..."
53
+
54
+
# Remove existing .sqlx directory if it exists
55
+
if [ -d "$target_sqlx" ]; then
56
+
rm -rf "$target_sqlx"
57
+
fi
58
+
59
+
# Copy the .sqlx directory
60
+
cp -r "$SQLX_SOURCE" "$target_sqlx"
61
+
62
+
echo " โ
Copied $(ls -1 "$target_sqlx" | wc -l) query files"
63
+
done
64
+
65
+
echo "โ
SQLx offline setup complete!"
66
+
echo ""
67
+
echo "Note: If you add new SQL queries or modify existing ones, you'll need to:"
68
+
echo "1. Run 'cargo sqlx prepare' from the services directory"
69
+
echo "2. Run this script again to update all project copies"
+62
scripts/update-lexicons.sh
+62
scripts/update-lexicons.sh
···
1
+
#!/bin/bash
2
+
# scripts/update-lexicons.sh
3
+
# Update script for ATProto lexicons from upstream
4
+
5
+
set -e
6
+
7
+
echo "Updating ATProto lexicons..."
8
+
9
+
# Check if we're in the right directory
10
+
if [ ! -f "package.json" ] || [ ! -d "vendor/atproto" ]; then
11
+
echo "Error: This script must be run from the project root directory"
12
+
echo "Make sure vendor/atproto submodule exists"
13
+
exit 1
14
+
fi
15
+
16
+
# Save current directory
17
+
PROJECT_ROOT=$(pwd)
18
+
19
+
# Update the submodule
20
+
echo "Fetching latest changes from atproto repository..."
21
+
cd vendor/atproto
22
+
23
+
# Fetch latest changes
24
+
git fetch origin
25
+
26
+
# Get current commit
27
+
CURRENT_COMMIT=$(git rev-parse HEAD)
28
+
CURRENT_SHORT=$(git rev-parse --short HEAD)
29
+
30
+
# Get latest commit on main
31
+
LATEST_COMMIT=$(git rev-parse origin/main)
32
+
LATEST_SHORT=$(git rev-parse --short origin/main)
33
+
34
+
if [ "$CURRENT_COMMIT" = "$LATEST_COMMIT" ]; then
35
+
echo "Already up to date (${CURRENT_SHORT})"
36
+
cd "$PROJECT_ROOT"
37
+
exit 0
38
+
fi
39
+
40
+
echo "Updating from ${CURRENT_SHORT} to ${LATEST_SHORT}..."
41
+
42
+
# Pull latest changes
43
+
git pull origin main
44
+
45
+
# Go back to project root
46
+
cd "$PROJECT_ROOT"
47
+
48
+
# Stage the submodule update
49
+
git add vendor/atproto
50
+
51
+
# Show what changed
52
+
echo ""
53
+
echo "Submodule updated successfully!"
54
+
echo "Changes:"
55
+
git diff --cached --submodule=log vendor/atproto
56
+
57
+
echo ""
58
+
echo "To complete the update, commit the changes:"
59
+
echo " git commit -m \"Update atproto lexicons to ${LATEST_SHORT}\""
60
+
echo ""
61
+
echo "Or to see what lexicon files changed:"
62
+
echo " cd vendor/atproto && git log --oneline ${CURRENT_SHORT}..${LATEST_SHORT} -- lexicons/"
-6
services/.sqlx/.sqlxrc
-6
services/.sqlx/.sqlxrc
+96
-366
services/Cargo.lock
+96
-366
services/Cargo.lock
···
60
60
]
61
61
62
62
[[package]]
63
-
name = "anstream"
64
-
version = "0.6.19"
65
-
source = "registry+https://github.com/rust-lang/crates.io-index"
66
-
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
67
-
dependencies = [
68
-
"anstyle",
69
-
"anstyle-parse",
70
-
"anstyle-query",
71
-
"anstyle-wincon",
72
-
"colorchoice",
73
-
"is_terminal_polyfill",
74
-
"utf8parse",
75
-
]
76
-
77
-
[[package]]
78
-
name = "anstyle"
79
-
version = "1.0.11"
80
-
source = "registry+https://github.com/rust-lang/crates.io-index"
81
-
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
82
-
83
-
[[package]]
84
-
name = "anstyle-parse"
85
-
version = "0.2.7"
86
-
source = "registry+https://github.com/rust-lang/crates.io-index"
87
-
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
88
-
dependencies = [
89
-
"utf8parse",
90
-
]
91
-
92
-
[[package]]
93
-
name = "anstyle-query"
94
-
version = "1.1.3"
95
-
source = "registry+https://github.com/rust-lang/crates.io-index"
96
-
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
97
-
dependencies = [
98
-
"windows-sys 0.59.0",
99
-
]
100
-
101
-
[[package]]
102
-
name = "anstyle-wincon"
103
-
version = "3.0.9"
104
-
source = "registry+https://github.com/rust-lang/crates.io-index"
105
-
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
106
-
dependencies = [
107
-
"anstyle",
108
-
"once_cell_polyfill",
109
-
"windows-sys 0.59.0",
110
-
]
111
-
112
-
[[package]]
113
63
name = "anyhow"
114
64
version = "1.0.98"
115
65
source = "registry+https://github.com/rust-lang/crates.io-index"
116
66
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
117
67
118
68
[[package]]
119
-
name = "aqua"
120
-
version = "0.1.0"
121
-
dependencies = [
122
-
"anyhow",
123
-
"async-trait",
124
-
"atrium-api",
125
-
"axum",
126
-
"base64",
127
-
"chrono",
128
-
"clap",
129
-
"dotenvy",
130
-
"iroh-car",
131
-
"redis",
132
-
"reqwest",
133
-
"serde",
134
-
"serde_json",
135
-
"sqlx",
136
-
"sys-info",
137
-
"time",
138
-
"tokio",
139
-
"tower-http",
140
-
"tracing",
141
-
"tracing-subscriber",
142
-
"types",
143
-
"url",
144
-
"uuid",
145
-
"vergen",
146
-
"vergen-gitcl",
147
-
]
148
-
149
-
[[package]]
150
69
name = "arc-swap"
151
70
version = "1.7.1"
152
71
source = "registry+https://github.com/rust-lang/crates.io-index"
···
187
106
]
188
107
189
108
[[package]]
109
+
name = "atmst"
110
+
version = "0.0.1"
111
+
source = "registry+https://github.com/rust-lang/crates.io-index"
112
+
checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d"
113
+
dependencies = [
114
+
"async-trait",
115
+
"bytes",
116
+
"cid 0.11.1",
117
+
"dashmap",
118
+
"futures",
119
+
"ipld-core",
120
+
"iroh-car 0.5.1",
121
+
"log",
122
+
"multihash 0.19.3",
123
+
"serde",
124
+
"serde_ipld_dagcbor",
125
+
"serde_ipld_dagjson",
126
+
"sha2",
127
+
"thiserror 1.0.69",
128
+
"tokio",
129
+
]
130
+
131
+
[[package]]
190
132
name = "atoi"
191
133
version = "2.0.0"
192
134
source = "registry+https://github.com/rust-lang/crates.io-index"
···
287
229
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
288
230
dependencies = [
289
231
"axum-core",
290
-
"axum-macros",
291
232
"bytes",
292
233
"form_urlencoded",
293
234
"futures-util",
···
300
241
"matchit",
301
242
"memchr",
302
243
"mime",
303
-
"multer",
304
244
"percent-encoding",
305
245
"pin-project-lite",
306
246
"rustversion",
···
337
277
]
338
278
339
279
[[package]]
340
-
name = "axum-macros"
341
-
version = "0.5.0"
342
-
source = "registry+https://github.com/rust-lang/crates.io-index"
343
-
checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
344
-
dependencies = [
345
-
"proc-macro2",
346
-
"quote",
347
-
"syn 2.0.104",
348
-
]
349
-
350
-
[[package]]
351
280
name = "backtrace"
352
281
version = "0.3.75"
353
282
source = "registry+https://github.com/rust-lang/crates.io-index"
···
504
433
version = "1.10.1"
505
434
source = "registry+https://github.com/rust-lang/crates.io-index"
506
435
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
436
+
dependencies = [
437
+
"serde",
438
+
]
507
439
508
440
[[package]]
509
441
name = "cadet"
···
511
443
dependencies = [
512
444
"anyhow",
513
445
"async-trait",
446
+
"atmst",
514
447
"atrium-api",
515
448
"base64",
516
449
"chrono",
517
450
"cid 0.11.1",
518
451
"dotenvy",
519
452
"flume",
520
-
"iroh-car",
453
+
"futures",
454
+
"iroh-car 0.4.0",
521
455
"libipld",
522
456
"metrics 0.23.1",
523
457
"metrics-exporter-prometheus",
···
528
462
"reqwest",
529
463
"rocketman",
530
464
"serde",
465
+
"serde_ipld_dagcbor",
531
466
"serde_json",
532
467
"sqlx",
533
468
"time",
···
541
476
]
542
477
543
478
[[package]]
544
-
name = "camino"
545
-
version = "1.1.10"
546
-
source = "registry+https://github.com/rust-lang/crates.io-index"
547
-
checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab"
548
-
dependencies = [
549
-
"serde",
550
-
]
551
-
552
-
[[package]]
553
-
name = "cargo-platform"
554
-
version = "0.1.9"
555
-
source = "registry+https://github.com/rust-lang/crates.io-index"
556
-
checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea"
557
-
dependencies = [
558
-
"serde",
559
-
]
560
-
561
-
[[package]]
562
-
name = "cargo_metadata"
563
-
version = "0.19.2"
564
-
source = "registry+https://github.com/rust-lang/crates.io-index"
565
-
checksum = "dd5eb614ed4c27c5d706420e4320fbe3216ab31fa1c33cd8246ac36dae4479ba"
566
-
dependencies = [
567
-
"camino",
568
-
"cargo-platform",
569
-
"semver",
570
-
"serde",
571
-
"serde_json",
572
-
"thiserror 2.0.12",
573
-
]
574
-
575
-
[[package]]
576
479
name = "cbor4ii"
577
480
version = "0.2.14"
578
481
source = "registry+https://github.com/rust-lang/crates.io-index"
···
661
564
]
662
565
663
566
[[package]]
664
-
name = "clap"
665
-
version = "4.5.41"
666
-
source = "registry+https://github.com/rust-lang/crates.io-index"
667
-
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
668
-
dependencies = [
669
-
"clap_builder",
670
-
"clap_derive",
671
-
]
672
-
673
-
[[package]]
674
-
name = "clap_builder"
675
-
version = "4.5.41"
676
-
source = "registry+https://github.com/rust-lang/crates.io-index"
677
-
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
678
-
dependencies = [
679
-
"anstream",
680
-
"anstyle",
681
-
"clap_lex",
682
-
"strsim",
683
-
]
684
-
685
-
[[package]]
686
-
name = "clap_derive"
687
-
version = "4.5.41"
688
-
source = "registry+https://github.com/rust-lang/crates.io-index"
689
-
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
690
-
dependencies = [
691
-
"heck",
692
-
"proc-macro2",
693
-
"quote",
694
-
"syn 2.0.104",
695
-
]
696
-
697
-
[[package]]
698
-
name = "clap_lex"
699
-
version = "0.7.5"
700
-
source = "registry+https://github.com/rust-lang/crates.io-index"
701
-
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
702
-
703
-
[[package]]
704
567
name = "cmake"
705
568
version = "0.1.54"
706
569
source = "registry+https://github.com/rust-lang/crates.io-index"
···
708
571
dependencies = [
709
572
"cc",
710
573
]
711
-
712
-
[[package]]
713
-
name = "colorchoice"
714
-
version = "1.0.4"
715
-
source = "registry+https://github.com/rust-lang/crates.io-index"
716
-
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
717
574
718
575
[[package]]
719
576
name = "combine"
···
1296
1153
"libc",
1297
1154
"log",
1298
1155
"rustversion",
1299
-
"windows 0.61.3",
1156
+
"windows",
1300
1157
]
1301
1158
1302
1159
[[package]]
···
1568
1425
"js-sys",
1569
1426
"log",
1570
1427
"wasm-bindgen",
1571
-
"windows-core 0.61.2",
1428
+
"windows-core",
1572
1429
]
1573
1430
1574
1431
[[package]]
···
1757
1614
]
1758
1615
1759
1616
[[package]]
1760
-
name = "is_terminal_polyfill"
1761
-
version = "1.70.1"
1617
+
name = "iroh-car"
1618
+
version = "0.5.1"
1762
1619
source = "registry+https://github.com/rust-lang/crates.io-index"
1763
-
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
1620
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
1621
+
dependencies = [
1622
+
"anyhow",
1623
+
"cid 0.11.1",
1624
+
"futures",
1625
+
"serde",
1626
+
"serde_ipld_dagcbor",
1627
+
"thiserror 1.0.69",
1628
+
"tokio",
1629
+
"unsigned-varint 0.7.2",
1630
+
]
1764
1631
1765
1632
[[package]]
1766
1633
name = "itertools"
···
2150
2017
]
2151
2018
2152
2019
[[package]]
2153
-
name = "multer"
2154
-
version = "3.1.0"
2155
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2156
-
checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
2157
-
dependencies = [
2158
-
"bytes",
2159
-
"encoding_rs",
2160
-
"futures-util",
2161
-
"http",
2162
-
"httparse",
2163
-
"memchr",
2164
-
"mime",
2165
-
"spin",
2166
-
"version_check",
2167
-
]
2168
-
2169
-
[[package]]
2170
2020
name = "multibase"
2171
2021
version = "0.9.1"
2172
2022
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2300
2150
]
2301
2151
2302
2152
[[package]]
2303
-
name = "ntapi"
2304
-
version = "0.4.1"
2305
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2306
-
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
2307
-
dependencies = [
2308
-
"winapi",
2309
-
]
2310
-
2311
-
[[package]]
2312
2153
name = "nu-ansi-term"
2313
2154
version = "0.46.0"
2314
2155
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2383
2224
]
2384
2225
2385
2226
[[package]]
2386
-
name = "num_threads"
2387
-
version = "0.1.7"
2388
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2389
-
checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
2390
-
dependencies = [
2391
-
"libc",
2392
-
]
2393
-
2394
-
[[package]]
2395
-
name = "objc2-core-foundation"
2396
-
version = "0.3.1"
2397
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2398
-
checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166"
2399
-
dependencies = [
2400
-
"bitflags 2.9.1",
2401
-
]
2402
-
2403
-
[[package]]
2404
2227
name = "object"
2405
2228
version = "0.36.7"
2406
2229
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2414
2237
version = "1.21.3"
2415
2238
source = "registry+https://github.com/rust-lang/crates.io-index"
2416
2239
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
2417
-
2418
-
[[package]]
2419
-
name = "once_cell_polyfill"
2420
-
version = "1.70.1"
2421
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2422
-
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
2423
2240
2424
2241
[[package]]
2425
2242
name = "openssl"
···
3019
2836
dependencies = [
3020
2837
"aws-lc-rs",
3021
2838
"once_cell",
2839
+
"ring",
3022
2840
"rustls-pki-types",
3023
2841
"rustls-webpki",
3024
2842
"subtle",
···
3150
2968
version = "1.0.26"
3151
2969
source = "registry+https://github.com/rust-lang/crates.io-index"
3152
2970
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
3153
-
dependencies = [
3154
-
"serde",
3155
-
]
3156
2971
3157
2972
[[package]]
3158
2973
name = "serde"
···
3209
3024
]
3210
3025
3211
3026
[[package]]
3027
+
name = "serde_ipld_dagjson"
3028
+
version = "0.2.0"
3029
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3030
+
checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36"
3031
+
dependencies = [
3032
+
"ipld-core",
3033
+
"serde",
3034
+
"serde_json",
3035
+
]
3036
+
3037
+
[[package]]
3212
3038
name = "serde_json"
3213
3039
version = "1.0.141"
3214
3040
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3296
3122
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3297
3123
3298
3124
[[package]]
3125
+
name = "signal-hook-registry"
3126
+
version = "1.4.5"
3127
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3128
+
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
3129
+
dependencies = [
3130
+
"libc",
3131
+
]
3132
+
3133
+
[[package]]
3299
3134
name = "signature"
3300
3135
version = "2.2.0"
3301
3136
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3402
3237
"memchr",
3403
3238
"once_cell",
3404
3239
"percent-encoding",
3240
+
"rustls",
3405
3241
"serde",
3406
3242
"serde_json",
3407
3243
"sha2",
···
3413
3249
"tracing",
3414
3250
"url",
3415
3251
"uuid",
3252
+
"webpki-roots 0.26.11",
3416
3253
]
3417
3254
3418
3255
[[package]]
···
3662
3499
]
3663
3500
3664
3501
[[package]]
3665
-
name = "sys-info"
3666
-
version = "0.9.1"
3667
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3668
-
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
3669
-
dependencies = [
3670
-
"cc",
3671
-
"libc",
3672
-
]
3673
-
3674
-
[[package]]
3675
-
name = "sysinfo"
3676
-
version = "0.34.2"
3677
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3678
-
checksum = "a4b93974b3d3aeaa036504b8eefd4c039dced109171c1ae973f1dc63b2c7e4b2"
3679
-
dependencies = [
3680
-
"libc",
3681
-
"memchr",
3682
-
"ntapi",
3683
-
"objc2-core-foundation",
3684
-
"windows 0.57.0",
3685
-
]
3686
-
3687
-
[[package]]
3688
3502
name = "system-configuration"
3689
3503
version = "0.6.1"
3690
3504
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3781
3595
dependencies = [
3782
3596
"deranged",
3783
3597
"itoa",
3784
-
"libc",
3785
3598
"num-conv",
3786
-
"num_threads",
3787
3599
"powerfmt",
3788
3600
"serde",
3789
3601
"time-core",
···
3842
3654
"io-uring",
3843
3655
"libc",
3844
3656
"mio",
3657
+
"parking_lot",
3845
3658
"pin-project-lite",
3659
+
"signal-hook-registry",
3846
3660
"slab",
3847
3661
"socket2 0.5.10",
3848
3662
"tokio-macros",
···
4133
3947
"serde_ipld_dagcbor",
4134
3948
"serde_json",
4135
3949
"thiserror 2.0.12",
4136
-
"uuid",
4137
3950
]
4138
3951
4139
3952
[[package]]
···
4211
4024
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
4212
4025
4213
4026
[[package]]
4214
-
name = "utf8parse"
4215
-
version = "0.2.2"
4216
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4217
-
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
4218
-
4219
-
[[package]]
4220
4027
name = "uuid"
4221
4028
version = "1.17.0"
4222
4029
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4241
4048
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
4242
4049
4243
4050
[[package]]
4244
-
name = "vergen"
4245
-
version = "9.0.6"
4246
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4247
-
checksum = "6b2bf58be11fc9414104c6d3a2e464163db5ef74b12296bda593cac37b6e4777"
4248
-
dependencies = [
4249
-
"anyhow",
4250
-
"cargo_metadata",
4251
-
"derive_builder",
4252
-
"regex",
4253
-
"rustc_version",
4254
-
"rustversion",
4255
-
"sysinfo",
4256
-
"time",
4257
-
"vergen-lib",
4258
-
]
4259
-
4260
-
[[package]]
4261
-
name = "vergen-gitcl"
4262
-
version = "1.0.8"
4263
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4264
-
checksum = "b9dfc1de6eb2e08a4ddf152f1b179529638bedc0ea95e6d667c014506377aefe"
4265
-
dependencies = [
4266
-
"anyhow",
4267
-
"derive_builder",
4268
-
"rustversion",
4269
-
"time",
4270
-
"vergen",
4271
-
"vergen-lib",
4272
-
]
4273
-
4274
-
[[package]]
4275
-
name = "vergen-lib"
4276
-
version = "0.1.6"
4277
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4278
-
checksum = "9b07e6010c0f3e59fcb164e0163834597da68d1f864e2b8ca49f74de01e9c166"
4279
-
dependencies = [
4280
-
"anyhow",
4281
-
"derive_builder",
4282
-
"rustversion",
4283
-
]
4284
-
4285
-
[[package]]
4286
4051
name = "version_check"
4287
4052
version = "0.9.5"
4288
4053
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4410
4175
]
4411
4176
4412
4177
[[package]]
4178
+
name = "webpki-roots"
4179
+
version = "0.26.11"
4180
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4181
+
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
4182
+
dependencies = [
4183
+
"webpki-roots 1.0.2",
4184
+
]
4185
+
4186
+
[[package]]
4187
+
name = "webpki-roots"
4188
+
version = "1.0.2"
4189
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4190
+
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
4191
+
dependencies = [
4192
+
"rustls-pki-types",
4193
+
]
4194
+
4195
+
[[package]]
4413
4196
name = "which"
4414
4197
version = "4.4.2"
4415
4198
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4455
4238
4456
4239
[[package]]
4457
4240
name = "windows"
4458
-
version = "0.57.0"
4459
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4460
-
checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143"
4461
-
dependencies = [
4462
-
"windows-core 0.57.0",
4463
-
"windows-targets 0.52.6",
4464
-
]
4465
-
4466
-
[[package]]
4467
-
name = "windows"
4468
4241
version = "0.61.3"
4469
4242
source = "registry+https://github.com/rust-lang/crates.io-index"
4470
4243
checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
4471
4244
dependencies = [
4472
4245
"windows-collections",
4473
-
"windows-core 0.61.2",
4246
+
"windows-core",
4474
4247
"windows-future",
4475
4248
"windows-link",
4476
4249
"windows-numerics",
···
4482
4255
source = "registry+https://github.com/rust-lang/crates.io-index"
4483
4256
checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
4484
4257
dependencies = [
4485
-
"windows-core 0.61.2",
4486
-
]
4487
-
4488
-
[[package]]
4489
-
name = "windows-core"
4490
-
version = "0.57.0"
4491
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4492
-
checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d"
4493
-
dependencies = [
4494
-
"windows-implement 0.57.0",
4495
-
"windows-interface 0.57.0",
4496
-
"windows-result 0.1.2",
4497
-
"windows-targets 0.52.6",
4258
+
"windows-core",
4498
4259
]
4499
4260
4500
4261
[[package]]
···
4503
4264
source = "registry+https://github.com/rust-lang/crates.io-index"
4504
4265
checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
4505
4266
dependencies = [
4506
-
"windows-implement 0.60.0",
4507
-
"windows-interface 0.59.1",
4267
+
"windows-implement",
4268
+
"windows-interface",
4508
4269
"windows-link",
4509
-
"windows-result 0.3.4",
4270
+
"windows-result",
4510
4271
"windows-strings",
4511
4272
]
4512
4273
···
4516
4277
source = "registry+https://github.com/rust-lang/crates.io-index"
4517
4278
checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
4518
4279
dependencies = [
4519
-
"windows-core 0.61.2",
4280
+
"windows-core",
4520
4281
"windows-link",
4521
4282
"windows-threading",
4522
4283
]
4523
4284
4524
4285
[[package]]
4525
4286
name = "windows-implement"
4526
-
version = "0.57.0"
4527
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4528
-
checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
4529
-
dependencies = [
4530
-
"proc-macro2",
4531
-
"quote",
4532
-
"syn 2.0.104",
4533
-
]
4534
-
4535
-
[[package]]
4536
-
name = "windows-implement"
4537
4287
version = "0.60.0"
4538
4288
source = "registry+https://github.com/rust-lang/crates.io-index"
4539
4289
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
···
4545
4295
4546
4296
[[package]]
4547
4297
name = "windows-interface"
4548
-
version = "0.57.0"
4549
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4550
-
checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
4551
-
dependencies = [
4552
-
"proc-macro2",
4553
-
"quote",
4554
-
"syn 2.0.104",
4555
-
]
4556
-
4557
-
[[package]]
4558
-
name = "windows-interface"
4559
4298
version = "0.59.1"
4560
4299
source = "registry+https://github.com/rust-lang/crates.io-index"
4561
4300
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
···
4577
4316
source = "registry+https://github.com/rust-lang/crates.io-index"
4578
4317
checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
4579
4318
dependencies = [
4580
-
"windows-core 0.61.2",
4319
+
"windows-core",
4581
4320
"windows-link",
4582
4321
]
4583
4322
···
4588
4327
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
4589
4328
dependencies = [
4590
4329
"windows-link",
4591
-
"windows-result 0.3.4",
4330
+
"windows-result",
4592
4331
"windows-strings",
4593
-
]
4594
-
4595
-
[[package]]
4596
-
name = "windows-result"
4597
-
version = "0.1.2"
4598
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4599
-
checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
4600
-
dependencies = [
4601
-
"windows-targets 0.52.6",
4602
4332
]
4603
4333
4604
4334
[[package]]
+5
-4
services/Cargo.toml
+5
-4
services/Cargo.toml
···
1
1
[workspace]
2
-
members = ["aqua", "cadet", "rocketman", "satellite", "types"]
2
+
members = ["cadet", "satellite", "types"]
3
3
resolver = "2"
4
4
5
5
[workspace.dependencies]
···
12
12
"postgres",
13
13
"uuid",
14
14
"chrono",
15
+
"tls-rustls",
15
16
] }
16
17
serde = { version = "1.0", features = ["derive"] }
17
18
anyhow = "1.0"
···
19
20
tracing = "0.1"
20
21
tracing-subscriber = "0.3"
21
22
metrics = "0.23"
22
-
reqwest = { version = "0.12", features = ["json"] }
23
+
reqwest.workspace = true
23
24
url = "2.5"
24
25
rand = "0.8"
25
26
flume = "0.11"
26
27
async-trait = "0.1"
27
28
time = "0.3"
28
29
dotenvy = "0.15"
29
-
tokio-tungstenite = "0.24"
30
+
tokio-tungstenite.workspace = true
30
31
atrium-api = "0.25"
31
32
chrono = { version = "0.4", features = ["serde"] }
32
33
uuid = { version = "1.0", features = ["v4", "serde"] }
33
34
types = { path = "types" }
34
-
rocketman = { path = "rocketman" }
35
+
rocketman = "0.2.5"
35
36
36
37
# CAR and IPLD dependencies
37
38
iroh-car = "0.4"
+20
services/Cross.toml
+20
services/Cross.toml
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"]
14
+
# Allow cross-compilation of native dependencies
15
+
PKG_CONFIG_ALLOW_CROSS = "1"
16
+
# Use static linking to reduce runtime dependencies
17
+
RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s"
18
+
# Cross-compiler toolchain for native dependencies (e.g., OpenSSL)
19
+
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
20
+
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
+4
services/cadet/Cargo.toml
+4
services/cadet/Cargo.toml
···
3
3
version = "0.1.0"
4
4
edition = "2021"
5
5
6
+
6
7
[dependencies]
7
8
atrium-api.workspace = true
8
9
tokio.workspace = true
···
32
33
libipld.workspace = true
33
34
cid.workspace = true
34
35
base64.workspace = true
36
+
atmst = "0.0.1"
37
+
serde_ipld_dagcbor = "0.6"
38
+
futures = "0.3"
35
39
36
40
# Redis for job queues
37
41
redis.workspace = true
+61
-1
services/cadet/Dockerfile
+61
-1
services/cadet/Dockerfile
···
1
+
# Docker build args for cross-platform builds (must be at the top)
2
+
ARG TARGETPLATFORM
3
+
ARG BUILDPLATFORM
4
+
ARG TARGETARCH
5
+
ARG TARGETOS
6
+
1
7
FROM --platform=${BUILDPLATFORM} rust:latest AS buildah
2
8
3
9
# Create appuser
···
15
21
16
22
WORKDIR /buildah
17
23
24
+
# Re-declare ARGs after FROM (Docker requirement)
25
+
ARG TARGETPLATFORM
26
+
ARG BUILDPLATFORM
27
+
ARG TARGETARCH
28
+
ARG TARGETOS
29
+
30
+
# Debug platform detection before copying files
31
+
RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM"
32
+
18
33
COPY ./ .
19
34
20
-
RUN . ./target.sh && touch src/main.rs && echo "Building for $TARGET_ARCH" && cargo build --release --target $RUST_TARGET && cp target/$RUST_TARGET/release/cadet target/cadet
35
+
# Setup lexicons and install dependencies
36
+
RUN ./scripts/setup-lexicons.sh
37
+
38
+
# Install Node.js and pnpm for lexicon generation
39
+
RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/*
40
+
RUN npm install -g pnpm
41
+
42
+
# Install dependencies and generate lexicons
43
+
RUN pnpm install
44
+
RUN cd tools/lexicon-cli && pnpm build
45
+
RUN pnpm lex:gen
46
+
47
+
# Install cross-compilation toolchains
48
+
RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu
49
+
50
+
# Enable ARM64 architecture and install cross-compilation tools
51
+
RUN dpkg --add-architecture arm64 && \
52
+
apt-get update && \
53
+
apt-get install -y \
54
+
gcc-aarch64-linux-gnu \
55
+
libssl-dev:arm64 \
56
+
libssl-dev \
57
+
pkg-config \
58
+
&& rm -rf /var/lib/apt/lists/*
59
+
60
+
# Set up cross-compilation environment
61
+
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
62
+
ENV PKG_CONFIG_ALLOW_CROSS=1
63
+
ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig
64
+
ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr
65
+
ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu
66
+
ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl
67
+
68
+
# Force SQLx to use offline mode with workspace cache
69
+
ENV SQLX_OFFLINE=true
70
+
71
+
# copy sqlx in
72
+
COPY ./.sqlx ./services/cadet/.sqlx
73
+
74
+
# Debug platform detection and run build
75
+
RUN echo "DEBUG Before target.sh: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH" && \
76
+
. ./target.sh && \
77
+
touch services/cadet/src/main.rs && \
78
+
echo "Building for $TARGET_ARCH" && \
79
+
cargo build --release --target $RUST_TARGET --package cadet && \
80
+
cp target/$RUST_TARGET/release/cadet target/cadet
21
81
22
82
FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc
23
83
+240
services/cadet/src/ingestors/car/README.md
+240
services/cadet/src/ingestors/car/README.md
···
1
+
# CAR Import System with `atmst`
2
+
3
+
This directory contains the implementation of Teal's CAR (Content Addressable aRchive) import functionality, now powered by the `atmst` library for proper AT Protocol-style Merkle Search Tree handling.
4
+
5
+
## Overview
6
+
7
+
The CAR import system allows Teal to ingest historical music listening data from AT Protocol repositories. Previously, this was done with manual IPLD parsing, but we've now migrated to use the specialized `atmst` library for more accurate and robust CAR file processing.
8
+
9
+
## Key Components
10
+
11
+
### `CarImportIngestor`
12
+
13
+
The main entry point for CAR file processing. This ingestor:
14
+
15
+
1. **Accepts CAR data** via the `LexiconIngestor` interface (base64 or URL)
16
+
2. **Uses `atmst::CarImporter`** to parse CAR files with proper MST handling
17
+
3. **Converts to MST structure** for tree traversal and record extraction
18
+
4. **Delegates to existing ingestors** for Teal record types (play, profile, status)
19
+
20
+
### Migration from `iroh-car` to `atmst`
21
+
22
+
**Previous Implementation:**
23
+
- Used `iroh-car` for basic CAR parsing
24
+
- Manual IPLD block decoding with `libipld`
25
+
- Complex two-pass processing to extract rkey mappings from commit operations
26
+
- Error-prone MST parsing that could miss records
27
+
28
+
**New Implementation:**
29
+
- Uses `atmst::CarImporter` for specialized AT Protocol CAR handling
30
+
- Built-in MST structure understanding
31
+
- Proper tree traversal with guaranteed rkey extraction
32
+
- More reliable and maintainable code
33
+
34
+
## Usage
35
+
36
+
### As a LexiconIngestor
37
+
38
+
The CAR importer integrates seamlessly with Teal's existing ingestion pipeline:
39
+
40
+
```rust
41
+
// CAR data in a record
42
+
{
43
+
"$type": "com.teal.car.import",
44
+
"carData": "base64-encoded-car-file-here"
45
+
}
46
+
47
+
// Or as a URL reference
48
+
{
49
+
"$type": "com.teal.car.import",
50
+
"carData": {
51
+
"url": "https://example.com/repo.car"
52
+
}
53
+
}
54
+
```
55
+
56
+
### Direct Import
57
+
58
+
```rust
59
+
let ingestor = CarImportIngestor::new(db_pool);
60
+
61
+
// Import from bytes
62
+
let import_id = ingestor.import_car_bytes(&car_data, "did:plc:example").await?;
63
+
64
+
// Import from PDS
65
+
let import_id = ingestor.fetch_and_process_identity_car("user.bsky.social").await?;
66
+
```
67
+
68
+
## Supported Record Types
69
+
70
+
The CAR importer automatically detects and processes these Teal record types:
71
+
72
+
- **`fm.teal.alpha.feed.play`** - Music play records
73
+
- **`fm.teal.alpha.actor.profile`** - User profile data
74
+
- **`fm.teal.alpha.actor.status`** - User status updates
75
+
76
+
Records are processed using the same logic as real-time Jetstream ingestion, ensuring data consistency.
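Conceptually the routing is just a match on the collection portion of the MST key. A minimal, self-contained sketch is below; the real `process_extracted_record` in `car_import.rs` delegates to the existing Play/Profile/Status ingestors instead of printing:

```rust
use serde_json::Value;

/// Illustrative only: route an extracted record by its collection NSID.
/// The real implementation hands the JSON to the matching Teal ingestor.
fn route_record(collection: &str, rkey: &str, _data: &Value) -> anyhow::Result<()> {
    match collection {
        "fm.teal.alpha.feed.play" => println!("store play record {rkey}"),
        "fm.teal.alpha.actor.profile" => println!("store profile record {rkey}"),
        "fm.teal.alpha.actor.status" => println!("store status record {rkey}"),
        other => eprintln!("skipping unknown collection: {other}"),
    }
    Ok(())
}
```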
77
+
78
+
## Architecture
79
+
80
+
### MST Processing Flow
81
+
82
+
1. **CAR Import**: `atmst::CarImporter` loads and validates the CAR file
83
+
2. **MST Conversion**: CAR data is converted to an `atmst::Mst` structure
84
+
3. **Tree Traversal**: MST is traversed depth-first to find all records
85
+
4. **Record Extraction**: Each MST entry is examined for Teal record types
86
+
5. **Delegation**: Valid records are passed to existing Teal ingestors
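In code, the traversal is a short async loop over the MST entry stream. A minimal sketch, assuming the same `atmst` calls used by `extract_records_from_mst` in `car_import.rs` (`Mst::iter().into_stream()` and `CarImporter::decode_cbor`):

```rust
use atmst::{mst::Mst, CarImporter};
use futures::StreamExt;

/// Illustrative sketch: walk every (key, cid) entry and decode Teal records.
async fn walk_teal_records(mst: &Mst, importer: &CarImporter) -> anyhow::Result<()> {
    let mut entries = mst.iter().into_stream();
    while let Some(entry) = entries.next().await {
        let (key, cid) = entry.map_err(|e| anyhow::anyhow!("MST iteration error: {e}"))?;
        // MST keys look like "fm.teal.alpha.feed.play/<rkey>"
        if let Some((collection, rkey)) = key.rsplit_once('/') {
            if collection.starts_with("fm.teal.alpha.") {
                // Decode the referenced block; the real code converts the IPLD
                // to JSON and hands it to the matching ingestor.
                let _record = importer
                    .decode_cbor(&cid)
                    .map_err(|e| anyhow::anyhow!("decode failed for {cid}: {e}"))?;
                println!("found {collection} record with rkey {rkey}");
            }
        }
    }
    Ok(())
}
```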
87
+
88
+
### Key Benefits
89
+
90
+
- **Proper rkey handling**: MST structure ensures correct record key extraction
91
+
- **AT Protocol compliance**: Uses specialized library designed for AT Protocol
92
+
- **Maintainable code**: Eliminates complex manual MST parsing
93
+
- **Better error handling**: More robust than previous implementation
94
+
95
+
## Current Status
96
+
97
+
### ✅ Completed
98
+
- Basic `atmst` integration
99
+
- MST structure setup and conversion
100
+
- Record type detection and routing
101
+
- Integration with existing Teal ingestors
102
+
- Error handling and logging
103
+
104
+
### 🚧 In Progress
105
+
- **Block data access**: Full implementation of record data extraction from MST
106
+
- **MST traversal**: Complete iteration through MST entries
107
+
- **Testing**: Comprehensive test suite with real CAR files
108
+
109
+
### 📋 TODO
110
+
- Complete `get_record_from_mst()` implementation
111
+
- Add MST entry iteration logic
112
+
- Performance optimization for large CAR files
113
+
- Comprehensive integration tests
114
+
115
+
## Implementation Notes
116
+
117
+
### Block Data Access
118
+
119
+
The current implementation has a placeholder for accessing actual record data from the MST:
120
+
121
+
```rust
122
+
fn get_record_from_mst(&self, cid: &atmst::Cid, mst: &Mst) -> Option<Value> {
123
+
// TODO: Implement proper block data access using atmst API
124
+
// This requires understanding how to extract IPLD data for a given CID
125
+
// from the MST's internal block storage
126
+
None
127
+
}
128
+
```
129
+
130
+
This is the key missing piece; completing it will require consulting the `atmst` library documentation.
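That said, the rest of this change already reads blocks by CID through `CarImporter::decode_cbor` (see `get_record_data` in `car_import.rs`), so a completed version could plausibly take the shape below, assuming the importer that loaded the CAR is kept available alongside the MST:

```rust
use atmst::{CarImporter, Ipld};

/// Hypothetical shape of the missing piece: look the record up by CID in the
/// importer that loaded the CAR and decode it as DAG-CBOR. JSON conversion
/// would reuse the `ipld_to_json` helper from car_import.rs.
fn get_record_from_car(cid: &atmst::Cid, importer: &CarImporter) -> Option<Ipld> {
    importer.decode_cbor(cid).ok()
}
```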
131
+
132
+
### MST Traversal
133
+
134
+
Similarly, the MST traversal logic needs completion:
135
+
136
+
```rust
137
+
// TODO: Implement proper MST iteration
138
+
// for (cid, node) in mst.iter() {
139
+
// // Process MST entries
140
+
// }
141
+
```
142
+
143
+
### Error Handling
144
+
145
+
The system is designed to be resilient:
146
+
- Invalid records are logged and skipped
147
+
- Network errors during PDS fetching are properly reported
148
+
- Database errors are propagated with context
149
+
150
+
## Testing
151
+
152
+
### Test Structure
153
+
154
+
```bash
155
+
# Unit tests (no database required)
156
+
cargo test test_parse_teal_key
157
+
cargo test test_is_teal_record_key
158
+
159
+
# Integration tests (requires database)
160
+
cargo test test_atmst_car_import --ignored
161
+
162
+
# CLI testing
163
+
cd tools/teal-cli
164
+
cargo run -- car analyze path/to/file.car
165
+
```
166
+
167
+
### Test Data
168
+
169
+
Test CAR files should be placed in `services/cadet/` for integration testing:
170
+
- `test.car` - Basic test file with Teal records
171
+
- `large.car` - Performance testing file
172
+
- `empty.car` - Edge case testing
173
+
174
+
## Dependencies
175
+
176
+
### Key Dependencies
177
+
- **`atmst`**: AT Protocol MST library (v0.0.1)
178
+
- **`serde_json`**: JSON serialization for record processing
179
+
- **`anyhow`**: Error handling
180
+
- **`uuid`**: Import ID generation
181
+
- **`reqwest`**: HTTP client for PDS fetching
182
+
183
+
### Workspace Dependencies
184
+
The implementation uses existing Teal workspace dependencies for database access, logging, and record processing.
185
+
186
+
## Configuration
187
+
188
+
No additional configuration is required. The CAR importer uses the same database connection and logging setup as other Teal ingestors.
189
+
190
+
## Monitoring
191
+
192
+
The CAR importer provides detailed logging:
193
+
194
+
- **Info**: Successful imports, record counts, processing progress
195
+
- **Warn**: Skipped records, missing data, network issues
196
+
- **Error**: Database failures, invalid CAR files, processing errors
197
+
198
+
Metrics are integrated with Teal's existing observability stack.
199
+
200
+
## Performance
201
+
202
+
### Optimization Strategies
203
+
204
+
1. **Streaming processing**: Records are processed as they're discovered
205
+
2. **Batch database operations**: Multiple records can be inserted in batches
206
+
3. **Memory management**: Large CAR files are processed without loading entirely into memory
207
+
4. **Parallel processing**: Future enhancement for concurrent record processing
208
+
209
+
### Benchmarks
210
+
211
+
Performance testing should be conducted with:
212
+
- Small CAR files (< 1MB, ~100 records)
213
+
- Medium CAR files (1-50MB, ~10K records)
214
+
- Large CAR files (> 50MB, ~100K+ records)
215
+
216
+
## Future Enhancements
217
+
218
+
### Planned Features
219
+
- **Incremental imports**: Support for delta/since-based CAR fetching
220
+
- **Batch processing**: Queue-based processing for multiple CAR files
221
+
- **Validation**: Pre-import validation of CAR file integrity
222
+
- **Metrics**: Detailed import statistics and performance monitoring
223
+
224
+
### Integration Opportunities
225
+
- **Admin API**: Trigger imports via HTTP API
226
+
- **Scheduled imports**: Cron-based periodic imports from known users
227
+
- **Real-time sync**: Hybrid approach combining Jetstream + CAR imports
228
+
229
+
---
230
+
231
+
## Contributing
232
+
233
+
When working on the CAR import system:
234
+
235
+
1. **Test thoroughly**: Use both unit and integration tests
236
+
2. **Document changes**: Update this README for significant modifications
237
+
3. **Monitor performance**: Large CAR files can impact system performance
238
+
4. **Handle errors gracefully**: Network and parsing errors are expected
239
+
240
+
For questions about `atmst` integration or MST processing, refer to the library documentation or consider reaching out to the `atmst` maintainers.
+677
-421
services/cadet/src/ingestors/car/car_import.rs
+677
-421
services/cadet/src/ingestors/car/car_import.rs
···
1
+
//! CAR (Content Addressable aRchive) Import Ingestor using atmst
2
+
//!
3
+
//! This module handles importing Teal records from CAR files using the atmst library,
4
+
//! which provides proper AT Protocol-style Merkle Search Tree handling. The CAR import process:
5
+
//!
6
+
//! 1. Receives CAR data via the LexiconIngestor interface (base64 encoded or URL)
7
+
//! 2. Uses atmst::CarImporter to parse the CAR file and extract MST structure
8
+
//! 3. Converts the CarImporter to an MST for proper tree traversal
9
+
//! 4. Iterates through MST nodes to find Teal record types (play, profile, status)
10
+
//! 5. Delegates to existing Teal ingestors using the actual DID and proper rkey
11
+
//!
12
+
//! ## Usage Example
13
+
//!
14
+
//! ```rust,ignore
15
+
//! // CAR data can be provided in a record like:
16
+
//! {
17
+
//! "carData": "base64-encoded-car-file-here"
18
+
//! }
19
+
//!
20
+
//! // Or as a URL reference:
21
+
//! {
22
+
//! "carData": {
23
+
//! "url": "https://example.com/my-archive.car"
24
+
//! }
25
+
//! }
26
+
//! ```
27
+
//!
28
+
//! The ingestor will automatically detect record types and store them using the
29
+
//! same logic as real-time Jetstream ingestion, ensuring data consistency.
30
+
//! All imported records will be attributed to the DID that initiated the import
31
+
//! and use the original rkey from the AT Protocol MST structure.
32
+
33
+
use crate::ingestors::car::jobs::{queue_keys, CarImportJob};
34
+
use crate::redis_client::RedisClient;
1
35
use anyhow::{anyhow, Result};
2
36
use async_trait::async_trait;
3
-
use base64::{engine::general_purpose, Engine as _};
4
-
use chrono;
5
-
use cid::Cid;
6
-
use iroh_car::{CarHeader, CarReader};
7
-
use libipld::cbor::DagCborCodec;
8
-
use libipld::{Block, Cid as LibipldCid, Ipld};
9
-
use reqwest;
37
+
use atmst::{mst::Mst, Bytes, CarImporter};
38
+
use base64::Engine;
39
+
use futures::StreamExt;
40
+
use redis::AsyncCommands;
10
41
use rocketman::{ingestion::LexiconIngestor, types::event::Event};
11
42
use serde_json::Value;
12
43
use sqlx::PgPool;
13
-
use std::io::Cursor;
14
44
use tracing::{info, warn};
15
-
use url;
16
45
46
+
/// Helper struct for extracted records
47
+
#[derive(Debug)]
48
+
pub struct ExtractedRecord {
49
+
pub collection: String,
50
+
pub rkey: String,
51
+
pub data: serde_json::Value,
52
+
}
53
+
54
+
/// CAR Import Ingestor handles importing Teal records from CAR files using atmst
17
55
pub struct CarImportIngestor {
18
56
sql: PgPool,
19
57
}
20
58
21
59
impl CarImportIngestor {
60
+
/// Create a new CAR import ingestor with database connection
22
61
pub fn new(sql: PgPool) -> Self {
23
62
Self { sql }
24
63
}
25
64
26
-
/// Process a CAR file from bytes
27
-
async fn process_car_data(&self, car_data: &[u8], import_id: &str) -> Result<()> {
28
-
info!("Starting CAR file processing for import {}", import_id);
65
+
/// Helper to get a Redis connection for job queueing
66
+
pub async fn get_redis_connection(&self) -> Result<redis::aio::MultiplexedConnection> {
67
+
let redis_url =
68
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
69
+
let client = RedisClient::new(&redis_url)?;
70
+
client
71
+
.get_connection()
72
+
.await
73
+
.map_err(|e| anyhow!("Redis connection error: {}", e))
74
+
}
75
+
76
+
/// Process CAR file data using atmst library and extract Teal records
77
+
async fn process_car_data(&self, car_data: &[u8], import_id: &str, did: &str) -> Result<()> {
78
+
info!(
79
+
"Starting CAR file processing with atmst for import {} (DID: {})",
80
+
import_id, did
81
+
);
82
+
83
+
// Convert to Bytes for atmst
84
+
let car_bytes: Bytes = Bytes::from(car_data.to_vec());
85
+
86
+
// Create CarImporter and import the CAR data
87
+
let mut car_importer = CarImporter::new();
88
+
car_importer
89
+
.import_from_bytes(car_bytes.clone())
90
+
.await
91
+
.map_err(|e| anyhow!("Failed to import CAR with atmst: {}", e))?;
29
92
30
-
let cursor = Cursor::new(car_data);
31
-
let mut reader = CarReader::new(cursor).await?;
93
+
info!(
94
+
"CAR imported successfully. Root CIDs: {:?}, Total blocks: {}",
95
+
car_importer.roots(),
96
+
car_importer.len()
97
+
);
98
+
99
+
// Convert CarImporter to MST for proper tree traversal
100
+
let mst = Mst::from_car_importer(car_importer)
101
+
.await
102
+
.map_err(|e| anyhow!("Failed to convert CAR to MST: {}", e))?;
32
103
33
-
// Read the header
34
-
let header = reader.header();
35
-
info!("CAR header: {} root CIDs", header.roots().len());
104
+
info!("MST conversion successful, starting record extraction");
105
+
106
+
// Create a new CarImporter for data access since the previous one was consumed
107
+
let mut data_importer = CarImporter::new();
108
+
data_importer
109
+
.import_from_bytes(car_bytes)
110
+
.await
111
+
.map_err(|e| anyhow!("Failed to re-import CAR for data access: {}", e))?;
36
112
37
-
// Track import metadata
38
-
// self.store_import_metadata(import_id, header).await?;
113
+
// Extract all records from the MST
114
+
let records = self
115
+
.extract_records_from_mst(&mst, &data_importer, did)
116
+
.await?;
39
117
40
-
// Process blocks
41
-
let mut block_count = 0;
42
-
while let Some((cid, block_data)) = reader.next_block().await? {
43
-
// Convert iroh-car CID to our CID type for processing
44
-
let our_cid: Cid = cid.to_string().parse()?;
45
-
self.process_car_block(&our_cid, &block_data, import_id)
46
-
.await?;
47
-
block_count += 1;
118
+
info!("Extracted {} records from MST", records.len());
48
119
49
-
if block_count % 100 == 0 {
50
-
info!("Processed {} blocks for import {}", block_count, import_id);
120
+
// Process each record through the appropriate ingestor
121
+
let mut processed_count = 0;
122
+
for record in records {
123
+
match self.process_extracted_record(&record, import_id, did).await {
124
+
Ok(()) => {
125
+
processed_count += 1;
126
+
if processed_count % 10 == 0 {
127
+
info!("Processed {} records so far", processed_count);
128
+
}
129
+
}
130
+
Err(e) => {
131
+
warn!("Failed to process record {}: {}", record.rkey, e);
132
+
// Continue processing other records
133
+
}
51
134
}
52
135
}
53
136
54
137
info!(
55
-
"Completed CAR file processing: {} blocks for import {}",
56
-
block_count, import_id
138
+
"Completed CAR file processing: {} records processed for import {}",
139
+
processed_count, import_id
57
140
);
58
-
// self.mark_import_complete(import_id, block_count).await?;
59
141
60
142
Ok(())
61
143
}
62
144
63
-
/// Process an individual IPLD block from the CAR file
64
-
async fn process_car_block(&self, cid: &Cid, block_data: &[u8], import_id: &str) -> Result<()> {
65
-
// Store the raw block first
66
-
// self.store_raw_block(cid, block_data, import_id).await?;
145
+
/// Extract all Teal records from the MST
146
+
async fn extract_records_from_mst(
147
+
&self,
148
+
mst: &Mst,
149
+
car_importer: &CarImporter,
150
+
_did: &str,
151
+
) -> Result<Vec<ExtractedRecord>> {
152
+
let mut records = Vec::new();
153
+
154
+
// Use the MST iterator to traverse all entries
155
+
let mut stream = mst.iter().into_stream();
67
156
68
-
// Try to decode as IPLD and extract meaningful data
69
-
match self.decode_and_extract_data(cid, block_data).await {
70
-
Ok(Some(extracted_data)) => {
71
-
self.process_extracted_data(&extracted_data, cid, import_id)
72
-
.await?;
73
-
}
74
-
Ok(None) => {
75
-
// Block doesn't contain extractable data, just stored raw
76
-
}
77
-
Err(e) => {
78
-
warn!("Failed to decode block {}: {}", cid, e);
79
-
// Continue processing other blocks
157
+
while let Some(result) = stream.next().await {
158
+
match result {
159
+
Ok((key, record_cid)) => {
160
+
// Check if this is a Teal record based on the key pattern
161
+
if self.is_teal_record_key(&key) {
162
+
info!("๐ต Found Teal record: {} -> {}", key, record_cid);
163
+
if let Some((collection, rkey)) = self.parse_teal_key(&key) {
164
+
info!(" Collection: {}, rkey: {}", collection, rkey);
165
+
// Get the actual record data using the CID
166
+
match self.get_record_data(&record_cid, car_importer).await {
167
+
Ok(Some(data)) => {
168
+
info!(" โ
Successfully got record data for {}", record_cid);
169
+
records.push(ExtractedRecord {
170
+
collection,
171
+
rkey,
172
+
data,
173
+
});
174
+
}
175
+
Ok(None) => {
176
+
warn!(" โ No data found for record CID: {}", record_cid);
177
+
}
178
+
Err(e) => {
179
+
warn!(
180
+
" โ Failed to get record data for {}: {}",
181
+
record_cid, e
182
+
);
183
+
}
184
+
}
185
+
} else {
186
+
warn!(" โ Failed to parse Teal key: {}", key);
187
+
}
188
+
}
189
+
}
190
+
Err(e) => {
191
+
warn!("Error iterating MST: {}", e);
192
+
// Continue with other entries
193
+
}
80
194
}
81
195
}
82
196
83
-
Ok(())
197
+
Ok(records)
84
198
}
85
199
86
-
/// Decode IPLD block and extract AT Protocol data if present
87
-
async fn decode_and_extract_data(
200
+
/// Get record data from the CAR importer using a CID
201
+
async fn get_record_data(
88
202
&self,
89
-
cid: &Cid,
90
-
block_data: &[u8],
91
-
) -> Result<Option<ExtractedData>> {
92
-
// Create IPLD block (convert CID types)
93
-
let libipld_cid: LibipldCid = cid.to_string().parse()?;
94
-
let block: Block<libipld::DefaultParams> = Block::new(libipld_cid, block_data.to_vec())?;
95
-
96
-
// Decode to IPLD (try to decode as DAG-CBOR, which is common in AT Protocol)
97
-
let ipld: Ipld = match block.decode::<DagCborCodec, Ipld>() {
98
-
Ok(ipld) => ipld,
99
-
Err(_) => {
100
-
// If DAG-CBOR fails, try as raw data
101
-
return Ok(None);
102
-
}
103
-
};
104
-
105
-
// Check if this looks like AT Protocol data
106
-
if let Ipld::Map(map) = &ipld {
107
-
// Look for AT Protocol patterns
108
-
if let Some(collection) = map.get("$type").and_then(|v| {
109
-
if let Ipld::String(s) = v {
110
-
Some(s.as_str())
111
-
} else {
112
-
None
203
+
cid: &atmst::Cid,
204
+
car_importer: &CarImporter,
205
+
) -> Result<Option<Value>> {
206
+
// Try to decode the block as CBOR IPLD directly with atmst::Cid
207
+
info!("๐ Attempting to decode CBOR for CID: {}", cid);
208
+
match car_importer.decode_cbor(cid) {
209
+
Ok(ipld) => {
210
+
info!(" โ
Successfully decoded CBOR for CID: {}", cid);
211
+
// Convert IPLD to JSON for processing by existing ingestors
212
+
match self.ipld_to_json(&ipld) {
213
+
Ok(json) => {
214
+
info!(" โ
Successfully converted IPLD to JSON for CID: {}", cid);
215
+
Ok(Some(json))
216
+
}
217
+
Err(e) => {
218
+
warn!(
219
+
" โ Failed to convert IPLD to JSON for CID {}: {}",
220
+
cid, e
221
+
);
222
+
Ok(None)
223
+
}
113
224
}
114
-
}) {
115
-
return Ok(Some(ExtractedData {
116
-
collection: collection.to_string(),
117
-
data: ipld,
118
-
cid: cid.clone(),
119
-
}));
120
225
}
121
-
122
-
// Check for commit structures
123
-
if map.contains_key("ops") && map.contains_key("prev") {
124
-
return Ok(Some(ExtractedData {
125
-
collection: "commit".to_string(),
126
-
data: ipld,
127
-
cid: cid.clone(),
128
-
}));
226
+
Err(e) => {
227
+
warn!(" โ Failed to decode CBOR for CID {}: {}", cid, e);
228
+
Ok(None)
129
229
}
130
230
}
131
-
132
-
Ok(None)
133
231
}
134
232
135
-
/// Process extracted AT Protocol data
136
-
async fn process_extracted_data(
233
+
/// Process a single extracted record through the appropriate ingestor
234
+
async fn process_extracted_record(
137
235
&self,
138
-
data: &ExtractedData,
139
-
cid: &Cid,
140
-
import_id: &str,
236
+
record: &ExtractedRecord,
237
+
_import_id: &str,
238
+
did: &str,
141
239
) -> Result<()> {
142
-
match data.collection.as_str() {
240
+
info!(
241
+
"Processing {} record with rkey: {}",
242
+
record.collection, record.rkey
243
+
);
244
+
245
+
info!(
246
+
"๐ Processing {} record: {}",
247
+
record.collection, record.rkey
248
+
);
249
+
match record.collection.as_str() {
143
250
"fm.teal.alpha.feed.play" => {
144
-
self.process_play_record(&data.data, cid, import_id).await?;
251
+
info!(" ๐ Processing play record...");
252
+
let result = self
253
+
.process_play_record(&record.data, did, &record.rkey)
254
+
.await;
255
+
if result.is_ok() {
256
+
info!(" โ
Successfully processed play record");
257
+
} else {
258
+
warn!(" โ Failed to process play record: {:?}", result);
259
+
}
260
+
result
145
261
}
146
262
"fm.teal.alpha.actor.profile" => {
147
-
self.process_profile_record(&data.data, cid, import_id)
148
-
.await?;
263
+
info!(" ๐ค Processing profile record...");
264
+
let result = self
265
+
.process_profile_record(&record.data, did, &record.rkey)
266
+
.await;
267
+
if result.is_ok() {
268
+
info!(" โ
Successfully processed profile record");
269
+
} else {
270
+
warn!(" โ Failed to process profile record: {:?}", result);
271
+
}
272
+
result
149
273
}
150
274
"fm.teal.alpha.actor.status" => {
151
-
self.process_status_record(&data.data, cid, import_id)
152
-
.await?;
153
-
}
154
-
"commit" => {
155
-
self.process_commit_record(&data.data, cid, import_id)
156
-
.await?;
275
+
info!(" ๐ข Processing status record...");
276
+
let result = self
277
+
.process_status_record(&record.data, did, &record.rkey)
278
+
.await;
279
+
if result.is_ok() {
280
+
info!(" โ
Successfully processed status record");
281
+
} else {
282
+
warn!(" โ Failed to process status record: {:?}", result);
283
+
}
284
+
result
157
285
}
158
286
_ => {
159
-
info!("Unhandled collection type: {}", data.collection);
287
+
warn!("โ Unknown Teal collection: {}", record.collection);
288
+
Ok(())
160
289
}
161
290
}
291
+
}
162
292
163
-
Ok(())
293
+
/// Check if a key represents a Teal record
294
+
fn is_teal_record_key(&self, key: &str) -> bool {
295
+
key.starts_with("fm.teal.alpha.") && key.contains("/")
164
296
}
165
297
166
-
/// Process a Teal play record from IPLD data
167
-
async fn process_play_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
168
-
// Convert IPLD to JSON value for processing by existing ingestors
169
-
let json_value = ipld_to_json(ipld)?;
298
+
/// Parse a Teal MST key to extract collection and rkey
299
+
fn parse_teal_key(&self, key: &str) -> Option<(String, String)> {
300
+
if let Some(slash_pos) = key.rfind('/') {
301
+
let collection = key[..slash_pos].to_string();
302
+
let rkey = key[slash_pos + 1..].to_string();
303
+
Some((collection, rkey))
304
+
} else {
305
+
None
306
+
}
307
+
}
170
308
171
-
// Delegate to existing play ingestor logic
172
-
if let Ok(play_record) =
173
-
serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(json_value)
309
+
/// Process a play record using the existing PlayIngestor
310
+
async fn process_play_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> {
311
+
match serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(data.clone())
174
312
{
175
-
info!("Importing play record from CAR: {}", play_record.track_name);
313
+
Ok(play_record) => {
314
+
let play_ingestor =
315
+
super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
316
+
let uri = super::super::teal::assemble_at_uri(did, "fm.teal.alpha.feed.play", rkey);
176
317
177
-
// Use existing play ingestor for consistency
178
-
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
318
+
play_ingestor
319
+
.insert_play(
320
+
&play_record,
321
+
&uri,
322
+
&format!("car-import-{}", uuid::Uuid::new_v4()),
323
+
did,
324
+
rkey,
325
+
)
326
+
.await?;
179
327
180
-
// Create a synthetic AT URI for the imported record
181
-
let synthetic_did = format!("car-import:{}", import_id);
182
-
let rkey = cid.to_string();
183
-
let uri = super::super::teal::assemble_at_uri(
184
-
&synthetic_did,
185
-
"fm.teal.alpha.feed.play",
186
-
&rkey,
187
-
);
328
+
info!(
329
+
"Successfully stored play record: {} by {:?}",
330
+
play_record.track_name, play_record.artist_names
331
+
);
332
+
Ok(())
333
+
}
334
+
Err(e) => {
335
+
warn!("Failed to deserialize play record data: {}", e);
336
+
Err(anyhow!("Invalid play record format: {}", e))
337
+
}
338
+
}
339
+
}
188
340
189
-
// Store using existing logic
190
-
play_ingestor
191
-
.insert_play(&play_record, &uri, &cid.to_string(), &synthetic_did, &rkey)
192
-
.await?;
341
+
/// Process a profile record using the existing ActorProfileIngestor
342
+
async fn process_profile_record(&self, data: &Value, did: &str, _rkey: &str) -> Result<()> {
343
+
match serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>(
344
+
data.clone(),
345
+
) {
346
+
Ok(profile_record) => {
347
+
let profile_ingestor =
348
+
super::super::teal::actor_profile::ActorProfileIngestor::new(self.sql.clone());
349
+
let did_typed = atrium_api::types::string::Did::new(did.to_string())
350
+
.map_err(|e| anyhow!("Failed to create Did: {}", e))?;
193
351
194
-
// Track the extracted record
195
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.feed.play", Some(&uri)).await?;
352
+
profile_ingestor
353
+
.insert_profile(did_typed, &profile_record)
354
+
.await?;
355
+
356
+
info!(
357
+
"Successfully stored profile record: {:?}",
358
+
profile_record.display_name
359
+
);
360
+
Ok(())
361
+
}
362
+
Err(e) => {
363
+
warn!("Failed to deserialize profile record data: {}", e);
364
+
Err(anyhow!("Invalid profile record format: {}", e))
365
+
}
196
366
}
367
+
}
197
368
198
-
Ok(())
369
+
/// Process a status record using the existing ActorStatusIngestor
370
+
async fn process_status_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> {
371
+
match serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>(
372
+
data.clone(),
373
+
) {
374
+
Ok(status_record) => {
375
+
let status_ingestor =
376
+
super::super::teal::actor_status::ActorStatusIngestor::new(self.sql.clone());
377
+
let did_typed = atrium_api::types::string::Did::new(did.to_string())
378
+
.map_err(|e| anyhow!("Failed to create Did: {}", e))?;
379
+
380
+
status_ingestor
381
+
.insert_status(
382
+
did_typed,
383
+
rkey,
384
+
&format!("car-import-{}", uuid::Uuid::new_v4()),
385
+
&status_record,
386
+
)
387
+
.await?;
388
+
389
+
info!("Successfully stored status record from CAR import");
390
+
Ok(())
391
+
}
392
+
Err(e) => {
393
+
warn!("Failed to deserialize status record data: {}", e);
394
+
Err(anyhow!("Invalid status record format: {}", e))
395
+
}
396
+
}
199
397
}
200
398
201
-
/// Process a Teal profile record from IPLD data
202
-
async fn process_profile_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
203
-
let json_value = ipld_to_json(ipld)?;
399
+
/// Fetch and process a CAR file from a PDS for a given identity
400
+
pub async fn fetch_and_process_identity_car(&self, handle_or_did: &str) -> Result<String> {
401
+
info!("Fetching CAR file for identity: {}", handle_or_did);
402
+
403
+
// Resolve to DID if needed
404
+
let did = if handle_or_did.starts_with("did:") {
405
+
handle_or_did.to_string()
406
+
} else {
407
+
self.resolve_handle_to_did(handle_or_did).await?
408
+
};
409
+
410
+
// Resolve DID to PDS
411
+
let pds_url = self.resolve_did_to_pds(&did).await?;
412
+
info!("Resolved {} to PDS: {}", did, pds_url);
204
413
205
-
if let Ok(profile_record) =
206
-
serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>(json_value)
207
-
{
208
-
info!(
209
-
"Importing profile record from CAR: {:?}",
210
-
profile_record.display_name
211
-
);
414
+
// Fetch CAR file
415
+
let car_data = self.fetch_car_from_pds(&pds_url, &did).await?;
212
416
213
-
// For now, just log until we have public methods on profile ingestor
214
-
info!(
215
-
"Would store profile record from CAR import {} with CID {}",
216
-
import_id, cid
217
-
);
417
+
// Generate import ID
418
+
let import_id = uuid::Uuid::new_v4().to_string();
218
419
219
-
// Track the extracted record
220
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.profile", None).await?;
221
-
}
420
+
// Process the CAR data
421
+
self.process_car_data(&car_data, &import_id, &did).await?;
222
422
223
-
Ok(())
423
+
Ok(import_id)
224
424
}
225
425
226
-
/// Process a Teal status record from IPLD data
227
-
async fn process_status_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
228
-
let json_value = ipld_to_json(ipld)?;
426
+
/// Resolve handle to DID
427
+
async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> {
428
+
let url = format!(
429
+
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
430
+
handle
431
+
);
432
+
let response: Value = reqwest::get(&url).await?.json().await?;
229
433
230
-
if let Ok(_status_record) =
231
-
serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>(json_value)
232
-
{
233
-
info!("Importing status record from CAR");
434
+
response["did"]
435
+
.as_str()
436
+
.map(|s| s.to_string())
437
+
.ok_or_else(|| anyhow!("Failed to resolve handle to DID"))
438
+
}
234
439
235
-
// For now, just log until we have public methods on status ingestor
236
-
info!(
237
-
"Would store status record from CAR import {} with CID {}",
238
-
import_id, cid
239
-
);
440
+
/// Resolve DID to PDS URL
441
+
async fn resolve_did_to_pds(&self, did: &str) -> Result<String> {
442
+
let url = format!("https://plc.directory/{}", did);
443
+
let response: Value = reqwest::get(&url).await?.json().await?;
240
444
241
-
// Track the extracted record
242
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.status", None).await?;
445
+
if let Some(services) = response["service"].as_array() {
446
+
for service in services {
447
+
if service["id"] == "#atproto_pds" {
448
+
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
449
+
return Ok(endpoint.to_string());
450
+
}
451
+
}
452
+
}
243
453
}
244
454
245
-
Ok(())
455
+
Err(anyhow!("Could not resolve PDS for DID: {}", did))
246
456
}
247
457
248
-
/// Process a commit record from IPLD data
249
-
async fn process_commit_record(
250
-
&self,
251
-
_ipld: &Ipld,
252
-
_cid: &Cid,
253
-
_import_id: &str,
254
-
) -> Result<()> {
255
-
info!("Processing commit record from CAR import");
458
+
/// Fetch CAR file from PDS
459
+
async fn fetch_car_from_pds(&self, pds_url: &str, did: &str) -> Result<Vec<u8>> {
460
+
let url = format!("{}/xrpc/com.atproto.sync.getRepo?did={}", pds_url, did);
461
+
let response = reqwest::get(&url).await?;
256
462
257
-
// Store commit metadata for tracking
258
-
// self.store_commit_metadata(ipld, cid, import_id).await?;
463
+
if !response.status().is_success() {
464
+
return Err(anyhow!(
465
+
"Failed to fetch CAR file: HTTP {}",
466
+
response.status()
467
+
));
468
+
}
469
+
470
+
let car_data = response.bytes().await?.to_vec();
471
+
info!("Fetched CAR file: {} bytes", car_data.len());
259
472
260
-
Ok(())
473
+
Ok(car_data)
261
474
}
262
475
263
-
/// Store CAR import metadata
264
-
async fn store_import_metadata(&self, _import_id: &str, _header: &CarHeader) -> Result<()> {
265
-
// TODO: Implement when database tables are ready
266
-
Ok(())
476
+
/// Helper: Convert IPLD to JSON
477
+
#[allow(clippy::only_used_in_recursion)]
478
+
fn ipld_to_json(&self, ipld: &atmst::Ipld) -> Result<Value> {
479
+
use atmst::Ipld;
480
+
481
+
match ipld {
482
+
Ipld::Null => Ok(Value::Null),
483
+
Ipld::Bool(b) => Ok(Value::Bool(*b)),
484
+
Ipld::Integer(i) => {
485
+
if let Ok(i64_val) = i64::try_from(*i) {
486
+
Ok(Value::Number(i64_val.into()))
487
+
} else {
488
+
Ok(Value::String(i.to_string()))
489
+
}
490
+
}
491
+
Ipld::Float(f) => {
492
+
if let Some(num) = serde_json::Number::from_f64(*f) {
493
+
Ok(Value::Number(num))
494
+
} else {
495
+
Err(anyhow!("Invalid float value"))
496
+
}
497
+
}
498
+
Ipld::String(s) => Ok(Value::String(s.clone())),
499
+
Ipld::Bytes(b) => Ok(Value::String(
500
+
base64::engine::general_purpose::STANDARD.encode(b),
501
+
)),
502
+
Ipld::List(list) => {
503
+
let json_array: Result<Vec<Value>> =
504
+
list.iter().map(|v| self.ipld_to_json(v)).collect();
505
+
Ok(Value::Array(json_array?))
506
+
}
507
+
Ipld::Map(map) => {
508
+
let mut json_map = serde_json::Map::new();
509
+
for (key, value) in map {
510
+
json_map.insert(key.clone(), self.ipld_to_json(value)?);
511
+
}
512
+
Ok(Value::Object(json_map))
513
+
}
514
+
Ipld::Link(cid) => Ok(Value::String(cid.to_string())),
515
+
}
267
516
}
517
+
}
268
518
269
-
/// Mark import as complete
270
-
async fn mark_import_complete(&self, _import_id: &str, _block_count: i32) -> Result<()> {
271
-
// TODO: Implement when database tables are ready
519
+
#[async_trait]
520
+
impl LexiconIngestor for CarImportIngestor {
521
+
async fn ingest(&self, message: Event<Value>) -> Result<()> {
522
+
let commit = message
523
+
.commit
524
+
.as_ref()
525
+
.ok_or_else(|| anyhow!("CarImportIngestor requires a commit event"))?;
526
+
527
+
let record = commit
528
+
.record
529
+
.as_ref()
530
+
.ok_or_else(|| anyhow!("CarImportIngestor requires a record in the commit"))?;
531
+
532
+
// Enqueue CAR import job into Redis
533
+
let job = CarImportJob {
534
+
request_id: uuid::Uuid::new_v4(),
535
+
identity: record
536
+
.get("identity")
537
+
.and_then(|v| v.as_str())
538
+
.ok_or_else(|| anyhow!("Missing identity in record"))?
539
+
.to_string(),
540
+
since: None,
541
+
created_at: chrono::Utc::now(),
542
+
description: None,
543
+
};
544
+
let job_payload = serde_json::to_string(&job)?;
545
+
let mut conn = self.get_redis_connection().await?;
546
+
// Specify the expected return type to avoid FromRedisValue fallback issues in edition 2024
547
+
let _: () = conn.lpush(queue_keys::CAR_IMPORT_JOBS, job_payload).await?;
548
+
tracing::info!("Enqueued CAR import job: {}", job.request_id);
549
+
272
550
Ok(())
273
551
}
552
+
}
274
553
275
-
/// Store raw IPLD block
276
-
async fn store_raw_block(
277
-
&self,
278
-
_cid: &Cid,
279
-
_block_data: &[u8],
280
-
_import_id: &str,
281
-
) -> Result<()> {
282
-
// TODO: Implement when database tables are ready
283
-
Ok(())
554
+
#[allow(dead_code)]
555
+
impl CarImportIngestor {
556
+
/// Download CAR file from URL
557
+
async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> {
558
+
let response = reqwest::get(url).await?;
559
+
Ok(response.bytes().await?.to_vec())
284
560
}
285
561
286
-
/// Store commit metadata
287
-
async fn store_commit_metadata(&self, _ipld: &Ipld, _cid: &Cid, import_id: &str) -> Result<()> {
288
-
info!("Would store commit metadata from CAR import {}", import_id);
289
-
Ok(())
562
+
/// Import CAR data from bytes (public interface)
563
+
pub async fn import_car_bytes(&self, car_data: &[u8], did: &str) -> Result<String> {
564
+
let import_id = uuid::Uuid::new_v4().to_string();
565
+
self.process_car_data(car_data, &import_id, did).await?;
566
+
Ok(import_id)
290
567
}
291
568
292
-
/// Store extracted record tracking
293
-
async fn store_extracted_record(
294
-
&self,
295
-
_import_id: &str,
296
-
_cid: &Cid,
297
-
_collection: &str,
298
-
_record_uri: Option<&str>,
299
-
) -> Result<()> {
300
-
// TODO: Implement when database tables are ready
301
-
Ok(())
569
+
/// Consolidate synthetic artists with MusicBrainz artists
570
+
pub async fn consolidate_synthetic_artists(&self, min_confidence: f64) -> Result<usize> {
571
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
572
+
play_ingestor
573
+
.consolidate_synthetic_artists(min_confidence)
574
+
.await
302
575
}
303
576
304
-
/// Fetch and process CAR file for a given identity (handle or DID)
305
-
pub async fn fetch_and_process_identity_car(&self, identity: &str) -> Result<String> {
306
-
info!(
307
-
"Starting CAR fetch and processing for identity: {}",
308
-
identity
309
-
);
577
+
/// Consolidate duplicate releases
578
+
pub async fn consolidate_duplicate_releases(&self, min_confidence: f64) -> Result<usize> {
579
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
580
+
play_ingestor
581
+
.consolidate_duplicate_releases(min_confidence)
582
+
.await
583
+
}
310
584
311
-
// Resolve identity to DID and PDS
312
-
let (user_did, pds_host) = self.resolve_user_to_pds(identity).await?;
313
-
info!(
314
-
"Resolved {} to DID {} on PDS {}",
315
-
identity, user_did, pds_host
316
-
);
585
+
/// Consolidate duplicate recordings
586
+
pub async fn consolidate_duplicate_recordings(&self, min_confidence: f64) -> Result<usize> {
587
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
588
+
play_ingestor
589
+
.consolidate_duplicate_recordings(min_confidence)
590
+
.await
591
+
}
317
592
318
-
// Fetch CAR file from PDS
319
-
let car_data = self.fetch_car_from_pds(&pds_host, &user_did, None).await?;
320
-
info!(
321
-
"Successfully fetched CAR file for {} ({} bytes)",
322
-
user_did,
323
-
car_data.len()
324
-
);
593
+
/// Preview consolidation candidates before running consolidation
594
+
pub async fn preview_consolidation_candidates(&self, min_confidence: f64) -> Result<()> {
595
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
596
+
play_ingestor
597
+
.preview_consolidation_candidates(min_confidence)
598
+
.await
599
+
}
325
600
326
-
// Generate import ID
327
-
let import_id = format!(
328
-
"pds-{}-{}",
329
-
user_did.replace(":", "-"),
330
-
chrono::Utc::now().timestamp()
331
-
);
601
+
/// Run full batch consolidation for all entity types
602
+
pub async fn run_full_consolidation(&self) -> Result<()> {
603
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
604
+
play_ingestor.run_full_consolidation().await
605
+
}
606
+
}
332
607
333
-
// Process through existing pipeline
334
-
self.process_car_data(&car_data, &import_id).await?;
608
+
// Removed unused helper struct for extracted records.
335
609
336
-
info!("โ
CAR import completed successfully for {}", identity);
337
-
Ok(import_id)
338
-
}
610
+
#[cfg(test)]
611
+
mod tests {
612
+
use super::*;
613
+
use atmst::{CarBuilder, Ipld};
614
+
use std::collections::BTreeMap;
339
615
340
-
/// Resolve a user identifier (DID or handle) to their DID and PDS host
341
-
async fn resolve_user_to_pds(&self, user_identifier: &str) -> Result<(String, String)> {
342
-
if user_identifier.starts_with("did:") {
343
-
// User provided a DID directly, resolve to PDS
344
-
let pds_host = self.resolve_did_to_pds(user_identifier).await?;
345
-
Ok((user_identifier.to_string(), pds_host))
346
-
} else {
347
-
// User provided a handle, resolve to DID then PDS
348
-
let user_did = self.resolve_handle_to_did(user_identifier).await?;
349
-
let pds_host = self.resolve_did_to_pds(&user_did).await?;
350
-
Ok((user_did, pds_host))
351
-
}
616
+
fn create_mock_teal_play_record() -> Ipld {
617
+
let mut record = BTreeMap::new();
618
+
record.insert(
619
+
"$type".to_string(),
620
+
Ipld::String("fm.teal.alpha.feed.play".to_string()),
621
+
);
622
+
record.insert(
623
+
"track_name".to_string(),
624
+
Ipld::String("Test Song".to_string()),
625
+
);
626
+
record.insert(
627
+
"artist_names".to_string(),
628
+
Ipld::List(vec![Ipld::String("Test Artist".to_string())]),
629
+
);
630
+
record.insert("duration".to_string(), Ipld::Integer(180000));
631
+
record.insert(
632
+
"created_at".to_string(),
633
+
Ipld::String("2024-01-01T00:00:00Z".to_string()),
634
+
);
635
+
Ipld::Map(record)
352
636
}
353
637
354
-
/// Resolve a handle to a DID using com.atproto.identity.resolveHandle
355
-
async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> {
356
-
let url = format!(
357
-
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
358
-
handle
638
+
fn create_mock_teal_profile_record() -> Ipld {
639
+
let mut record = BTreeMap::new();
640
+
record.insert(
641
+
"$type".to_string(),
642
+
Ipld::String("fm.teal.alpha.actor.profile".to_string()),
359
643
);
644
+
record.insert(
645
+
"display_name".to_string(),
646
+
Ipld::String("Test User".to_string()),
647
+
);
648
+
record.insert(
649
+
"description".to_string(),
650
+
Ipld::String("Music lover".to_string()),
651
+
);
652
+
Ipld::Map(record)
653
+
}
360
654
361
-
let response = reqwest::get(&url).await?;
362
-
if !response.status().is_success() {
363
-
return Err(anyhow!(
364
-
"Failed to resolve handle {}: {}",
365
-
handle,
366
-
response.status()
367
-
));
368
-
}
655
+
async fn create_test_car_with_teal_records() -> Result<Bytes> {
656
+
let mut builder = CarBuilder::new();
369
657
370
-
let json: serde_json::Value = response.json().await?;
371
-
let did = json["did"]
372
-
.as_str()
373
-
.ok_or_else(|| anyhow!("No DID found in response for handle {}", handle))?;
658
+
// Create test Teal records
659
+
let play_record = create_mock_teal_play_record();
660
+
let profile_record = create_mock_teal_profile_record();
374
661
375
-
Ok(did.to_string())
376
-
}
662
+
// Add records to CAR
663
+
let play_cid = builder.add_cbor(&play_record)?;
664
+
let profile_cid = builder.add_cbor(&profile_record)?;
377
665
378
-
/// Resolve a DID to their PDS host using DID document
379
-
async fn resolve_did_to_pds(&self, did: &str) -> Result<String> {
380
-
// For DID:plc, use the PLC directory
381
-
if did.starts_with("did:plc:") {
382
-
let url = format!("https://plc.directory/{}", did);
666
+
// Add roots (in a real MST, these would be MST nodes, but for testing this is sufficient)
667
+
builder.add_root(play_cid);
668
+
builder.add_root(profile_cid);
383
669
384
-
let response = reqwest::get(&url).await?;
385
-
if !response.status().is_success() {
386
-
return Err(anyhow!(
387
-
"Failed to resolve DID {}: {}",
388
-
did,
389
-
response.status()
390
-
));
391
-
}
670
+
let importer = builder.build();
671
+
importer
672
+
.export_to_bytes()
673
+
.await
674
+
.map_err(|e| anyhow!("Failed to export CAR: {}", e))
675
+
}
392
676
393
-
let doc: serde_json::Value = response.json().await?;
677
+
#[test]
678
+
fn test_parse_teal_key() {
679
+
// This test doesn't need a database connection or async
680
+
let key = "fm.teal.alpha.feed.play/3k2akjdlkjsf";
394
681
395
-
// Find the PDS service endpoint
396
-
if let Some(services) = doc["service"].as_array() {
397
-
for service in services {
398
-
if service["id"].as_str() == Some("#atproto_pds") {
399
-
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
400
-
// Extract hostname from URL
401
-
let parsed_url = url::Url::parse(endpoint)?;
402
-
let host = parsed_url
403
-
.host_str()
404
-
.ok_or_else(|| anyhow!("Invalid PDS endpoint URL: {}", endpoint))?;
405
-
return Ok(host.to_string());
406
-
}
407
-
}
408
-
}
409
-
}
682
+
// Test the parsing logic directly
683
+
if let Some(slash_pos) = key.rfind('/') {
684
+
let collection = key[..slash_pos].to_string();
685
+
let rkey = key[slash_pos + 1..].to_string();
410
686
411
-
Err(anyhow!("No PDS service found in DID document for {}", did))
687
+
assert_eq!(collection, "fm.teal.alpha.feed.play");
688
+
assert_eq!(rkey, "3k2akjdlkjsf");
412
689
} else {
413
-
Err(anyhow!("Unsupported DID method: {}", did))
690
+
panic!("Should have found slash in key");
414
691
}
415
692
}
416
693
417
-
/// Fetch CAR file from PDS using com.atproto.sync.getRepo
418
-
async fn fetch_car_from_pds(
419
-
&self,
420
-
pds_host: &str,
421
-
did: &str,
422
-
since: Option<&str>,
423
-
) -> Result<Vec<u8>> {
424
-
let mut url = format!(
425
-
"https://{}/xrpc/com.atproto.sync.getRepo?did={}",
426
-
pds_host, did
427
-
);
428
-
429
-
if let Some(since_rev) = since {
430
-
url.push_str(&format!("&since={}", since_rev));
694
+
#[test]
695
+
fn test_is_teal_record_key() {
696
+
// Test the logic directly without needing an ingestor instance
697
+
fn is_teal_record_key(key: &str) -> bool {
698
+
key.starts_with("fm.teal.alpha.") && key.contains("/")
431
699
}
432
700
433
-
info!("Fetching CAR file from: {}", url);
701
+
assert!(is_teal_record_key("fm.teal.alpha.feed.play/abc123"));
702
+
assert!(is_teal_record_key("fm.teal.alpha.profile/def456"));
703
+
assert!(!is_teal_record_key("app.bsky.feed.post/xyz789"));
704
+
assert!(!is_teal_record_key("fm.teal.alpha.feed.play")); // No rkey
705
+
}
434
706
435
-
let response = reqwest::get(&url).await?;
436
-
if !response.status().is_success() {
437
-
return Err(anyhow!(
438
-
"Failed to fetch CAR from PDS {}: {}",
439
-
pds_host,
440
-
response.status()
441
-
));
442
-
}
707
+
#[test]
708
+
fn test_ipld_to_json_conversion() {
709
+
// Test IPLD to JSON conversion logic directly
710
+
use atmst::Ipld;
711
+
use std::collections::BTreeMap;
443
712
444
-
// Verify content type
445
-
let content_type = response
446
-
.headers()
447
-
.get("content-type")
448
-
.and_then(|h| h.to_str().ok())
449
-
.unwrap_or("");
713
+
let mut record = BTreeMap::new();
714
+
record.insert(
715
+
"$type".to_string(),
716
+
Ipld::String("fm.teal.alpha.feed.play".to_string()),
717
+
);
718
+
record.insert(
719
+
"track_name".to_string(),
720
+
Ipld::String("Test Song".to_string()),
721
+
);
722
+
record.insert("duration".to_string(), Ipld::Integer(180000));
723
+
let play_record = Ipld::Map(record);
450
724
451
-
if !content_type.contains("application/vnd.ipld.car") {
452
-
return Err(anyhow!("Unexpected content type: {}", content_type));
725
+
// Test the conversion logic inline
726
+
fn ipld_to_json(ipld: &Ipld) -> Result<Value> {
727
+
match ipld {
728
+
Ipld::Null => Ok(Value::Null),
729
+
Ipld::Bool(b) => Ok(Value::Bool(*b)),
730
+
Ipld::Integer(i) => {
731
+
if let Ok(i64_val) = i64::try_from(*i) {
732
+
Ok(Value::Number(i64_val.into()))
733
+
} else {
734
+
Ok(Value::String(i.to_string()))
735
+
}
736
+
}
737
+
Ipld::String(s) => Ok(Value::String(s.clone())),
738
+
Ipld::Map(map) => {
739
+
let mut json_map = serde_json::Map::new();
740
+
for (key, value) in map {
741
+
json_map.insert(key.clone(), ipld_to_json(value)?);
742
+
}
743
+
Ok(Value::Object(json_map))
744
+
}
745
+
_ => Ok(Value::Null), // Simplified for test
746
+
}
453
747
}
454
748
455
-
let car_data = response.bytes().await?;
456
-
Ok(car_data.to_vec())
749
+
let json_result = ipld_to_json(&play_record);
750
+
assert!(json_result.is_ok());
751
+
let json = json_result.unwrap();
752
+
assert_eq!(json["$type"], "fm.teal.alpha.feed.play");
753
+
assert_eq!(json["track_name"], "Test Song");
754
+
assert_eq!(json["duration"], 180000);
457
755
}
458
-
}
459
756
460
-
#[async_trait]
461
-
impl LexiconIngestor for CarImportIngestor {
462
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
463
-
// For CAR imports, we expect the message to contain CAR file data
464
-
// This could be a file path, URL, or base64 encoded data
757
+
#[tokio::test]
758
+
async fn test_car_creation_and_basic_parsing() -> Result<()> {
759
+
// Test that we can create a CAR file with Teal records and parse it
760
+
let car_bytes = create_test_car_with_teal_records().await?;
465
761
466
-
if let Some(commit) = &message.commit {
467
-
if let Some(record) = &commit.record {
468
-
// Check if this is a CAR import request
469
-
if let Some(car_data_field) = record.get("carData") {
470
-
let import_id = format!("{}:{}", message.did, commit.rkey);
762
+
// Verify we can import the CAR with atmst
763
+
let mut importer = CarImporter::new();
764
+
importer.import_from_bytes(car_bytes).await?;
765
+
766
+
assert!(!importer.is_empty());
767
+
assert!(importer.len() >= 2); // Should have at least our 2 test records
471
768
472
-
match car_data_field {
473
-
Value::String(base64_data) => {
474
-
// Decode base64 CAR data
475
-
if let Ok(car_bytes) = general_purpose::STANDARD.decode(base64_data) {
476
-
self.process_car_data(&car_bytes, &import_id).await?;
477
-
} else {
478
-
return Err(anyhow!("Invalid base64 CAR data"));
479
-
}
480
-
}
481
-
Value::Object(obj) => {
482
-
// Handle different CAR data formats (URL, file path, etc.)
483
-
if let Some(Value::String(url)) = obj.get("url") {
484
-
// Download and process CAR from URL
485
-
let car_bytes = self.download_car_file(url).await?;
486
-
self.process_car_data(&car_bytes, &import_id).await?;
487
-
}
488
-
}
489
-
_ => {
490
-
return Err(anyhow!("Unsupported CAR data format"));
491
-
}
492
-
}
493
-
} else {
494
-
return Err(anyhow!("No CAR data found in record"));
769
+
// Test that we can decode the records
770
+
for cid in importer.cids() {
771
+
if let Ok(Ipld::Map(map)) = importer.decode_cbor(&cid) {
772
+
if let Some(Ipld::String(record_type)) = map.get("$type") {
773
+
assert!(record_type.starts_with("fm.teal.alpha."));
774
+
println!("Found Teal record: {}", record_type);
495
775
}
496
776
}
497
777
}
498
778
499
779
Ok(())
500
780
}
501
-
}
781
+
782
+
#[tokio::test]
783
+
#[ignore = "requires database connection"]
784
+
async fn test_full_car_import_integration() -> Result<()> {
785
+
// This test requires a real database connection
786
+
let database_url = std::env::var("DATABASE_URL")
787
+
.unwrap_or_else(|_| "postgresql://localhost/teal_test".to_string());
788
+
789
+
let pool = sqlx::PgPool::connect(&database_url).await?;
790
+
let ingestor = CarImportIngestor::new(pool);
502
791
503
-
impl CarImportIngestor {
504
-
/// Download CAR file from URL
505
-
async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> {
506
-
let response = reqwest::get(url).await?;
507
-
let bytes = response.bytes().await?;
508
-
Ok(bytes.to_vec())
509
-
}
510
-
}
792
+
// Create test CAR with Teal records
793
+
let car_bytes = create_test_car_with_teal_records().await?;
794
+
795
+
// Test the full import process
796
+
let import_id = uuid::Uuid::new_v4().to_string();
797
+
let test_did = "did:plc:test123";
511
798
512
-
/// Helper struct for extracted AT Protocol data
513
-
#[derive(Debug)]
514
-
struct ExtractedData {
515
-
collection: String,
516
-
data: Ipld,
517
-
cid: Cid,
518
-
}
799
+
// This should work with our new atmst implementation
800
+
let result = ingestor
801
+
.process_car_data(&car_bytes, &import_id, test_did)
802
+
.await;
519
803
520
-
/// Convert IPLD to JSON Value for compatibility with existing ingestors
521
-
fn ipld_to_json(ipld: &Ipld) -> Result<Value> {
522
-
match ipld {
523
-
Ipld::Null => Ok(Value::Null),
524
-
Ipld::Bool(b) => Ok(Value::Bool(*b)),
525
-
Ipld::Integer(i) => {
526
-
// Convert i128 to i64 for JSON compatibility
527
-
if let Ok(i64_val) = i64::try_from(*i) {
528
-
Ok(Value::Number(i64_val.into()))
529
-
} else {
530
-
// Fall back to string representation for very large integers
531
-
Ok(Value::String(i.to_string()))
532
-
}
533
-
}
534
-
Ipld::Float(f) => {
535
-
if let Some(num) = serde_json::Number::from_f64(*f) {
536
-
Ok(Value::Number(num))
537
-
} else {
538
-
Err(anyhow!("Invalid float value"))
804
+
// For now, we expect this to work but records might not actually get stored
805
+
// because the test CAR doesn't have proper MST structure
806
+
match result {
807
+
Ok(()) => {
808
+
println!("โ
CAR import completed successfully");
539
809
}
540
-
}
541
-
Ipld::String(s) => Ok(Value::String(s.clone())),
542
-
Ipld::Bytes(b) => {
543
-
// Convert bytes to base64 string
544
-
Ok(Value::String(general_purpose::STANDARD.encode(b)))
545
-
}
546
-
Ipld::List(list) => {
547
-
let json_array: Result<Vec<Value>> = list.iter().map(ipld_to_json).collect();
548
-
Ok(Value::Array(json_array?))
549
-
}
550
-
Ipld::Map(map) => {
551
-
let mut json_map = serde_json::Map::new();
552
-
for (key, value) in map {
553
-
json_map.insert(key.clone(), ipld_to_json(value)?);
810
+
Err(e) => {
811
+
println!("โ ๏ธ CAR import failed (expected for test data): {}", e);
812
+
// This is expected since our test CAR doesn't have proper MST structure
554
813
}
555
-
Ok(Value::Object(json_map))
556
814
}
557
-
Ipld::Link(cid) => {
558
-
// Convert CID to string representation
559
-
Ok(Value::String(cid.to_string()))
560
-
}
815
+
816
+
Ok(())
561
817
}
562
818
}
+51
services/cadet/src/ingestors/car/jobs.rs
+51
services/cadet/src/ingestors/car/jobs.rs
···
1
+
use chrono::{DateTime, Utc};
2
+
use serde::{Deserialize, Serialize};
3
+
use uuid::Uuid;
4
+
5
+
#[derive(Debug, Clone, Serialize, Deserialize)]
6
+
pub struct CarImportJob {
7
+
pub request_id: Uuid,
8
+
pub identity: String,
9
+
pub since: Option<DateTime<Utc>>,
10
+
pub created_at: DateTime<Utc>,
11
+
pub description: Option<String>,
12
+
}
13
+
14
+
#[derive(Debug, Clone, Serialize, Deserialize)]
15
+
pub struct CarImportJobStatus {
16
+
pub status: JobStatus,
17
+
pub created_at: DateTime<Utc>,
18
+
pub started_at: Option<DateTime<Utc>>,
19
+
pub completed_at: Option<DateTime<Utc>>,
20
+
pub error_message: Option<String>,
21
+
pub progress: Option<JobProgress>,
22
+
}
23
+
24
+
#[derive(Debug, Clone, Serialize, Deserialize)]
25
+
pub enum JobStatus {
26
+
Pending,
27
+
Processing,
28
+
Completed,
29
+
Failed,
30
+
Cancelled,
31
+
}
32
+
33
+
#[derive(Debug, Clone, Serialize, Deserialize)]
34
+
pub struct JobProgress {
35
+
pub step: String,
36
+
pub user_did: Option<String>,
37
+
pub pds_host: Option<String>,
38
+
pub car_size_bytes: Option<u64>,
39
+
pub blocks_processed: Option<u64>,
40
+
}
41
+
42
+
pub mod queue_keys {
43
+
use uuid::Uuid;
44
+
45
+
pub const CAR_IMPORT_JOBS: &str = "car_import_jobs";
46
+
pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status";
47
+
48
+
pub fn job_status_key(job_id: &Uuid) -> String {
49
+
format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id)
50
+
}
51
+
}
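These types only define the queue payload and status bookkeeping; the ingestor enqueues jobs onto `queue_keys::CAR_IMPORT_JOBS` with `LPUSH`. The consuming side is not part of this change, but a worker could pop and dispatch jobs roughly as in this hedged sketch, which only relies on `CarImportJob`, `queue_keys`, and the `fetch_and_process_identity_car` method shown earlier:

```rust
use redis::AsyncCommands;

// Hypothetical worker loop for the CAR import queue (not part of this diff).
// Assumes it lives next to jobs.rs so CarImportJob, queue_keys and
// CarImportIngestor are in scope.
async fn run_car_import_worker(
    mut conn: redis::aio::MultiplexedConnection,
    ingestor: &CarImportIngestor,
) -> anyhow::Result<()> {
    loop {
        // Jobs are enqueued with LPUSH, so RPOP gives FIFO ordering.
        let payload: Option<String> = conn.rpop(queue_keys::CAR_IMPORT_JOBS, None).await?;
        let Some(payload) = payload else {
            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
            continue;
        };
        let job: CarImportJob = serde_json::from_str(&payload)?;
        match ingestor.fetch_and_process_identity_car(&job.identity).await {
            Ok(import_id) => tracing::info!("job {} imported as {}", job.request_id, import_id),
            Err(e) => tracing::warn!("job {} failed: {}", job.request_id, e),
        }
    }
}
```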
+2
-1
services/cadet/src/ingestors/car/mod.rs
+2
-1
services/cadet/src/ingestors/car/mod.rs
+7
-7
services/cadet/src/ingestors/teal/actor_status.rs
+7
-7
services/cadet/src/ingestors/teal/actor_status.rs
···
23
23
status: &types::fm::teal::alpha::actor::status::RecordData,
24
24
) -> anyhow::Result<()> {
25
25
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
26
-
26
+
27
27
let record_json = serde_json::to_value(status)?;
28
-
28
+
29
29
sqlx::query!(
30
30
r#"
31
31
INSERT INTO statii (uri, did, rkey, cid, record)
···
43
43
)
44
44
.execute(&self.sql)
45
45
.await?;
46
-
46
+
47
47
Ok(())
48
48
}
49
49
50
50
pub async fn remove_status(&self, did: Did, rkey: &str) -> anyhow::Result<()> {
51
51
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
52
-
52
+
53
53
sqlx::query!(
54
54
r#"
55
55
DELETE FROM statii WHERE uri = $1
···
58
58
)
59
59
.execute(&self.sql)
60
60
.await?;
61
-
61
+
62
62
Ok(())
63
63
}
64
64
}
···
71
71
let record = serde_json::from_value::<
72
72
types::fm::teal::alpha::actor::status::RecordData,
73
73
>(record.clone())?;
74
-
74
+
75
75
if let Some(ref commit) = message.commit {
76
76
if let Some(ref cid) = commit.cid {
77
77
self.insert_status(
···
98
98
}
99
99
Ok(())
100
100
}
101
-
}
101
+
}
+1132
-62
services/cadet/src/ingestors/teal/feed_play.rs
+1132
-62
services/cadet/src/ingestors/teal/feed_play.rs
···
7
7
8
8
use super::assemble_at_uri;
9
9
10
+
#[derive(Debug, Clone)]
11
+
struct FuzzyMatchCandidate {
12
+
artist_id: i32,
13
+
name: String,
14
+
confidence: f64,
15
+
}
16
+
17
+
struct MusicBrainzCleaner;
18
+
19
+
impl MusicBrainzCleaner {
20
+
/// List of common "guff" words found in parentheses that should be removed
21
+
const GUFF_WORDS: &'static [&'static str] = &[
22
+
"a cappella",
23
+
"acoustic",
24
+
"bonus",
25
+
"censored",
26
+
"clean",
27
+
"club",
28
+
"clubmix",
29
+
"composition",
30
+
"cut",
31
+
"dance",
32
+
"demo",
33
+
"dialogue",
34
+
"dirty",
35
+
"edit",
36
+
"excerpt",
37
+
"explicit",
38
+
"extended",
39
+
"feat",
40
+
"featuring",
41
+
"ft",
42
+
"instrumental",
43
+
"interlude",
44
+
"intro",
45
+
"karaoke",
46
+
"live",
47
+
"long",
48
+
"main",
49
+
"maxi",
50
+
"megamix",
51
+
"mix",
52
+
"mono",
53
+
"official",
54
+
"orchestral",
55
+
"original",
56
+
"outro",
57
+
"outtake",
58
+
"outtakes",
59
+
"piano",
60
+
"quadraphonic",
61
+
"radio",
62
+
"rap",
63
+
"re-edit",
64
+
"reedit",
65
+
"refix",
66
+
"rehearsal",
67
+
"reinterpreted",
68
+
"released",
69
+
"release",
70
+
"remake",
71
+
"remastered",
72
+
"remaster",
73
+
"master",
74
+
"remix",
75
+
"remixed",
76
+
"remode",
77
+
"reprise",
78
+
"rework",
79
+
"reworked",
80
+
"rmx",
81
+
"session",
82
+
"short",
83
+
"single",
84
+
"skit",
85
+
"stereo",
86
+
"studio",
87
+
"take",
88
+
"takes",
89
+
"tape",
90
+
"track",
91
+
"tryout",
92
+
"uncensored",
93
+
"unknown",
94
+
"unplugged",
95
+
"untitled",
96
+
"version",
97
+
"ver",
98
+
"video",
99
+
"vocal",
100
+
"vs",
101
+
"with",
102
+
"without",
103
+
];
104
+
105
+
/// Clean artist name by removing common variations and guff
106
+
fn clean_artist_name(name: &str) -> String {
107
+
let mut cleaned = name.trim().to_string();
108
+
109
+
// Remove common featuring patterns
110
+
if let Some(pos) = cleaned.to_lowercase().find(" feat") {
111
+
cleaned = cleaned[..pos].trim().to_string();
112
+
}
113
+
if let Some(pos) = cleaned.to_lowercase().find(" ft.") {
114
+
cleaned = cleaned[..pos].trim().to_string();
115
+
}
116
+
if let Some(pos) = cleaned.to_lowercase().find(" featuring") {
117
+
cleaned = cleaned[..pos].trim().to_string();
118
+
}
119
+
120
+
// Remove parenthetical content if it looks like guff
121
+
if let Some(start) = cleaned.find('(') {
122
+
if let Some(end) = cleaned.find(')') {
123
+
let paren_content = &cleaned[start + 1..end].to_lowercase();
124
+
if Self::is_likely_guff(paren_content) {
125
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
126
+
.trim()
127
+
.to_string();
128
+
}
129
+
}
130
+
}
131
+
132
+
// Remove brackets with guff
133
+
if let Some(start) = cleaned.find('[') {
134
+
if let Some(end) = cleaned.find(']') {
135
+
let bracket_content = &cleaned[start + 1..end].to_lowercase();
136
+
if Self::is_likely_guff(bracket_content) {
137
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
138
+
.trim()
139
+
.to_string();
140
+
}
141
+
}
142
+
}
143
+
144
+
// Remove common prefixes/suffixes
145
+
if cleaned.to_lowercase().starts_with("the ") && cleaned.len() > 4 {
146
+
let without_the = &cleaned[4..];
147
+
if !without_the.trim().is_empty() {
148
+
return without_the.trim().to_string();
149
+
}
150
+
}
151
+
152
+
cleaned.trim().to_string()
153
+
}
154
+
155
+
/// Clean track name by removing common variations and guff
156
+
fn clean_track_name(name: &str) -> String {
157
+
let mut cleaned = name.trim().to_string();
158
+
159
+
// Remove parenthetical content if it looks like guff
160
+
if let Some(start) = cleaned.find('(') {
161
+
if let Some(end) = cleaned.find(')') {
162
+
let paren_content = &cleaned[start + 1..end].to_lowercase();
163
+
if Self::is_likely_guff(paren_content) {
164
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
165
+
.trim()
166
+
.to_string();
167
+
}
168
+
}
169
+
}
170
+
171
+
// Remove featuring artists from track titles
172
+
if let Some(pos) = cleaned.to_lowercase().find(" feat") {
173
+
cleaned = cleaned[..pos].trim().to_string();
174
+
}
175
+
if let Some(pos) = cleaned.to_lowercase().find(" ft.") {
176
+
cleaned = cleaned[..pos].trim().to_string();
177
+
}
178
+
179
+
cleaned.trim().to_string()
180
+
}
181
+
182
+
/// Check if parenthetical content is likely "guff" that should be removed
183
+
fn is_likely_guff(content: &str) -> bool {
184
+
let content_lower = content.to_lowercase();
185
+
let words: Vec<&str> = content_lower.split_whitespace().collect();
186
+
187
+
// If most words are guff words, consider it guff
188
+
let guff_word_count = words
189
+
.iter()
190
+
.filter(|word| Self::GUFF_WORDS.contains(word))
191
+
.count();
192
+
193
+
// Also check for years (19XX or 20XX)
194
+
let has_year = content_lower.chars().collect::<String>().contains("19")
195
+
|| content_lower.contains("20");
196
+
197
+
// Consider it guff if >50% are guff words, or if it contains years, or if it's short and common
198
+
guff_word_count > words.len() / 2
199
+
|| has_year
200
+
|| (words.len() <= 2
201
+
&& Self::GUFF_WORDS
202
+
.iter()
203
+
.any(|&guff| content_lower.contains(guff)))
204
+
}
205
+
206
+
/// Normalize text for comparison (remove special chars, lowercase, etc.)
207
+
fn normalize_for_comparison(text: &str) -> String {
208
+
text.chars()
209
+
.filter(|c| c.is_alphanumeric() || c.is_whitespace())
210
+
.collect::<String>()
211
+
.to_lowercase()
212
+
.split_whitespace()
213
+
.collect::<Vec<&str>>()
214
+
.join(" ")
215
+
}
216
+
}
217
+
10
218
pub struct PlayIngestor {
11
219
sql: PgPool,
12
220
}
···
58
266
Self { sql }
59
267
}
60
268
61
-
/// Inserts or updates an artist in the database.
62
-
/// Returns the Uuid of the artist.
63
-
async fn insert_artist(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
64
-
let artist_uuid = Uuid::parse_str(mbid)?;
65
-
let res = sqlx::query!(
269
+
/// Batch consolidate synthetic artists that match existing MusicBrainz artists
270
+
pub async fn consolidate_synthetic_artists(
271
+
&self,
272
+
min_confidence: f64,
273
+
) -> anyhow::Result<usize> {
274
+
tracing::info!(
275
+
"๐ Starting batch consolidation of synthetic artists with confidence >= {:.2}",
276
+
min_confidence
277
+
);
278
+
279
+
let consolidation_candidates = sqlx::query!(
280
+
r#"
281
+
SELECT DISTINCT
282
+
ae1.id as synthetic_id,
283
+
ae1.name as synthetic_name,
284
+
ae2.id as target_id,
285
+
ae2.name as target_name,
286
+
ae2.mbid as target_mbid,
287
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score
288
+
FROM artists_extended ae1
289
+
CROSS JOIN artists_extended ae2
290
+
WHERE ae1.id != ae2.id
291
+
AND ae1.mbid_type = 'synthetic'
292
+
AND ae2.mbid_type = 'musicbrainz'
293
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1
294
+
ORDER BY similarity_score DESC
295
+
"#,
296
+
min_confidence as f32
297
+
)
298
+
.fetch_all(&self.sql)
299
+
.await?;
300
+
301
+
let mut consolidated_count = 0;
302
+
303
+
for candidate in consolidation_candidates {
304
+
let synthetic_id = candidate.synthetic_id;
305
+
let target_id = candidate.target_id;
306
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
307
+
308
+
// Double-check with our improved similarity calculation
309
+
let calculated_similarity =
310
+
Self::calculate_similarity(&candidate.synthetic_name, &candidate.target_name, true);
311
+
312
+
let final_confidence = similarity.max(calculated_similarity);
313
+
314
+
if final_confidence >= min_confidence {
315
+
// Move all play relationships from synthetic artist to MusicBrainz artist
316
+
let moved_plays = sqlx::query!(
317
+
r#"
318
+
UPDATE play_to_artists_extended
319
+
SET artist_id = $1, artist_name = $2
320
+
WHERE artist_id = $3
321
+
AND NOT EXISTS (
322
+
SELECT 1 FROM play_to_artists_extended existing
323
+
WHERE existing.play_uri = play_to_artists_extended.play_uri
324
+
AND existing.artist_id = $1
325
+
)
326
+
"#,
327
+
target_id,
328
+
candidate.target_name,
329
+
synthetic_id
330
+
)
331
+
.execute(&self.sql)
332
+
.await?;
333
+
334
+
// Remove duplicate relationships that couldn't be moved
335
+
sqlx::query!(
336
+
"DELETE FROM play_to_artists_extended WHERE artist_id = $1",
337
+
synthetic_id
338
+
)
339
+
.execute(&self.sql)
340
+
.await?;
341
+
342
+
// Remove the synthetic artist
343
+
sqlx::query!("DELETE FROM artists_extended WHERE id = $1", synthetic_id)
344
+
.execute(&self.sql)
345
+
.await?;
346
+
347
+
consolidated_count += 1;
348
+
349
+
tracing::info!(
350
+
"โ
Consolidated '{}' โ '{}' (confidence: {:.2}, moved {} plays)",
351
+
candidate.synthetic_name,
352
+
candidate.target_name,
353
+
final_confidence,
354
+
moved_plays.rows_affected()
355
+
);
356
+
}
357
+
}
358
+
359
+
// Refresh materialized views after consolidation
360
+
if consolidated_count > 0 {
361
+
tracing::info!("๐ Refreshing materialized views after consolidation");
362
+
sqlx::query!("REFRESH MATERIALIZED VIEW mv_artist_play_counts;")
363
+
.execute(&self.sql)
364
+
.await?;
365
+
}
366
+
367
+
tracing::info!(
368
+
"๐ Batch consolidation complete: {} artists consolidated",
369
+
consolidated_count
370
+
);
371
+
Ok(consolidated_count)
372
+
}
373
+
374
+
/// Find and consolidate duplicate releases/albums (requires matching artist context)
375
+
pub async fn consolidate_duplicate_releases(
376
+
&self,
377
+
min_confidence: f64,
378
+
) -> anyhow::Result<usize> {
379
+
tracing::info!(
380
+
"๐ Starting release consolidation with confidence >= {:.2} (requires artist context)",
381
+
min_confidence
382
+
);
383
+
384
+
// Find releases that have similar names AND share at least one artist
385
+
let release_candidates = sqlx::query!(
66
386
r#"
67
-
INSERT INTO artists (mbid, name) VALUES ($1, $2)
68
-
ON CONFLICT (mbid) DO NOTHING
69
-
RETURNING mbid;
387
+
SELECT DISTINCT
388
+
r1.mbid as release1_mbid,
389
+
r1.name as release1_name,
390
+
r2.mbid as release2_mbid,
391
+
r2.name as release2_name,
392
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
393
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists
394
+
FROM releases r1
395
+
CROSS JOIN releases r2
396
+
INNER JOIN plays p1 ON p1.release_mbid = r1.mbid
397
+
INNER JOIN plays p2 ON p2.release_mbid = r2.mbid
398
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
399
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
400
+
WHERE r1.mbid != r2.mbid
401
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
402
+
AND ptae1.artist_id = ptae2.artist_id -- Same artist
403
+
AND (
404
+
(r1.discriminant IS NULL AND r2.discriminant IS NULL) OR
405
+
(LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))
406
+
) -- Same or no discriminants
407
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
408
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist
409
+
ORDER BY similarity_score DESC, shared_artists DESC
70
410
"#,
71
-
artist_uuid,
72
-
name
411
+
min_confidence as f32
73
412
)
74
413
.fetch_all(&self.sql)
75
414
.await?;
76
415
77
-
if !res.is_empty() {
78
-
// TODO: send request to async scrape data from local MB instance
416
+
let mut consolidated_count = 0;
417
+
418
+
for candidate in release_candidates {
419
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
420
+
let shared_artists = candidate.shared_artists.unwrap_or(0);
421
+
422
+
// Use MusicBrainz-style cleaning for better matching
423
+
let cleaned_similarity = Self::calculate_similarity(
424
+
&candidate.release1_name,
425
+
&candidate.release2_name,
426
+
false, // is_artist = false for releases
427
+
);
428
+
429
+
let final_confidence = similarity.max(cleaned_similarity);
430
+
431
+
// Require high confidence AND shared artists for album consolidation
432
+
if final_confidence >= min_confidence && shared_artists > 0 {
433
+
// Choose the release with more plays as the canonical one
434
+
let r1_plays: i64 = sqlx::query_scalar!(
435
+
"SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
436
+
candidate.release1_mbid
437
+
)
438
+
.fetch_one(&self.sql)
439
+
.await?
440
+
.unwrap_or(0);
441
+
442
+
let r2_plays: i64 = sqlx::query_scalar!(
443
+
"SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
444
+
candidate.release2_mbid
445
+
)
446
+
.fetch_one(&self.sql)
447
+
.await?
448
+
.unwrap_or(0);
449
+
450
+
let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays {
451
+
(
452
+
candidate.release1_mbid,
453
+
candidate.release2_mbid,
454
+
candidate.release1_name.clone(),
455
+
)
456
+
} else {
457
+
(
458
+
candidate.release2_mbid,
459
+
candidate.release1_mbid,
460
+
candidate.release2_name.clone(),
461
+
)
462
+
};
463
+
464
+
// Update plays to use the canonical release
465
+
let updated_plays = sqlx::query!(
466
+
"UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3",
467
+
keep_mbid,
468
+
keep_name,
469
+
remove_mbid
470
+
)
471
+
.execute(&self.sql)
472
+
.await?;
473
+
474
+
// Remove the duplicate release
475
+
sqlx::query!("DELETE FROM releases WHERE mbid = $1", remove_mbid)
476
+
.execute(&self.sql)
477
+
.await?;
478
+
479
+
consolidated_count += 1;
480
+
481
+
tracing::info!(
482
+
"โ
Consolidated releases: '{}' โ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)",
483
+
if r1_plays >= r2_plays {
484
+
&candidate.release2_name
485
+
} else {
486
+
&candidate.release1_name
487
+
},
488
+
keep_name,
489
+
final_confidence,
490
+
shared_artists,
491
+
updated_plays.rows_affected()
492
+
);
493
+
}
79
494
}
80
495
81
-
Ok(artist_uuid)
496
+
tracing::info!(
497
+
"๐ Release consolidation complete: {} releases consolidated",
498
+
consolidated_count
499
+
);
500
+
Ok(consolidated_count)
501
+
}
502
+
503
+
/// Find and consolidate duplicate recordings/tracks (requires matching artist context)
504
+
pub async fn consolidate_duplicate_recordings(
505
+
&self,
506
+
min_confidence: f64,
507
+
) -> anyhow::Result<usize> {
508
+
tracing::info!(
509
+
"๐ Starting recording consolidation with confidence >= {:.2} (requires artist context)",
510
+
min_confidence
511
+
);
512
+
513
+
// Find recordings that have similar names AND share at least one artist
514
+
let recording_candidates = sqlx::query!(
515
+
r#"
516
+
SELECT DISTINCT
517
+
r1.mbid as recording1_mbid,
518
+
r1.name as recording1_name,
519
+
r2.mbid as recording2_mbid,
520
+
r2.name as recording2_name,
521
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
522
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists
523
+
FROM recordings r1
524
+
CROSS JOIN recordings r2
525
+
INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid
526
+
INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid
527
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
528
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
529
+
WHERE r1.mbid != r2.mbid
530
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
531
+
AND ptae1.artist_id = ptae2.artist_id -- Same artist
532
+
AND (
533
+
(r1.discriminant IS NULL AND r2.discriminant IS NULL) OR
534
+
(LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))
535
+
) -- Same or no discriminants
536
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
537
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist
538
+
ORDER BY similarity_score DESC, shared_artists DESC
539
+
"#,
540
+
min_confidence as f32
541
+
)
542
+
.fetch_all(&self.sql)
543
+
.await?;
544
+
545
+
let mut consolidated_count = 0;
546
+
547
+
for candidate in recording_candidates {
548
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
549
+
let shared_artists = candidate.shared_artists.unwrap_or(0);
550
+
551
+
// Use MusicBrainz-style cleaning for track names
552
+
let cleaned_similarity = Self::calculate_similarity(
553
+
&candidate.recording1_name,
554
+
&candidate.recording2_name,
555
+
false, // is_artist = false for recordings
556
+
);
557
+
558
+
let final_confidence = similarity.max(cleaned_similarity);
559
+
560
+
// Require high confidence AND shared artists for track consolidation
561
+
if final_confidence >= min_confidence && shared_artists > 0 {
562
+
// Choose the recording with more plays as canonical
563
+
let r1_plays: i64 = sqlx::query_scalar!(
564
+
"SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
565
+
candidate.recording1_mbid
566
+
)
567
+
.fetch_one(&self.sql)
568
+
.await?
569
+
.unwrap_or(0);
570
+
571
+
let r2_plays: i64 = sqlx::query_scalar!(
572
+
"SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
573
+
candidate.recording2_mbid
574
+
)
575
+
.fetch_one(&self.sql)
576
+
.await?
577
+
.unwrap_or(0);
578
+
579
+
let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays {
580
+
(
581
+
candidate.recording1_mbid,
582
+
candidate.recording2_mbid,
583
+
candidate.recording1_name.clone(),
584
+
)
585
+
} else {
586
+
(
587
+
candidate.recording2_mbid,
588
+
candidate.recording1_mbid,
589
+
candidate.recording2_name.clone(),
590
+
)
591
+
};
592
+
593
+
// Update plays to use the canonical recording
594
+
let updated_plays = sqlx::query!(
595
+
"UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2",
596
+
keep_mbid,
597
+
remove_mbid
598
+
)
599
+
.execute(&self.sql)
600
+
.await?;
601
+
602
+
// Remove the duplicate recording
603
+
sqlx::query!("DELETE FROM recordings WHERE mbid = $1", remove_mbid)
604
+
.execute(&self.sql)
605
+
.await?;
606
+
607
+
consolidated_count += 1;
608
+
609
+
tracing::info!(
610
+
"โ
Consolidated recordings: '{}' โ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)",
611
+
if r1_plays >= r2_plays {
612
+
&candidate.recording2_name
613
+
} else {
614
+
&candidate.recording1_name
615
+
},
616
+
keep_name,
617
+
final_confidence,
618
+
shared_artists,
619
+
updated_plays.rows_affected()
620
+
);
621
+
}
622
+
}
623
+
624
+
tracing::info!(
625
+
"๐ Recording consolidation complete: {} recordings consolidated",
626
+
consolidated_count
627
+
);
628
+
Ok(consolidated_count)
629
+
}
630
+
631
+
/// Preview consolidation candidates to show what would be merged
632
+
pub async fn preview_consolidation_candidates(
633
+
&self,
634
+
min_confidence: f64,
635
+
) -> anyhow::Result<()> {
636
+
tracing::info!(
637
+
"๐ Previewing consolidation candidates (confidence >= {:.2})",
638
+
min_confidence
639
+
);
640
+
641
+
// Preview artist consolidations
642
+
let artist_candidates = sqlx::query!(
643
+
r#"
644
+
SELECT DISTINCT
645
+
ae1.name as synthetic_name,
646
+
ae2.name as target_name,
647
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score,
648
+
COUNT(ptae1.play_uri) as synthetic_plays,
649
+
COUNT(ptae2.play_uri) as target_plays
650
+
FROM artists_extended ae1
651
+
CROSS JOIN artists_extended ae2
652
+
LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id
653
+
LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id
654
+
WHERE ae1.id != ae2.id
655
+
AND ae1.mbid_type = 'synthetic'
656
+
AND ae2.mbid_type = 'musicbrainz'
657
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1
658
+
GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score
659
+
ORDER BY similarity_score DESC
660
+
LIMIT 10
661
+
"#,
662
+
min_confidence as f32
663
+
)
664
+
.fetch_all(&self.sql)
665
+
.await?;
666
+
667
+
if !artist_candidates.is_empty() {
668
+
tracing::info!("๐ฏ Artist consolidation candidates:");
669
+
for candidate in artist_candidates {
670
+
tracing::info!(
671
+
" '{}' โ '{}' (confidence: {:.2}, {} + {} plays)",
672
+
candidate.synthetic_name,
673
+
candidate.target_name,
674
+
candidate.similarity_score.unwrap_or(0.0),
675
+
candidate.synthetic_plays.unwrap_or(0),
676
+
candidate.target_plays.unwrap_or(0)
677
+
);
678
+
}
679
+
}
680
+
681
+
// Preview release consolidations (with artist context)
682
+
let release_candidates = sqlx::query!(
683
+
r#"
684
+
SELECT DISTINCT
685
+
r1.name as release1_name,
686
+
r2.name as release2_name,
687
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
688
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists,
689
+
STRING_AGG(DISTINCT ae.name, ', ') as artist_names
690
+
FROM releases r1
691
+
CROSS JOIN releases r2
692
+
INNER JOIN plays p1 ON p1.release_mbid = r1.mbid
693
+
INNER JOIN plays p2 ON p2.release_mbid = r2.mbid
694
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
695
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
696
+
INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id
697
+
WHERE r1.mbid != r2.mbid
698
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
699
+
AND ptae1.artist_id = ptae2.artist_id
700
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
701
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0
702
+
ORDER BY similarity_score DESC
703
+
LIMIT 5
704
+
"#,
705
+
min_confidence as f32
706
+
)
707
+
.fetch_all(&self.sql)
708
+
.await?;
709
+
710
+
if !release_candidates.is_empty() {
711
+
tracing::info!("๐ฟ Release consolidation candidates (with artist context):");
712
+
for candidate in release_candidates {
713
+
tracing::info!(
714
+
" '{}' โ '{}' (confidence: {:.2}, {} shared artists: {})",
715
+
candidate.release1_name,
716
+
candidate.release2_name,
717
+
candidate.similarity_score.unwrap_or(0.0),
718
+
candidate.shared_artists.unwrap_or(0),
719
+
candidate.artist_names.unwrap_or_default()
720
+
);
721
+
}
722
+
}
723
+
724
+
// Preview recording consolidations (with artist context)
725
+
let recording_candidates = sqlx::query!(
726
+
r#"
727
+
SELECT DISTINCT
728
+
r1.name as recording1_name,
729
+
r2.name as recording2_name,
730
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
731
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists,
732
+
STRING_AGG(DISTINCT ae.name, ', ') as artist_names
733
+
FROM recordings r1
734
+
CROSS JOIN recordings r2
735
+
INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid
736
+
INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid
737
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
738
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
739
+
INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id
740
+
WHERE r1.mbid != r2.mbid
741
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
742
+
AND ptae1.artist_id = ptae2.artist_id
743
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
744
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0
745
+
ORDER BY similarity_score DESC
746
+
LIMIT 5
747
+
"#,
748
+
min_confidence as f32
749
+
)
750
+
.fetch_all(&self.sql)
751
+
.await?;
752
+
753
+
if !recording_candidates.is_empty() {
754
+
tracing::info!("๐ต Recording consolidation candidates (with artist context):");
755
+
for candidate in recording_candidates {
756
+
tracing::info!(
757
+
" '{}' โ '{}' (confidence: {:.2}, {} shared artists: {})",
758
+
candidate.recording1_name,
759
+
candidate.recording2_name,
760
+
candidate.similarity_score.unwrap_or(0.0),
761
+
candidate.shared_artists.unwrap_or(0),
762
+
candidate.artist_names.unwrap_or_default()
763
+
);
764
+
}
765
+
}
766
+
767
+
Ok(())
768
+
}
769
+
770
+
/// Run full batch consolidation for all entity types
771
+
pub async fn run_full_consolidation(&self) -> anyhow::Result<()> {
772
+
tracing::info!("๐ Starting full batch consolidation process");
773
+
774
+
// First, preview what we would consolidate
775
+
self.preview_consolidation_candidates(0.92).await?;
776
+
777
+
let artist_count = self.consolidate_synthetic_artists(0.92).await?;
778
+
let release_count = self.consolidate_duplicate_releases(0.92).await?;
779
+
let recording_count = self.consolidate_duplicate_recordings(0.92).await?;
780
+
781
+
tracing::info!(
782
+
"๐ Full consolidation complete! Artists: {}, Releases: {}, Recordings: {}",
783
+
artist_count,
784
+
release_count,
785
+
recording_count
786
+
);
787
+
788
+
Ok(())
789
+
}
790
+
791
+
/// Generate a synthetic MBID for artists without MusicBrainz data using database function
792
+
async fn generate_synthetic_mbid(&self, artist_name: &str) -> anyhow::Result<Uuid> {
793
+
let result = sqlx::query_scalar!("SELECT generate_synthetic_mbid($1)", artist_name)
794
+
.fetch_one(&self.sql)
795
+
.await?;
796
+
797
+
result.ok_or_else(|| anyhow!("Failed to generate synthetic MBID"))
798
+
}
799
+
800
+
/// Generate a fallback artist name for tracks without any artist information
801
+
fn generate_fallback_artist(track_name: &str) -> String {
802
+
format!(
803
+
"Unknown Artist ({})",
804
+
track_name.chars().take(20).collect::<String>()
805
+
)
806
+
}
807
+
808
+
/// Normalize text for fuzzy matching with MusicBrainz-style cleaning
809
+
fn normalize_text(text: &str, is_artist: bool) -> String {
810
+
let cleaned = if is_artist {
811
+
MusicBrainzCleaner::clean_artist_name(text)
812
+
} else {
813
+
MusicBrainzCleaner::clean_track_name(text)
814
+
};
815
+
816
+
MusicBrainzCleaner::normalize_for_comparison(&cleaned)
817
+
}
818
+
819
+
/// Calculate string similarity with MusicBrainz-style cleaning
820
+
fn calculate_similarity(s1: &str, s2: &str, is_artist: bool) -> f64 {
821
+
let s1_norm = Self::normalize_text(s1, is_artist);
822
+
let s2_norm = Self::normalize_text(s2, is_artist);
823
+
824
+
if s1_norm == s2_norm {
825
+
return 1.0;
826
+
}
827
+
828
+
if s1_norm.is_empty() || s2_norm.is_empty() {
829
+
return 0.0;
830
+
}
831
+
832
+
// Calculate basic similarity
833
+
let max_len = s1_norm.len().max(s2_norm.len()) as f64;
834
+
let min_len = s1_norm.len().min(s2_norm.len()) as f64;
835
+
836
+
// Character-based similarity
837
+
let common_chars = s1_norm
838
+
.chars()
839
+
.zip(s2_norm.chars())
840
+
.filter(|(a, b)| a == b)
841
+
.count() as f64;
842
+
843
+
// Word-based similarity boost
844
+
let s1_words: std::collections::HashSet<&str> = s1_norm.split_whitespace().collect();
845
+
let s2_words: std::collections::HashSet<&str> = s2_norm.split_whitespace().collect();
846
+
let common_words = s1_words.intersection(&s2_words).count() as f64;
847
+
let total_words = s1_words.union(&s2_words).count() as f64;
848
+
849
+
let word_similarity = if total_words > 0.0 {
850
+
common_words / total_words
851
+
} else {
852
+
0.0
853
+
};
854
+
let char_similarity = common_chars / max_len;
855
+
856
+
// Boost for very similar lengths (helps with minor differences)
857
+
let length_factor = if max_len > 0.0 {
858
+
min_len / max_len
859
+
} else {
860
+
0.0
861
+
};
862
+
863
+
// Weighted combination: 50% word similarity, 30% char similarity, 20% length factor
864
+
(word_similarity * 0.5) + (char_similarity * 0.3) + (length_factor * 0.2)
865
+
}
866
+
867
+
/// Find existing artists that fuzzy match the given name
868
+
async fn find_fuzzy_artist_matches(
869
+
&self,
870
+
artist_name: &str,
871
+
_track_name: &str,
872
+
_album_name: Option<&str>,
873
+
) -> anyhow::Result<Vec<FuzzyMatchCandidate>> {
874
+
let normalized_name = Self::normalize_text(artist_name, true);
875
+
876
+
// Search for artists with similar names using trigram similarity
877
+
let candidates = sqlx::query!(
878
+
r#"
879
+
SELECT
880
+
ae.id,
881
+
ae.name
882
+
FROM artists_extended ae
883
+
WHERE ae.mbid_type = 'musicbrainz'
884
+
AND (
885
+
LOWER(TRIM(ae.name)) = $1
886
+
OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%'
887
+
OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%'
888
+
OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6
889
+
)
890
+
ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC
891
+
LIMIT 10
892
+
"#,
893
+
normalized_name
894
+
)
895
+
.fetch_all(&self.sql)
896
+
.await
897
+
.unwrap_or_default();
898
+
899
+
let mut matches = Vec::new();
900
+
901
+
for candidate in candidates {
902
+
let name_similarity = Self::calculate_similarity(artist_name, &candidate.name, true);
903
+
904
+
// Base confidence from name similarity
905
+
let mut confidence = name_similarity;
906
+
907
+
// Boost confidence for exact matches after normalization
908
+
if Self::normalize_text(artist_name, true)
909
+
== Self::normalize_text(&candidate.name, true)
910
+
{
911
+
confidence = confidence.max(0.95);
912
+
}
913
+
914
+
// Additional boost for cleaned matches
915
+
let cleaned_input = MusicBrainzCleaner::clean_artist_name(artist_name);
916
+
let cleaned_candidate = MusicBrainzCleaner::clean_artist_name(&candidate.name);
917
+
if MusicBrainzCleaner::normalize_for_comparison(&cleaned_input)
918
+
== MusicBrainzCleaner::normalize_for_comparison(&cleaned_candidate)
919
+
{
920
+
confidence = confidence.max(0.9);
921
+
}
922
+
923
+
// Lower threshold since we have better cleaning now
924
+
if confidence >= 0.8 {
925
+
matches.push(FuzzyMatchCandidate {
926
+
artist_id: candidate.id,
927
+
name: candidate.name,
928
+
confidence,
929
+
});
930
+
}
931
+
}
932
+
933
+
// Sort by confidence descending
934
+
matches.sort_by(|a, b| {
935
+
b.confidence
936
+
.partial_cmp(&a.confidence)
937
+
.unwrap_or(std::cmp::Ordering::Equal)
938
+
});
939
+
940
+
Ok(matches)
941
+
}
942
+
943
+
/// Try to match an artist to existing MusicBrainz data using fuzzy matching
944
+
async fn find_or_create_artist_with_fuzzy_matching(
945
+
&self,
946
+
artist_name: &str,
947
+
mbid: Option<&str>,
948
+
track_name: &str,
949
+
album_name: Option<&str>,
950
+
) -> anyhow::Result<i32> {
951
+
// If we already have an MBID, use it directly
952
+
if let Some(mbid) = mbid {
953
+
return self.insert_artist_extended(Some(mbid), artist_name).await;
954
+
}
955
+
956
+
// Try fuzzy matching against existing MusicBrainz artists
957
+
let matches = self
958
+
.find_fuzzy_artist_matches(artist_name, track_name, album_name)
959
+
.await?;
960
+
961
+
if let Some(best_match) = matches.first() {
962
+
// Use high confidence threshold for automatic matching
963
+
if best_match.confidence >= 0.92 {
964
+
tracing::info!(
965
+
"๐ Fuzzy matched '{}' to existing artist '{}' (confidence: {:.2})",
966
+
artist_name,
967
+
best_match.name,
968
+
best_match.confidence
969
+
);
970
+
971
+
// Update the existing artist name if the new one seems more complete
972
+
if artist_name.len() > best_match.name.len() && best_match.confidence >= 0.95 {
973
+
sqlx::query!(
974
+
"UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2",
975
+
artist_name,
976
+
best_match.artist_id
977
+
)
978
+
.execute(&self.sql)
979
+
.await?;
980
+
}
981
+
982
+
return Ok(best_match.artist_id);
983
+
} else if best_match.confidence >= 0.85 {
984
+
tracing::debug!(
985
+
"๐ค Potential match for '{}' -> '{}' (confidence: {:.2}) but below auto-match threshold",
986
+
artist_name,
987
+
best_match.name,
988
+
best_match.confidence
989
+
);
990
+
}
991
+
}
992
+
993
+
// No good match found, create synthetic artist
994
+
self.insert_artist_extended(None, artist_name).await
995
+
}
996
+
997
+
/// Inserts or updates an artist in the database using the extended table.
998
+
/// Returns the internal ID of the artist.
999
+
async fn insert_artist_extended(&self, mbid: Option<&str>, name: &str) -> anyhow::Result<i32> {
1000
+
if let Some(mbid) = mbid {
1001
+
let artist_uuid = Uuid::parse_str(mbid)?;
1002
+
let res = sqlx::query!(
1003
+
r#"
1004
+
INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')
1005
+
ON CONFLICT (mbid) DO UPDATE SET
1006
+
name = EXCLUDED.name,
1007
+
updated_at = NOW()
1008
+
RETURNING id;
1009
+
"#,
1010
+
artist_uuid,
1011
+
name
1012
+
)
1013
+
.fetch_one(&self.sql)
1014
+
.await?;
1015
+
Ok(res.id)
1016
+
} else {
1017
+
// Artist without MBID - generate synthetic MBID
1018
+
let synthetic_uuid = self.generate_synthetic_mbid(name).await?;
1019
+
1020
+
let res = sqlx::query!(
1021
+
r#"
1022
+
INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic')
1023
+
ON CONFLICT (mbid) DO UPDATE SET
1024
+
name = EXCLUDED.name,
1025
+
updated_at = NOW()
1026
+
RETURNING id;
1027
+
"#,
1028
+
synthetic_uuid,
1029
+
name
1030
+
)
1031
+
.fetch_one(&self.sql)
1032
+
.await?;
1033
+
Ok(res.id)
1034
+
}
82
1035
}
83
1036
84
1037
/// Inserts or updates a release in the database.
85
1038
/// Returns the Uuid of the release.
86
1039
async fn insert_release(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
87
1040
let release_uuid = Uuid::parse_str(mbid)?;
1041
+
1042
+
// Extract discriminant from release name for new releases
1043
+
// Prioritize edition-specific patterns for better quality
1044
+
let discriminant = self
1045
+
.extract_edition_discriminant_from_db(name)
1046
+
.await
1047
+
.or_else(|| {
1048
+
futures::executor::block_on(async { self.extract_discriminant_from_db(name).await })
1049
+
});
1050
+
88
1051
let res = sqlx::query!(
89
1052
r#"
90
-
INSERT INTO releases (mbid, name) VALUES ($1, $2)
91
-
ON CONFLICT (mbid) DO NOTHING
1053
+
INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3)
1054
+
ON CONFLICT (mbid) DO UPDATE SET
1055
+
name = EXCLUDED.name,
1056
+
discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant)
92
1057
RETURNING mbid;
93
1058
"#,
94
1059
release_uuid,
95
-
name
1060
+
name,
1061
+
discriminant
96
1062
)
97
1063
.fetch_all(&self.sql)
98
1064
.await?;
···
108
1074
/// Returns the Uuid of the recording.
109
1075
async fn insert_recording(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
110
1076
let recording_uuid = Uuid::parse_str(mbid)?;
1077
+
1078
+
// Extract discriminant from recording name for new recordings
1079
+
// Prioritize edition-specific patterns for better quality
1080
+
let discriminant = self
1081
+
.extract_edition_discriminant_from_db(name)
1082
+
.await
1083
+
.or_else(|| {
1084
+
futures::executor::block_on(async { self.extract_discriminant_from_db(name).await })
1085
+
});
1086
+
111
1087
let res = sqlx::query!(
112
1088
r#"
113
-
INSERT INTO recordings (mbid, name) VALUES ($1, $2)
114
-
ON CONFLICT (mbid) DO NOTHING
1089
+
INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)
1090
+
ON CONFLICT (mbid) DO UPDATE SET
1091
+
name = EXCLUDED.name,
1092
+
discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)
115
1093
RETURNING mbid;
116
1094
"#,
117
1095
recording_uuid,
118
-
name
1096
+
name,
1097
+
discriminant
119
1098
)
120
1099
.fetch_all(&self.sql)
121
1100
.await?;
···
126
1105
127
1106
Ok(recording_uuid)
128
1107
}
1108
+
1109
+
/// Extract discriminant from name using database function
1110
+
async fn extract_discriminant_from_db(&self, name: &str) -> Option<String> {
1111
+
sqlx::query_scalar!("SELECT extract_discriminant($1)", name)
1112
+
.fetch_one(&self.sql)
1113
+
.await
1114
+
.ok()
1115
+
.flatten()
1116
+
}
1117
+
1118
+
/// Extract edition-specific discriminant from name using database function
1119
+
async fn extract_edition_discriminant_from_db(&self, name: &str) -> Option<String> {
1120
+
sqlx::query_scalar!("SELECT extract_edition_discriminant($1)", name)
1121
+
.fetch_one(&self.sql)
1122
+
.await
1123
+
.ok()
1124
+
.flatten()
1125
+
}
1126
+
1127
+
// /// Get base name without discriminant using database function
1128
+
// async fn get_base_name_from_db(&self, name: &str) -> String {
1129
+
// sqlx::query_scalar!("SELECT get_base_name($1)", name)
1130
+
// .fetch_one(&self.sql)
1131
+
// .await
1132
+
// .ok()
1133
+
// .flatten()
1134
+
// .unwrap_or_else(|| name.to_string())
1135
+
// }
129
1136
130
1137
pub async fn insert_play(
131
1138
&self,
···
137
1144
) -> anyhow::Result<()> {
138
1145
dbg!("ingesting", play_record);
139
1146
let play_record = clean(play_record);
140
-
let mut parsed_artists: Vec<(Uuid, String)> = vec![];
1147
+
let mut parsed_artists: Vec<(i32, String)> = vec![];
1148
+
let mut artist_names_raw: Vec<String> = vec![];
1149
+
141
1150
if let Some(ref artists) = &play_record.artists {
142
1151
for artist in artists {
143
1152
let artist_name = artist.artist_name.clone();
144
-
let artist_mbid = artist.artist_mb_id.clone();
145
-
if let Some(artist_mbid) = artist_mbid {
146
-
let artist_uuid = self.insert_artist(&artist_mbid, &artist_name).await?;
147
-
parsed_artists.push((artist_uuid, artist_name.clone()));
1153
+
artist_names_raw.push(artist_name.clone());
1154
+
let artist_mbid = artist.artist_mb_id.as_deref();
1155
+
1156
+
let artist_id = self
1157
+
.find_or_create_artist_with_fuzzy_matching(
1158
+
&artist_name,
1159
+
artist_mbid,
1160
+
&play_record.track_name,
1161
+
play_record.release_name.as_deref(),
1162
+
)
1163
+
.await?;
1164
+
parsed_artists.push((artist_id, artist_name.clone()));
1165
+
}
1166
+
} else if let Some(artist_names) = &play_record.artist_names {
1167
+
for (index, artist_name) in artist_names.iter().enumerate() {
1168
+
artist_names_raw.push(artist_name.clone());
1169
+
1170
+
let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids {
1171
+
mbid_list.get(index)
148
1172
} else {
149
-
// Handle case where artist MBID is missing, maybe log a warning
150
-
eprintln!("Warning: Artist MBID missing for '{}'", artist_name);
151
-
}
1173
+
None
1174
+
};
1175
+
1176
+
let artist_id = self
1177
+
.find_or_create_artist_with_fuzzy_matching(
1178
+
artist_name,
1179
+
artist_mbid_opt.map(|s| s.as_str()),
1180
+
&play_record.track_name,
1181
+
play_record.release_name.as_deref(),
1182
+
)
1183
+
.await?;
1184
+
parsed_artists.push((artist_id, artist_name.clone()));
152
1185
}
153
1186
} else {
154
-
if let Some(artist_names) = &play_record.artist_names {
155
-
for artist_name in artist_names {
156
-
// Assuming artist_mbid is optional, handle missing mbid gracefully
157
-
let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids {
158
-
mbid_list.get(
159
-
artist_names
160
-
.iter()
161
-
.position(|name| name == artist_name)
162
-
.unwrap_or(0),
163
-
)
164
-
} else {
165
-
None
166
-
};
1187
+
// No artist information provided - create a fallback artist
1188
+
let fallback_artist_name = Self::generate_fallback_artist(&play_record.track_name);
1189
+
artist_names_raw.push(fallback_artist_name.clone());
167
1190
168
-
if let Some(artist_mbid) = artist_mbid_opt {
169
-
let artist_uuid = self.insert_artist(artist_mbid, artist_name).await?;
170
-
parsed_artists.push((artist_uuid, artist_name.clone()));
171
-
} else {
172
-
// Handle case where artist MBID is missing, maybe log a warning
173
-
eprintln!("Warning: Artist MBID missing for '{}'", artist_name);
174
-
}
175
-
}
176
-
}
1191
+
let artist_id = self
1192
+
.find_or_create_artist_with_fuzzy_matching(
1193
+
&fallback_artist_name,
1194
+
None,
1195
+
&play_record.track_name,
1196
+
play_record.release_name.as_deref(),
1197
+
)
1198
+
.await?;
1199
+
parsed_artists.push((artist_id, fallback_artist_name));
177
1200
}
178
1201
179
1202
// Insert release if missing
···
203
1226
time::OffsetDateTime::from_unix_timestamp(played_time.as_ref().timestamp())
204
1227
.unwrap_or_else(|_| time::OffsetDateTime::now_utc());
205
1228
206
-
// Our main insert into plays
1229
+
// Extract discriminants from lexicon fields or infer from names
1230
+
// First try lexicon fields, then extract from names with preference for edition-specific patterns
1231
+
// TODO: Enable when types are updated with discriminant fields
1232
+
// let track_discriminant = play_record.track_discriminant.clone().or_else(|| {
1233
+
let track_discriminant = {
1234
+
// Try edition-specific patterns first, then general patterns
1235
+
futures::executor::block_on(async {
1236
+
self.extract_edition_discriminant_from_db(&play_record.track_name)
1237
+
.await
1238
+
.or_else(|| {
1239
+
futures::executor::block_on(async {
1240
+
self.extract_discriminant_from_db(&play_record.track_name)
1241
+
.await
1242
+
})
1243
+
})
1244
+
})
1245
+
};
1246
+
1247
+
// let release_discriminant = play_record.release_discriminant.clone().or_else(|| {
1248
+
let release_discriminant = {
1249
+
if let Some(ref release_name) = play_record.release_name {
1250
+
futures::executor::block_on(async {
1251
+
// Try edition-specific patterns first, then general patterns
1252
+
self.extract_edition_discriminant_from_db(release_name)
1253
+
.await
1254
+
.or_else(|| {
1255
+
futures::executor::block_on(async {
1256
+
self.extract_discriminant_from_db(release_name).await
1257
+
})
1258
+
})
1259
+
})
1260
+
} else {
1261
+
None
1262
+
}
1263
+
};
1264
+
1265
+
// Our main insert into plays with raw artist names and discriminants
1266
+
let artist_names_json = if !artist_names_raw.is_empty() {
1267
+
Some(serde_json::to_value(&artist_names_raw)?)
1268
+
} else {
1269
+
None
1270
+
};
1271
+
207
1272
sqlx::query!(
208
1273
r#"
209
1274
INSERT INTO plays (
210
1275
uri, cid, did, rkey, isrc, duration, track_name, played_time,
211
1276
processed_time, release_mbid, release_name, recording_mbid,
212
-
submission_client_agent, music_service_base_domain
1277
+
submission_client_agent, music_service_base_domain, artist_names_raw,
1278
+
track_discriminant, release_discriminant
213
1279
) VALUES (
214
1280
$1, $2, $3, $4, $5, $6, $7, $8,
215
-
NOW(), $9, $10, $11, $12, $13
1281
+
NOW(), $9, $10, $11, $12, $13, $14, $15, $16
216
1282
) ON CONFLICT(uri) DO UPDATE SET
217
1283
isrc = EXCLUDED.isrc,
218
1284
duration = EXCLUDED.duration,
···
223
1289
release_name = EXCLUDED.release_name,
224
1290
recording_mbid = EXCLUDED.recording_mbid,
225
1291
submission_client_agent = EXCLUDED.submission_client_agent,
226
-
music_service_base_domain = EXCLUDED.music_service_base_domain;
1292
+
music_service_base_domain = EXCLUDED.music_service_base_domain,
1293
+
artist_names_raw = EXCLUDED.artist_names_raw,
1294
+
track_discriminant = EXCLUDED.track_discriminant,
1295
+
release_discriminant = EXCLUDED.release_discriminant;
227
1296
"#,
228
1297
uri,
229
1298
cid,
···
238
1307
recording_mbid_opt,
239
1308
play_record.submission_client_agent,
240
1309
play_record.music_service_base_domain,
1310
+
artist_names_json,
1311
+
track_discriminant,
1312
+
release_discriminant
241
1313
)
242
1314
.execute(&self.sql)
243
1315
.await?;
244
1316
245
-
// Insert plays into join table
246
-
for (mbid, artist) in &parsed_artists {
247
-
let artist_name = artist.clone(); // Clone to move into the query
248
-
1317
+
// Insert plays into the extended join table (supports all artists)
1318
+
for (artist_id, artist_name) in &parsed_artists {
249
1319
sqlx::query!(
250
1320
r#"
251
-
INSERT INTO play_to_artists (play_uri, artist_mbid, artist_name) VALUES
252
-
($1, $2, $3)
253
-
ON CONFLICT (play_uri, artist_mbid) DO NOTHING;
254
-
"#,
1321
+
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES
1322
+
($1, $2, $3)
1323
+
ON CONFLICT (play_uri, artist_id) DO NOTHING;
1324
+
"#,
255
1325
uri,
256
-
mbid,
1326
+
artist_id,
257
1327
artist_name
258
1328
)
259
1329
.execute(&self.sql)
+51
-24
services/cadet/src/main.rs
+51
-24
services/cadet/src/main.rs
···
17
17
mod cursor;
18
18
mod db;
19
19
mod ingestors;
20
-
mod resolve;
21
20
mod redis_client;
21
+
mod resolve;
22
22
23
23
fn setup_tracing() {
24
24
tracing_subscriber::fmt()
···
96
96
97
97
// CAR import job worker
98
98
let car_ingestor = ingestors::car::CarImportIngestor::new(pool.clone());
99
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
100
-
99
+
let redis_url =
100
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
101
+
101
102
match redis_client::RedisClient::new(&redis_url) {
102
103
Ok(redis_client) => {
103
104
// Spawn CAR import job processing task
104
105
tokio::spawn(async move {
105
-
use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, JobProgress, queue_keys};
106
-
use tracing::{info, error};
107
106
use chrono::Utc;
108
-
107
+
use ingestors::car::jobs::{
108
+
queue_keys, CarImportJob, CarImportJobStatus, JobProgress, JobStatus,
109
+
};
110
+
use tracing::{error, info};
111
+
109
112
info!("Starting CAR import job worker, polling Redis queue...");
110
-
113
+
111
114
loop {
112
115
// Block for up to 10 seconds waiting for jobs
113
116
match redis_client.pop_job(queue_keys::CAR_IMPORT_JOBS, 10).await {
114
117
Ok(Some(job_data)) => {
115
118
info!("Received CAR import job: {}", job_data);
116
-
119
+
117
120
// Parse job
118
121
match serde_json::from_str::<CarImportJob>(&job_data) {
119
122
Ok(job) => {
···
132
135
blocks_processed: None,
133
136
}),
134
137
};
135
-
138
+
136
139
let status_key = queue_keys::job_status_key(&job.request_id);
137
-
if let Ok(status_data) = serde_json::to_string(&processing_status) {
138
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
140
+
if let Ok(status_data) =
141
+
serde_json::to_string(&processing_status)
142
+
{
143
+
let _ = redis_client
144
+
.update_job_status(&status_key, &status_data)
145
+
.await;
139
146
}
140
-
147
+
141
148
// Process the job
142
-
match car_ingestor.fetch_and_process_identity_car(&job.identity).await {
149
+
match car_ingestor
150
+
.fetch_and_process_identity_car(&job.identity)
151
+
.await
152
+
{
143
153
Ok(import_id) => {
144
-
info!("โ
CAR import job completed successfully: {}", job.request_id);
145
-
154
+
info!(
155
+
"โ
CAR import job completed successfully: {}",
156
+
job.request_id
157
+
);
158
+
146
159
let completed_status = CarImportJobStatus {
147
160
status: JobStatus::Completed,
148
161
created_at: job.created_at,
···
150
163
completed_at: Some(Utc::now()),
151
164
error_message: None,
152
165
progress: Some(JobProgress {
153
-
step: format!("CAR import completed: {}", import_id),
166
+
step: format!(
167
+
"CAR import completed: {}",
168
+
import_id
169
+
),
154
170
user_did: None,
155
171
pds_host: None,
156
172
car_size_bytes: None,
157
173
blocks_processed: None,
158
174
}),
159
175
};
160
-
161
-
if let Ok(status_data) = serde_json::to_string(&completed_status) {
162
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
176
+
177
+
if let Ok(status_data) =
178
+
serde_json::to_string(&completed_status)
179
+
{
180
+
let _ = redis_client
181
+
.update_job_status(&status_key, &status_data)
182
+
.await;
163
183
}
164
184
}
165
185
Err(e) => {
166
-
error!("โ CAR import job failed: {}: {}", job.request_id, e);
167
-
186
+
error!(
187
+
"โ CAR import job failed: {}: {}",
188
+
job.request_id, e
189
+
);
190
+
168
191
let failed_status = CarImportJobStatus {
169
192
status: JobStatus::Failed,
170
193
created_at: job.created_at,
···
173
196
error_message: Some(e.to_string()),
174
197
progress: None,
175
198
};
176
-
177
-
if let Ok(status_data) = serde_json::to_string(&failed_status) {
178
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
199
+
200
+
if let Ok(status_data) =
201
+
serde_json::to_string(&failed_status)
202
+
{
203
+
let _ = redis_client
204
+
.update_job_status(&status_key, &status_data)
205
+
.await;
179
206
}
180
207
}
181
208
}
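The worker above serializes and writes a `CarImportJobStatus` in three nearly identical places; a small helper along these lines (purely illustrative, not part of the diff) is one way that repetition could be collapsed.

```rust
use crate::ingestors::car::jobs::CarImportJobStatus;
use crate::redis_client::RedisClient;

/// Hypothetical helper: serialize a status and store it, logging instead of
/// failing the worker loop when Redis or serialization misbehaves.
async fn publish_status(redis: &RedisClient, status_key: &str, status: &CarImportJobStatus) {
    match serde_json::to_string(status) {
        Ok(data) => {
            if let Err(e) = redis.update_job_status(status_key, &data).await {
                tracing::warn!("failed to update job status {}: {}", status_key, e);
            }
        }
        Err(e) => tracing::warn!("failed to serialize job status {}: {}", status_key, e),
    }
}
```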
+3
-3
services/cadet/src/redis_client.rs
+3
-3
services/cadet/src/redis_client.rs
···
20
20
pub async fn pop_job(&self, queue_key: &str, timeout_seconds: u64) -> Result<Option<String>> {
21
21
let mut conn = self.get_connection().await?;
22
22
let result: Option<Vec<String>> = conn.brpop(queue_key, timeout_seconds as f64).await?;
23
-
23
+
24
24
match result {
25
25
Some(mut items) if items.len() >= 2 => {
26
26
// brpop returns [queue_name, item], we want the item
27
27
Ok(Some(items.remove(1)))
28
28
}
29
-
_ => Ok(None)
29
+
_ => Ok(None),
30
30
}
31
31
}
32
32
···
36
36
let _: () = conn.set(status_key, status_data).await?;
37
37
Ok(())
38
38
}
39
-
}
39
+
}
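For completeness, a sketch (not part of the diff) of the read side of these status keys: an API handler could fetch the JSON written by `update_job_status` and decode it into the `CarImportJobStatus` added in `jobs.rs`. The `get` call is the standard `redis` crate command; the function itself is hypothetical.

```rust
use redis::AsyncCommands;
use uuid::Uuid;

// Assumed path to the job types introduced in this PR.
use crate::ingestors::car::jobs::{queue_keys, CarImportJobStatus};

/// Hypothetical status lookup: returns None when no status has been written for this id.
async fn read_job_status(
    conn: &mut redis::aio::MultiplexedConnection,
    job_id: &Uuid,
) -> anyhow::Result<Option<CarImportJobStatus>> {
    let key = queue_keys::job_status_key(job_id);
    let raw: Option<String> = conn.get(&key).await?;
    Ok(match raw {
        Some(json) => Some(serde_json::from_str(&json)?),
        None => None,
    })
}
```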
+55
services/cadet/target.sh
+55
services/cadet/target.sh
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Debug: Print all available build variables
5
+
echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM"
6
+
echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM"
7
+
echo "DEBUG: TARGETARCH=$TARGETARCH"
8
+
echo "DEBUG: TARGETOS=$TARGETOS"
9
+
10
+
# Use TARGETARCH directly (more reliable than TARGETPLATFORM)
11
+
TARGET_ARCH_VAR="${TARGETARCH:-}"
12
+
13
+
# If TARGETARCH is not set, try to extract from TARGETPLATFORM
14
+
if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then
15
+
TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2)
16
+
echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM"
17
+
fi
18
+
19
+
# Final fallback: detect from uname
20
+
if [ -z "$TARGET_ARCH_VAR" ]; then
21
+
ARCH=$(uname -m)
22
+
case "$ARCH" in
23
+
"x86_64")
24
+
TARGET_ARCH_VAR="amd64"
25
+
;;
26
+
"aarch64")
27
+
TARGET_ARCH_VAR="arm64"
28
+
;;
29
+
*)
30
+
echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH"
31
+
echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM"
32
+
exit 1
33
+
;;
34
+
esac
35
+
echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname"
36
+
fi
37
+
38
+
# Map architecture to Rust target
39
+
case "$TARGET_ARCH_VAR" in
40
+
"amd64")
41
+
export RUST_TARGET="x86_64-unknown-linux-gnu"
42
+
export TARGET_ARCH="amd64"
43
+
;;
44
+
"arm64")
45
+
export RUST_TARGET="aarch64-unknown-linux-gnu"
46
+
export TARGET_ARCH="arm64"
47
+
;;
48
+
*)
49
+
echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR"
50
+
echo "Supported architectures: amd64, arm64"
51
+
exit 1
52
+
;;
53
+
esac
54
+
55
+
echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
-226
services/migrations/20241220000001_initial_schema.sql
-226
services/migrations/20241220000001_initial_schema.sql
···
1
-
-- Initial comprehensive schema for Teal music platform
2
-
-- Based on services/cadet/sql/base.sql
3
-
4
-
CREATE TABLE artists (
5
-
mbid UUID PRIMARY KEY,
6
-
name TEXT NOT NULL,
7
-
play_count INTEGER DEFAULT 0
8
-
);
9
-
10
-
-- releases are analogous to 'albums'
11
-
CREATE TABLE releases (
12
-
mbid UUID PRIMARY KEY,
13
-
name TEXT NOT NULL,
14
-
play_count INTEGER DEFAULT 0
15
-
);
16
-
17
-
-- recordings are analogous to 'tracks' BUT tracks can be in multiple releases!
18
-
CREATE TABLE recordings (
19
-
mbid UUID PRIMARY KEY,
20
-
name TEXT NOT NULL,
21
-
play_count INTEGER DEFAULT 0
22
-
);
23
-
24
-
CREATE TABLE plays (
25
-
uri TEXT PRIMARY KEY,
26
-
did TEXT NOT NULL,
27
-
rkey TEXT NOT NULL,
28
-
cid TEXT NOT NULL,
29
-
isrc TEXT,
30
-
duration INTEGER,
31
-
track_name TEXT NOT NULL,
32
-
played_time TIMESTAMP WITH TIME ZONE,
33
-
processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
34
-
release_mbid UUID,
35
-
release_name TEXT,
36
-
recording_mbid UUID,
37
-
submission_client_agent TEXT,
38
-
music_service_base_domain TEXT,
39
-
origin_url TEXT,
40
-
FOREIGN KEY (release_mbid) REFERENCES releases (mbid),
41
-
FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid)
42
-
);
43
-
44
-
CREATE INDEX idx_plays_release_mbid ON plays (release_mbid);
45
-
CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid);
46
-
CREATE INDEX idx_plays_played_time ON plays (played_time);
47
-
CREATE INDEX idx_plays_did ON plays (did);
48
-
49
-
CREATE TABLE play_to_artists (
50
-
play_uri TEXT, -- references plays(uri)
51
-
artist_mbid UUID REFERENCES artists (mbid),
52
-
artist_name TEXT, -- storing here for ease of use when joining
53
-
PRIMARY KEY (play_uri, artist_mbid),
54
-
FOREIGN KEY (play_uri) REFERENCES plays (uri)
55
-
);
56
-
57
-
CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid);
58
-
59
-
-- Profiles table
60
-
CREATE TABLE profiles (
61
-
did TEXT PRIMARY KEY,
62
-
handle TEXT,
63
-
display_name TEXT,
64
-
description TEXT,
65
-
description_facets JSONB,
66
-
avatar TEXT, -- IPLD of the image, bafy...
67
-
banner TEXT,
68
-
created_at TIMESTAMP WITH TIME ZONE
69
-
);
70
-
71
-
-- User featured items table
72
-
CREATE TABLE featured_items (
73
-
did TEXT PRIMARY KEY,
74
-
mbid TEXT NOT NULL,
75
-
type TEXT NOT NULL
76
-
);
77
-
78
-
-- Statii table (status records)
79
-
CREATE TABLE statii (
80
-
uri TEXT PRIMARY KEY,
81
-
did TEXT NOT NULL,
82
-
rkey TEXT NOT NULL,
83
-
cid TEXT NOT NULL,
84
-
record JSONB NOT NULL,
85
-
indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
86
-
);
87
-
88
-
CREATE INDEX idx_statii_did_rkey ON statii (did, rkey);
89
-
90
-
-- Materialized view for artists' play counts
91
-
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
92
-
SELECT
93
-
a.mbid AS artist_mbid,
94
-
a.name AS artist_name,
95
-
COUNT(p.uri) AS play_count
96
-
FROM
97
-
artists a
98
-
LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
99
-
LEFT JOIN plays p ON p.uri = pta.play_uri
100
-
GROUP BY
101
-
a.mbid,
102
-
a.name;
103
-
104
-
CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid);
105
-
106
-
-- Materialized view for releases' play counts
107
-
CREATE MATERIALIZED VIEW mv_release_play_counts AS
108
-
SELECT
109
-
r.mbid AS release_mbid,
110
-
r.name AS release_name,
111
-
COUNT(p.uri) AS play_count
112
-
FROM
113
-
releases r
114
-
LEFT JOIN plays p ON p.release_mbid = r.mbid
115
-
GROUP BY
116
-
r.mbid,
117
-
r.name;
118
-
119
-
CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid);
120
-
121
-
-- Materialized view for recordings' play counts
122
-
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
123
-
SELECT
124
-
rec.mbid AS recording_mbid,
125
-
rec.name AS recording_name,
126
-
COUNT(p.uri) AS play_count
127
-
FROM
128
-
recordings rec
129
-
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
130
-
GROUP BY
131
-
rec.mbid,
132
-
rec.name;
133
-
134
-
CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid);
135
-
136
-
-- Global play count materialized view
137
-
CREATE MATERIALIZED VIEW mv_global_play_count AS
138
-
SELECT
139
-
COUNT(uri) AS total_plays,
140
-
COUNT(DISTINCT did) AS unique_listeners
141
-
FROM plays;
142
-
143
-
CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays);
144
-
145
-
-- Top artists in the last 30 days
146
-
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
147
-
SELECT
148
-
a.mbid AS artist_mbid,
149
-
a.name AS artist_name,
150
-
COUNT(p.uri) AS play_count
151
-
FROM artists a
152
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
153
-
INNER JOIN plays p ON p.uri = pta.play_uri
154
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
155
-
GROUP BY a.mbid, a.name
156
-
ORDER BY COUNT(p.uri) DESC;
157
-
158
-
-- Top releases in the last 30 days
159
-
CREATE MATERIALIZED VIEW mv_top_releases_30days AS
160
-
SELECT
161
-
r.mbid AS release_mbid,
162
-
r.name AS release_name,
163
-
COUNT(p.uri) AS play_count
164
-
FROM releases r
165
-
INNER JOIN plays p ON p.release_mbid = r.mbid
166
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
167
-
GROUP BY r.mbid, r.name
168
-
ORDER BY COUNT(p.uri) DESC;
169
-
170
-
-- Top artists for user in the last 30 days
171
-
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
172
-
SELECT
173
-
prof.did,
174
-
a.mbid AS artist_mbid,
175
-
a.name AS artist_name,
176
-
COUNT(p.uri) AS play_count
177
-
FROM artists a
178
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
179
-
INNER JOIN plays p ON p.uri = pta.play_uri
180
-
INNER JOIN profiles prof ON prof.did = p.did
181
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
182
-
GROUP BY prof.did, a.mbid, a.name
183
-
ORDER BY COUNT(p.uri) DESC;
184
-
185
-
-- Top artists for user in the last 7 days
186
-
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
187
-
SELECT
188
-
prof.did,
189
-
a.mbid AS artist_mbid,
190
-
a.name AS artist_name,
191
-
COUNT(p.uri) AS play_count
192
-
FROM artists a
193
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
194
-
INNER JOIN plays p ON p.uri = pta.play_uri
195
-
INNER JOIN profiles prof ON prof.did = p.did
196
-
WHERE p.played_time >= NOW() - INTERVAL '7 days'
197
-
GROUP BY prof.did, a.mbid, a.name
198
-
ORDER BY COUNT(p.uri) DESC;
199
-
200
-
-- Top releases for user in the last 30 days
201
-
CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS
202
-
SELECT
203
-
prof.did,
204
-
r.mbid AS release_mbid,
205
-
r.name AS release_name,
206
-
COUNT(p.uri) AS play_count
207
-
FROM releases r
208
-
INNER JOIN plays p ON p.release_mbid = r.mbid
209
-
INNER JOIN profiles prof ON prof.did = p.did
210
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
211
-
GROUP BY prof.did, r.mbid, r.name
212
-
ORDER BY COUNT(p.uri) DESC;
213
-
214
-
-- Top releases for user in the last 7 days
215
-
CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS
216
-
SELECT
217
-
prof.did,
218
-
r.mbid AS release_mbid,
219
-
r.name AS release_name,
220
-
COUNT(p.uri) AS play_count
221
-
FROM releases r
222
-
INNER JOIN plays p ON p.release_mbid = r.mbid
223
-
INNER JOIN profiles prof ON prof.did = p.did
224
-
WHERE p.played_time >= NOW() - INTERVAL '7 days'
225
-
GROUP BY prof.did, r.mbid, r.name
226
-
ORDER BY COUNT(p.uri) DESC;
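A side note on the definitions above: these are ordinary (non-incremental) materialized views, so whatever kept them current had to refresh them periodically, and the unique indexes are what make `REFRESH MATERIALIZED VIEW CONCURRENTLY` legal in Postgres. A minimal sketch of such a refresh step, assuming a sqlx `PgPool` is on hand; the helper name and the idea of a periodic job are illustrative only and not part of this migration:

```rust
use sqlx::PgPool;

/// Hypothetical helper: refresh the play-count materialized views.
/// Only the views with a UNIQUE index can be refreshed CONCURRENTLY.
async fn refresh_play_count_views(pool: &PgPool) -> Result<(), sqlx::Error> {
    for view in [
        "mv_release_play_counts",
        "mv_recording_play_counts",
        "mv_global_play_count",
    ] {
        // View names come from the fixed list above, so string formatting is safe here.
        sqlx::query(&format!("REFRESH MATERIALIZED VIEW CONCURRENTLY {view}"))
            .execute(pool)
            .await?;
    }
    Ok(())
}
```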
-59
services/migrations/20241220000002_car_import_tables.sql
-59
services/migrations/20241220000002_car_import_tables.sql
···
1
-
-- CAR import functionality tables
2
-
-- For handling AT Protocol CAR file imports and processing
3
-
4
-
-- Tracks uploaded CAR files that are queued for processing
5
-
CREATE TABLE IF NOT EXISTS car_import_requests (
6
-
import_id TEXT PRIMARY KEY,
7
-
car_data_base64 TEXT NOT NULL,
8
-
status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed
9
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
10
-
processed_at TIMESTAMP WITH TIME ZONE,
11
-
error_message TEXT,
12
-
file_size_bytes INTEGER,
13
-
block_count INTEGER,
14
-
extracted_records_count INTEGER DEFAULT 0
15
-
);
16
-
17
-
CREATE INDEX idx_car_import_requests_status ON car_import_requests (status);
18
-
CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at);
19
-
20
-
-- Tracks raw IPLD blocks extracted from CAR files
21
-
CREATE TABLE IF NOT EXISTS car_blocks (
22
-
cid TEXT PRIMARY KEY,
23
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
24
-
block_data BYTEA NOT NULL,
25
-
decoded_successfully BOOLEAN DEFAULT FALSE,
26
-
collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc.
27
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
28
-
);
29
-
30
-
CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id);
31
-
CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type);
32
-
33
-
-- Tracks records extracted from CAR imports that were successfully processed
34
-
CREATE TABLE IF NOT EXISTS car_extracted_records (
35
-
id SERIAL PRIMARY KEY,
36
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
37
-
cid TEXT NOT NULL REFERENCES car_blocks(cid),
38
-
collection_type TEXT NOT NULL,
39
-
record_uri TEXT, -- AT URI if applicable (e.g., for play records)
40
-
synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123')
41
-
rkey TEXT,
42
-
extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
43
-
processing_notes TEXT
44
-
);
45
-
46
-
CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id);
47
-
CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type);
48
-
CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri);
49
-
50
-
-- Tracks import metadata and commit information
51
-
CREATE TABLE IF NOT EXISTS car_import_metadata (
52
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
53
-
metadata_key TEXT NOT NULL,
54
-
metadata_value JSONB NOT NULL,
55
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
56
-
PRIMARY KEY (import_id, metadata_key)
57
-
);
58
-
59
-
CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
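The `status` column above implies a pending → processing → completed/failed lifecycle for uploaded CAR files. Purely as an illustration of that lifecycle (the actual worker code is not shown in this diff, and the function below is hypothetical), a claim step with sqlx could look like the following; `FOR UPDATE SKIP LOCKED` lets several workers poll the table without double-claiming a request:

```rust
use sqlx::PgPool;

/// Hypothetical worker step: atomically claim the oldest pending CAR import.
async fn claim_next_import(pool: &PgPool) -> Result<Option<String>, sqlx::Error> {
    let claimed: Option<(String,)> = sqlx::query_as(
        r#"
        UPDATE car_import_requests
        SET status = 'processing'
        WHERE import_id = (
            SELECT import_id
            FROM car_import_requests
            WHERE status = 'pending'
            ORDER BY created_at
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        )
        RETURNING import_id
        "#,
    )
    .fetch_optional(pool)
    .await?;

    Ok(claimed.map(|(import_id,)| import_id))
}
```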
-34
services/rocketman/Cargo.toml
-34
services/rocketman/Cargo.toml
···
1
-
[package]
2
-
name = "rocketman"
3
-
version = "0.2.3"
4
-
edition = "2021"
5
-
6
-
license = "MIT"
7
-
authors = ["Natalie B. <nat@natalie.sh>"]
8
-
repository = "https://github.com/espeon/cadet"
9
-
10
-
readme = "readme.md"
11
-
12
-
description = "A modular(ish) jetstream consumer."
13
-
14
-
[dependencies]
15
-
tokio.workspace = true
16
-
tokio-tungstenite.workspace = true
17
-
futures-util = "0.3"
18
-
url.workspace = true
19
-
rand.workspace = true
20
-
tracing.workspace = true
21
-
tracing-subscriber.workspace = true
22
-
metrics.workspace = true
23
-
derive_builder = "0.20.2"
24
-
bon = "3.3.2"
25
-
serde = { workspace = true, features = ["derive"] }
26
-
serde_json.workspace = true
27
-
flume.workspace = true
28
-
anyhow.workspace = true
29
-
async-trait.workspace = true
30
-
zstd = { version = "0.13.3", optional = true }
31
-
32
-
[features]
33
-
default = ["zstd"]
34
-
zstd = ["dep:zstd"]
-77
services/rocketman/examples/spew-bsky-posts.rs
-77
services/rocketman/examples/spew-bsky-posts.rs
···
1
-
use rocketman::{
2
-
connection::JetstreamConnection,
3
-
handler,
4
-
ingestion::LexiconIngestor,
5
-
options::JetstreamOptions,
6
-
types::event::{ Event, Commit },
7
-
};
8
-
use serde_json::Value;
9
-
use std::{
10
-
collections::HashMap,
11
-
sync::Arc,
12
-
sync::Mutex,
13
-
};
14
-
use async_trait::async_trait;
15
-
16
-
#[tokio::main]
17
-
async fn main() {
18
-
// init the builder
19
-
let opts = JetstreamOptions::builder()
20
-
// your EXACT nsids
21
-
.wanted_collections(vec!["app.bsky.feed.post".to_string()])
22
-
.build();
23
-
// create the jetstream connector
24
-
let jetstream = JetstreamConnection::new(opts);
25
-
26
-
// create your ingestors
27
-
let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new();
28
-
ingestors.insert(
29
-
// your EXACT nsid
30
-
"app.bsky.feed.post".to_string(),
31
-
Box::new(MyCoolIngestor),
32
-
);
33
-
34
-
35
-
// tracks the last message we've processed
36
-
let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
37
-
38
-
// get channels
39
-
let msg_rx = jetstream.get_msg_rx();
40
-
let reconnect_tx = jetstream.get_reconnect_tx();
41
-
42
-
// spawn a task to process messages from the queue.
43
-
// this is a simple implementation, you can use a more complex one based on needs.
44
-
let c_cursor = cursor.clone();
45
-
tokio::spawn(async move {
46
-
while let Ok(message) = msg_rx.recv_async().await {
47
-
if let Err(e) =
48
-
handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone())
49
-
.await
50
-
{
51
-
eprintln!("Error processing message: {}", e);
52
-
};
53
-
}
54
-
});
55
-
56
-
// connect to jetstream
57
-
// retries internally, but may fail if there is an extreme error.
58
-
if let Err(e) = jetstream.connect(cursor.clone()).await {
59
-
eprintln!("Failed to connect to Jetstream: {}", e);
60
-
std::process::exit(1);
61
-
}
62
-
}
63
-
64
-
pub struct MyCoolIngestor;
65
-
66
-
/// A cool ingestor implementation. Will just print the message. Does not do verification.
67
-
#[async_trait]
68
-
impl LexiconIngestor for MyCoolIngestor {
69
-
async fn ingest(&self, message: Event<Value>) -> anyhow::Result<()> {
70
-
if let Some(Commit { record: Some(record), .. }) = message.commit {
71
-
if let Some(Value::String(text)) = record.get("text") {
72
-
println!("{text:?}");
73
-
}
74
-
}
75
-
Ok(())
76
-
}
77
-
}
-11
services/rocketman/package.json
-11
services/rocketman/package.json
-74
services/rocketman/readme.md
-74
services/rocketman/readme.md
···
1
-
## Rocketman
2
-
3
-
A modular(ish) jetstream consumer. Backed by Tungstenite.
4
-
5
-
6
-
### Installation
7
-
```toml
8
-
[dependencies]
9
-
rocketman = "latest" # pyt the latest version here
10
-
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
11
-
```
12
-
### Usage
13
-
```rs
14
-
#[tokio::main]
15
-
async fn main() {
16
-
// init the builder
17
-
let opts = JetstreamOptions::builder()
18
-
// your EXACT nsids
19
-
.wanted_collections(vec!["com.example.cool.nsid".to_string()])
20
-
.build();
21
-
// create the jetstream connector
22
-
let jetstream = JetstreamConnection::new(opts);
23
-
24
-
// create your ingestors
25
-
let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new();
26
-
ingestors.insert(
27
-
// your EXACT nsid
28
-
"com.example.cool.nsid".to_string(),
29
-
Box::new(MyCoolIngestor),
30
-
);
31
-
32
-
33
-
// tracks the last message we've processed
34
-
let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
35
-
36
-
// get channels
37
-
let msg_rx = jetstream.get_msg_rx();
38
-
let reconnect_tx = jetstream.get_reconnect_tx();
39
-
40
-
// spawn a task to process messages from the queue.
41
-
// this is a simple implementation, you can use a more complex one based on needs.
42
-
let c_cursor = cursor.clone();
43
-
tokio::spawn(async move {
44
-
while let Ok(message) = msg_rx.recv_async().await {
45
-
if let Err(e) =
46
-
handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone())
47
-
.await
48
-
{
49
-
error!("Error processing message: {}", e);
50
-
};
51
-
}
52
-
});
53
-
54
-
// connect to jetstream
55
-
// retries internally, but may fail if there is an extreme error.
56
-
if let Err(e) = jetstream.connect(cursor.clone()).await {
57
-
error!("Failed to connect to Jetstream: {}", e);
58
-
std::process::exit(1);
59
-
}
60
-
}
61
-
62
-
pub struct MyCoolIngestor;
63
-
64
-
/// A cool ingestor implementation. Will just print the message. Does not do verification.
65
-
impl LexiconIngestor for MyCoolIngestor {
66
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
67
-
info!("{:?}", message);
68
-
// Process message for default lexicon.
69
-
Ok(())
70
-
}
71
-
}
72
-
```
73
-
### gratz
74
-
Based heavily on [phil's jetstream consumer on atcosm constellation](https://github.com/atcosm/links/blob/main/constellation/src/consumer/jetstream.rs).
-335
services/rocketman/src/connection.rs
-335
services/rocketman/src/connection.rs
···
1
-
use flume::{Receiver, Sender};
2
-
use futures_util::StreamExt;
3
-
use metrics::{counter, describe_counter, describe_histogram, histogram, Unit};
4
-
use std::cmp::{max, min};
5
-
use std::sync::{Arc, Mutex};
6
-
use std::time::Instant;
7
-
use tokio::time::{sleep, Duration};
8
-
use tokio_tungstenite::{connect_async, tungstenite::Message};
9
-
use tracing::{error, info};
10
-
use url::Url;
11
-
12
-
use crate::options::JetstreamOptions;
13
-
use crate::time::system_time::SystemTimeProvider;
14
-
use crate::time::TimeProvider;
15
-
16
-
pub struct JetstreamConnection {
17
-
pub opts: JetstreamOptions,
18
-
reconnect_tx: flume::Sender<()>,
19
-
reconnect_rx: flume::Receiver<()>,
20
-
msg_tx: flume::Sender<Message>,
21
-
msg_rx: flume::Receiver<Message>,
22
-
}
23
-
24
-
impl JetstreamConnection {
25
-
pub fn new(opts: JetstreamOptions) -> Self {
26
-
let (reconnect_tx, reconnect_rx) = flume::bounded(opts.bound);
27
-
let (msg_tx, msg_rx) = flume::bounded(opts.bound);
28
-
Self {
29
-
opts,
30
-
reconnect_tx,
31
-
reconnect_rx,
32
-
msg_tx,
33
-
msg_rx,
34
-
}
35
-
}
36
-
37
-
pub fn get_reconnect_tx(&self) -> Sender<()> {
38
-
self.reconnect_tx.clone()
39
-
}
40
-
41
-
pub fn get_msg_rx(&self) -> Receiver<Message> {
42
-
self.msg_rx.clone()
43
-
}
44
-
45
-
fn build_ws_url(&self, cursor: Arc<Mutex<Option<u64>>>) -> String {
46
-
let mut url = Url::parse(&self.opts.ws_url.to_string()).unwrap();
47
-
48
-
// Append query params
49
-
if let Some(ref cols) = self.opts.wanted_collections {
50
-
for col in cols {
51
-
url.query_pairs_mut().append_pair("wantedCollections", col);
52
-
}
53
-
}
54
-
if let Some(ref dids) = self.opts.wanted_dids {
55
-
for did in dids {
56
-
url.query_pairs_mut().append_pair("wantedDids", did);
57
-
}
58
-
}
59
-
if let Some(cursor) = cursor.lock().unwrap().as_ref() {
60
-
url.query_pairs_mut()
61
-
.append_pair("cursor", &cursor.to_string());
62
-
}
63
-
#[cfg(feature = "zstd")]
64
-
if self.opts.compress {
65
-
url.query_pairs_mut().append_pair("compress", "true");
66
-
}
67
-
68
-
url.to_string()
69
-
}
70
-
71
-
pub async fn connect(
72
-
&self,
73
-
cursor: Arc<Mutex<Option<u64>>>,
74
-
) -> Result<(), Box<dyn std::error::Error>> {
75
-
describe_counter!(
76
-
"jetstream.connection.attempt",
77
-
Unit::Count,
78
-
"attempts to connect to jetstream service"
79
-
);
80
-
describe_counter!(
81
-
"jetstream.connection.error",
82
-
Unit::Count,
83
-
"errors connecting to jetstream service"
84
-
);
85
-
describe_histogram!(
86
-
"jetstream.connection.duration",
87
-
Unit::Seconds,
88
-
"Time connected to jetstream service"
89
-
);
90
-
describe_counter!(
91
-
"jetstream.connection.reconnect",
92
-
Unit::Count,
93
-
"reconnects to jetstream service"
94
-
);
95
-
let mut retry_interval = 1;
96
-
97
-
let time_provider = SystemTimeProvider::new();
98
-
99
-
let mut start_time = time_provider.now();
100
-
101
-
loop {
102
-
counter!("jetstream.connection.attempt").increment(1);
103
-
info!("Connecting to {}", self.opts.ws_url);
104
-
let start = Instant::now();
105
-
106
-
let ws_url = self.build_ws_url(cursor.clone());
107
-
108
-
match connect_async(ws_url).await {
109
-
Ok((ws_stream, response)) => {
110
-
let elapsed = start.elapsed();
111
-
info!("Connected. HTTP status: {}", response.status());
112
-
113
-
let (_, mut read) = ws_stream.split();
114
-
115
-
loop {
116
-
// Inner loop to handle messages, reconnect signals, and receive timeout
117
-
let receive_timeout =
118
-
sleep(Duration::from_secs(self.opts.timeout_time_sec as u64));
119
-
tokio::pin!(receive_timeout);
120
-
121
-
loop {
122
-
tokio::select! {
123
-
message_result = read.next() => {
124
-
match message_result {
125
-
Some(message) => {
126
-
// Reset timeout on message received
127
-
receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64));
128
-
129
-
histogram!("jetstream.connection.duration").record(elapsed.as_secs_f64());
130
-
match message {
131
-
Ok(message) => {
132
-
if let Err(err) = self.msg_tx.send_async(message).await {
133
-
counter!("jetstream.error").increment(1);
134
-
error!("Failed to queue message: {}", err);
135
-
}
136
-
}
137
-
Err(e) => {
138
-
counter!("jetstream.error").increment(1);
139
-
error!("Error: {}", e);
140
-
}
141
-
}
142
-
}
143
-
None => {
144
-
info!("Stream closed by server.");
145
-
counter!("jetstream.connection.reconnect").increment(1);
146
-
break; // Stream ended, break inner loop to reconnect
147
-
}
148
-
}
149
-
}
150
-
_ = self.reconnect_rx.recv_async() => {
151
-
info!("Reconnect signal received.");
152
-
counter!("jetstream.connection.reconnect").increment(1);
153
-
break;
154
-
}
155
-
_ = &mut receive_timeout => {
156
-
// last final poll, just in case
157
-
match read.next().await {
158
-
Some(Ok(message)) => {
159
-
if let Err(err) = self.msg_tx.send_async(message).await {
160
-
counter!("jetstream.error").increment(1);
161
-
error!("Failed to queue message: {}", err);
162
-
}
163
-
// Reset timeout to continue
164
-
receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64));
165
-
}
166
-
Some(Err(e)) => {
167
-
counter!("jetstream.error").increment(1);
168
-
error!("Error receiving message during final poll: {}", e);
169
-
counter!("jetstream.connection.reconnect").increment(1);
170
-
break;
171
-
}
172
-
None => {
173
-
info!("No commits received in {} seconds, reconnecting.", self.opts.timeout_time_sec);
174
-
counter!("jetstream.connection.reconnect").increment(1);
175
-
break;
176
-
}
177
-
}
178
-
}
179
-
}
180
-
}
181
-
}
182
-
}
183
-
Err(e) => {
184
-
let elapsed_time = time_provider.elapsed(start_time);
185
-
// reset if time connected > the time we set
186
-
if elapsed_time.as_secs() > self.opts.max_retry_interval_seconds {
187
-
retry_interval = 0;
188
-
start_time = time_provider.now();
189
-
}
190
-
counter!("jetstream.connection.error").increment(1);
191
-
error!("Connection error: {}", e);
192
-
}
193
-
}
194
-
195
-
let sleep_time = max(1, min(self.opts.max_retry_interval_seconds, retry_interval));
196
-
info!("Reconnecting in {} seconds...", sleep_time);
197
-
sleep(Duration::from_secs(sleep_time)).await;
198
-
199
-
if retry_interval > self.opts.max_retry_interval_seconds {
200
-
retry_interval = self.opts.max_retry_interval_seconds;
201
-
} else {
202
-
retry_interval *= 2;
203
-
}
204
-
}
205
-
}
206
-
207
-
pub fn force_reconnect(&self) -> Result<(), flume::SendError<()>> {
208
-
info!("Force reconnect requested.");
209
-
self.reconnect_tx.send(()) // Send a reconnect signal
210
-
}
211
-
}
212
-
213
-
#[cfg(test)]
214
-
mod tests {
215
-
use super::*;
216
-
use std::sync::{Arc, Mutex};
217
-
use tokio::task;
218
-
use tokio::time::{timeout, Duration};
219
-
use tokio_tungstenite::tungstenite::Message;
220
-
221
-
#[test]
222
-
fn test_build_ws_url() {
223
-
let opts = JetstreamOptions {
224
-
wanted_collections: Some(vec!["col1".to_string(), "col2".to_string()]),
225
-
wanted_dids: Some(vec!["did1".to_string()]),
226
-
..Default::default()
227
-
};
228
-
let connection = JetstreamConnection::new(opts);
229
-
230
-
let test = Arc::new(Mutex::new(Some(8373)));
231
-
232
-
let url = connection.build_ws_url(test);
233
-
234
-
assert!(url.starts_with("wss://"));
235
-
assert!(url.contains("cursor=8373"));
236
-
assert!(url.contains("wantedCollections=col1"));
237
-
assert!(url.contains("wantedCollections=col2"));
238
-
assert!(url.contains("wantedDids=did1"));
239
-
}
240
-
241
-
#[tokio::test]
242
-
async fn test_force_reconnect() {
243
-
let opts = JetstreamOptions::default();
244
-
let connection = JetstreamConnection::new(opts);
245
-
246
-
// Spawn a task to listen for the reconnect signal
247
-
let reconnect_rx = connection.reconnect_rx.clone();
248
-
let recv_task = task::spawn(async move {
249
-
reconnect_rx
250
-
.recv_async()
251
-
.await
252
-
.expect("Failed to receive reconnect signal");
253
-
});
254
-
255
-
connection
256
-
.force_reconnect()
257
-
.expect("Failed to send reconnect signal");
258
-
259
-
// Ensure reconnect signal was received
260
-
assert!(recv_task.await.is_ok());
261
-
}
262
-
263
-
#[tokio::test]
264
-
async fn test_message_queue() {
265
-
let opts = JetstreamOptions::default();
266
-
let connection = JetstreamConnection::new(opts);
267
-
268
-
let msg_rx = connection.get_msg_rx();
269
-
let msg = Message::Text("test message".into());
270
-
271
-
// Send a message to the queue
272
-
connection
273
-
.msg_tx
274
-
.send_async(msg.clone())
275
-
.await
276
-
.expect("Failed to send message");
277
-
278
-
// Receive and verify the message
279
-
let received = msg_rx
280
-
.recv_async()
281
-
.await
282
-
.expect("Failed to receive message");
283
-
assert_eq!(received, msg);
284
-
}
285
-
286
-
#[tokio::test]
287
-
async fn test_connection_retries_on_failure() {
288
-
let opts = JetstreamOptions::default();
289
-
let connection = Arc::new(JetstreamConnection::new(opts));
290
-
291
-
let cursor = Arc::new(Mutex::new(None));
292
-
293
-
// Timeout to prevent infinite loop
294
-
let result = timeout(Duration::from_secs(3), connection.connect(cursor)).await;
295
-
296
-
assert!(result.is_err(), "Expected timeout due to retry logic");
297
-
}
298
-
299
-
#[tokio::test]
300
-
async fn test_reconnect_after_receive_timeout() {
301
-
use tokio::net::TcpListener;
302
-
use tokio_tungstenite::accept_async;
303
-
304
-
let opts = JetstreamOptions {
305
-
ws_url: crate::endpoints::JetstreamEndpoints::Custom("ws://127.0.0.1:9001".to_string()),
306
-
bound: 5,
307
-
max_retry_interval_seconds: 1,
308
-
..Default::default()
309
-
};
310
-
let connection = JetstreamConnection::new(opts);
311
-
let cursor = Arc::new(Mutex::new(None));
312
-
313
-
// set up dummy "websocket"
314
-
let listener = TcpListener::bind("127.0.0.1:9001")
315
-
.await
316
-
.expect("Failed to bind");
317
-
let server_handle = tokio::spawn(async move {
318
-
if let Ok((stream, _)) = listener.accept().await {
319
-
let ws_stream = accept_async(stream).await.expect("Failed to accept");
320
-
// send nothing
321
-
tokio::time::sleep(Duration::from_secs(6)).await;
322
-
drop(ws_stream);
323
-
}
324
-
});
325
-
326
-
// spawn, then run for >30 seconds to trigger reconnect
327
-
let connect_handle = tokio::spawn(async move {
328
-
tokio::time::timeout(Duration::from_secs(5), connection.connect(cursor))
329
-
.await
330
-
.ok();
331
-
});
332
-
333
-
let _ = tokio::join!(server_handle, connect_handle);
334
-
}
335
-
}
-65
services/rocketman/src/endpoints.rs
-65
services/rocketman/src/endpoints.rs
···
1
-
use std::fmt::{Display, Formatter, Result};
2
-
3
-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
4
-
pub enum JetstreamEndpointLocations {
5
-
UsEast,
6
-
UsWest,
7
-
}
8
-
9
-
impl Display for JetstreamEndpointLocations {
10
-
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
11
-
write!(
12
-
f,
13
-
"{}",
14
-
match self {
15
-
Self::UsEast => "us-east",
16
-
Self::UsWest => "us-west",
17
-
}
18
-
)
19
-
}
20
-
}
21
-
22
-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
23
-
pub enum JetstreamEndpoints {
24
-
Public(JetstreamEndpointLocations, i8),
25
-
Custom(String),
26
-
}
27
-
28
-
impl Display for JetstreamEndpoints {
29
-
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
30
-
match self {
31
-
Self::Public(location, id) => write!(
32
-
f,
33
-
"wss://jetstream{}.{}.bsky.network/subscribe",
34
-
id, location
35
-
),
36
-
Self::Custom(url) => write!(f, "{}", url),
37
-
}
38
-
}
39
-
}
40
-
41
-
impl Default for JetstreamEndpoints {
42
-
fn default() -> Self {
43
-
Self::Public(JetstreamEndpointLocations::UsEast, 2)
44
-
}
45
-
}
46
-
47
-
#[cfg(test)]
48
-
mod tests {
49
-
use super::*;
50
-
51
-
#[test]
52
-
fn test_display_public() {
53
-
let endpoint = JetstreamEndpoints::Public(JetstreamEndpointLocations::UsEast, 2);
54
-
assert_eq!(
55
-
endpoint.to_string(),
56
-
"wss://jetstream2.us-east.bsky.network/subscribe"
57
-
);
58
-
}
59
-
60
-
#[test]
61
-
fn test_display_custom() {
62
-
let endpoint = JetstreamEndpoints::Custom("wss://custom.bsky.network/subscribe".into());
63
-
assert_eq!(endpoint.to_string(), "wss://custom.bsky.network/subscribe");
64
-
}
65
-
}
-1
services/rocketman/src/err.rs
-1
services/rocketman/src/err.rs
···
1
-
// TODO: error types instead of using anyhow
-452
services/rocketman/src/handler.rs
-452
services/rocketman/src/handler.rs
···
1
-
use anyhow::Result;
2
-
use flume::Sender;
3
-
use metrics::{counter, describe_counter, Unit};
4
-
use serde_json::Value;
5
-
use std::{
6
-
collections::HashMap,
7
-
sync::{Arc, Mutex},
8
-
};
9
-
use tokio_tungstenite::tungstenite::{Error, Message};
10
-
use tracing::{debug, error};
11
-
12
-
#[cfg(feature = "zstd")]
13
-
use std::io::Cursor as IoCursor;
14
-
#[cfg(feature = "zstd")]
15
-
use std::sync::LazyLock;
16
-
#[cfg(feature = "zstd")]
17
-
use zstd::dict::DecoderDictionary;
18
-
19
-
use crate::{
20
-
ingestion::LexiconIngestor,
21
-
types::event::{Event, Kind},
22
-
};
23
-
24
-
/// The custom `zstd` dictionary used for decoding compressed Jetstream messages.
25
-
///
26
-
/// Sourced from the [official Bluesky Jetstream repo.](https://github.com/bluesky-social/jetstream/tree/main/pkg/models)
27
-
#[cfg(feature = "zstd")]
28
-
static ZSTD_DICTIONARY: LazyLock<DecoderDictionary> =
29
-
LazyLock::new(|| DecoderDictionary::copy(include_bytes!("../zstd/dictionary")));
30
-
31
-
pub async fn handle_message(
32
-
message: Message,
33
-
ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>,
34
-
reconnect_tx: Sender<()>,
35
-
cursor: Arc<Mutex<Option<u64>>>,
36
-
) -> Result<()> {
37
-
describe_counter!(
38
-
"jetstream.event",
39
-
Unit::Count,
40
-
"number of event ingest attempts"
41
-
);
42
-
describe_counter!(
43
-
"jetstream.event.parse",
44
-
Unit::Count,
45
-
"events that were successfully processed"
46
-
);
47
-
describe_counter!(
48
-
"jetstream.event.fail",
49
-
Unit::Count,
50
-
"events that could not be read"
51
-
);
52
-
describe_counter!("jetstream.error", Unit::Count, "errors encountered");
53
-
match message {
54
-
Message::Text(text) => {
55
-
debug!("Text message received");
56
-
counter!("jetstream.event").increment(1);
57
-
let envelope: Event<Value> = serde_json::from_str(&text).map_err(|e| {
58
-
anyhow::anyhow!("Failed to parse message: {} with json string {}", e, text)
59
-
})?;
60
-
debug!("envelope: {:?}", envelope);
61
-
handle_envelope(envelope, cursor, ingestors).await?;
62
-
Ok(())
63
-
}
64
-
#[cfg(feature = "zstd")]
65
-
Message::Binary(bytes) => {
66
-
debug!("Binary message received");
67
-
counter!("jetstream.event").increment(1);
68
-
let decoder = zstd::stream::Decoder::with_prepared_dictionary(
69
-
IoCursor::new(bytes),
70
-
&*ZSTD_DICTIONARY,
71
-
)?;
72
-
let envelope: Event<Value> = serde_json::from_reader(decoder)
73
-
.map_err(|e| anyhow::anyhow!("Failed to parse binary message: {}", e))?;
74
-
debug!("envelope: {:?}", envelope);
75
-
handle_envelope(envelope, cursor, ingestors).await?;
76
-
Ok(())
77
-
}
78
-
#[cfg(not(feature = "zstd"))]
79
-
Message::Binary(_) => {
80
-
debug!("Binary message received");
81
-
Err(anyhow::anyhow!(
82
-
"binary message received but zstd feature is not enabled"
83
-
))
84
-
}
85
-
Message::Close(_) => {
86
-
debug!("Server closed connection");
87
-
if let Err(e) = reconnect_tx.send(()) {
88
-
counter!("jetstream.event.parse.error", "error" => "failed_to_send_reconnect_signal").increment(1);
89
-
error!("Failed to send reconnect signal: {}", e);
90
-
}
91
-
Err(Error::ConnectionClosed.into())
92
-
}
93
-
_ => Ok(()),
94
-
}
95
-
}
96
-
97
-
async fn handle_envelope(
98
-
envelope: Event<Value>,
99
-
cursor: Arc<Mutex<Option<u64>>>,
100
-
ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>,
101
-
) -> Result<()> {
102
-
if let Some(ref time_us) = envelope.time_us {
103
-
debug!("Time: {}", time_us);
104
-
if let Some(cursor) = cursor.lock().unwrap().as_mut() {
105
-
debug!("Cursor: {}", cursor);
106
-
if time_us > cursor {
107
-
debug!("Cursor is behind, resetting");
108
-
*cursor = *time_us;
109
-
}
110
-
}
111
-
}
112
-
113
-
match envelope.kind {
114
-
Kind::Commit => match extract_commit_nsid(&envelope) {
115
-
Ok(nsid) => {
116
-
if let Some(fun) = ingestors.get(&nsid) {
117
-
match fun.ingest(envelope).await {
118
-
Ok(_) => {
119
-
counter!("jetstream.event.parse.commit", "nsid" => nsid).increment(1)
120
-
}
121
-
Err(e) => {
122
-
error!("Error ingesting commit with nsid {}: {}", nsid, e);
123
-
counter!("jetstream.error").increment(1);
124
-
counter!("jetstream.event.fail").increment(1);
125
-
}
126
-
}
127
-
}
128
-
}
129
-
Err(e) => error!("Error parsing commit: {}", e),
130
-
},
131
-
Kind::Identity => {
132
-
counter!("jetstream.event.parse.identity").increment(1);
133
-
}
134
-
Kind::Account => {
135
-
counter!("jetstream.event.parse.account").increment(1);
136
-
}
137
-
Kind::Unknown(kind) => {
138
-
counter!("jetstream.event.parse.unknown", "kind" => kind).increment(1);
139
-
}
140
-
}
141
-
Ok(())
142
-
}
143
-
144
-
fn extract_commit_nsid(envelope: &Event<Value>) -> anyhow::Result<String> {
145
-
// if the type is not a commit
146
-
if envelope.commit.is_none() {
147
-
return Err(anyhow::anyhow!(
148
-
"Message has no commit, so there is no nsid attached."
149
-
));
150
-
} else if let Some(ref commit) = envelope.commit {
151
-
return Ok(commit.collection.clone());
152
-
}
153
-
154
-
Err(anyhow::anyhow!("Failed to extract nsid: unknown error"))
155
-
}
156
-
157
-
#[cfg(test)]
158
-
mod tests {
159
-
use super::*;
160
-
use crate::types::event::Event;
161
-
use anyhow::Result;
162
-
use async_trait::async_trait;
163
-
use flume::{Receiver, Sender};
164
-
use serde_json::json;
165
-
use std::{
166
-
collections::HashMap,
167
-
sync::{Arc, Mutex},
168
-
};
169
-
use tokio_tungstenite::tungstenite::Message;
170
-
171
-
// Dummy ingestor that records if it was called.
172
-
struct DummyIngestor {
173
-
pub called: Arc<Mutex<bool>>,
174
-
}
175
-
176
-
#[async_trait]
177
-
impl crate::ingestion::LexiconIngestor for DummyIngestor {
178
-
async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> {
179
-
let mut called = self.called.lock().unwrap();
180
-
*called = true;
181
-
Ok(())
182
-
}
183
-
}
184
-
185
-
// Dummy ingestor that always returns an error.
186
-
struct ErrorIngestor;
187
-
188
-
#[async_trait]
189
-
impl crate::ingestion::LexiconIngestor for ErrorIngestor {
190
-
async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> {
191
-
Err(anyhow::anyhow!("Ingest error"))
192
-
}
193
-
}
194
-
195
-
// Helper to create a reconnect channel.
196
-
fn setup_reconnect_channel() -> (Sender<()>, Receiver<()>) {
197
-
flume::unbounded()
198
-
}
199
-
200
-
#[tokio::test]
201
-
async fn test_valid_commit_success() {
202
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
203
-
let cursor = Arc::new(Mutex::new(Some(100)));
204
-
let called_flag = Arc::new(Mutex::new(false));
205
-
206
-
// Create a valid commit event JSON.
207
-
let event_json = json!({
208
-
"did": "did:example:123",
209
-
"time_us": 200,
210
-
"kind": "commit",
211
-
"commit": {
212
-
"rev": "1",
213
-
"operation": "create",
214
-
"collection": "ns1",
215
-
"rkey": "rkey1",
216
-
"record": { "foo": "bar" },
217
-
"cid": "cid123"
218
-
},
219
-
})
220
-
.to_string();
221
-
222
-
let mut ingestors: HashMap<
223
-
String,
224
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
225
-
> = HashMap::new();
226
-
ingestors.insert(
227
-
"ns1".to_string(),
228
-
Box::new(DummyIngestor {
229
-
called: called_flag.clone(),
230
-
}),
231
-
);
232
-
233
-
let result = handle_message(
234
-
Message::Text(event_json),
235
-
&ingestors,
236
-
reconnect_tx,
237
-
cursor.clone(),
238
-
)
239
-
.await;
240
-
assert!(result.is_ok());
241
-
// Check that the ingestor was called.
242
-
assert!(*called_flag.lock().unwrap());
243
-
// Verify that the cursor got updated.
244
-
assert_eq!(*cursor.lock().unwrap(), Some(200));
245
-
}
246
-
247
-
#[cfg(feature = "zstd")]
248
-
#[tokio::test]
249
-
async fn test_binary_valid_commit() {
250
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
251
-
let cursor = Arc::new(Mutex::new(Some(100)));
252
-
let called_flag = Arc::new(Mutex::new(false));
253
-
254
-
let uncompressed_json = json!({
255
-
"did": "did:example:123",
256
-
"time_us": 200,
257
-
"kind": "commit",
258
-
"commit": {
259
-
"rev": "1",
260
-
"operation": "create",
261
-
"collection": "ns1",
262
-
"rkey": "rkey1",
263
-
"record": { "foo": "bar" },
264
-
"cid": "cid123"
265
-
},
266
-
})
267
-
.to_string();
268
-
269
-
let compressed_dest: IoCursor<Vec<u8>> = IoCursor::new(vec![]);
270
-
let mut encoder = zstd::Encoder::with_prepared_dictionary(
271
-
compressed_dest,
272
-
&zstd::dict::EncoderDictionary::copy(include_bytes!("../zstd/dictionary"), 0),
273
-
)
274
-
.unwrap();
275
-
std::io::copy(
276
-
&mut IoCursor::new(uncompressed_json.as_bytes()),
277
-
&mut encoder,
278
-
)
279
-
.unwrap();
280
-
let compressed_dest = encoder.finish().unwrap();
281
-
282
-
let mut ingestors: HashMap<
283
-
String,
284
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
285
-
> = HashMap::new();
286
-
ingestors.insert(
287
-
"ns1".to_string(),
288
-
Box::new(DummyIngestor {
289
-
called: called_flag.clone(),
290
-
}),
291
-
);
292
-
293
-
let result = handle_message(
294
-
Message::Binary(compressed_dest.into_inner()),
295
-
&ingestors,
296
-
reconnect_tx,
297
-
cursor.clone(),
298
-
)
299
-
.await;
300
-
301
-
assert!(result.is_ok());
302
-
// Check that the ingestor was called.
303
-
assert!(*called_flag.lock().unwrap());
304
-
// Verify that the cursor got updated.
305
-
assert_eq!(*cursor.lock().unwrap(), Some(200));
306
-
}
307
-
308
-
#[tokio::test]
309
-
async fn test_commit_ingest_failure() {
310
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
311
-
let cursor = Arc::new(Mutex::new(Some(100)));
312
-
313
-
// Valid commit event with an ingestor that fails.
314
-
let event_json = json!({
315
-
"did": "did:example:123",
316
-
"time_us": 300,
317
-
"kind": "commit",
318
-
"commit": {
319
-
"rev": "1",
320
-
"operation": "create",
321
-
"collection": "ns_error",
322
-
"rkey": "rkey1",
323
-
"record": { "foo": "bar" },
324
-
"cid": "cid123"
325
-
},
326
-
"identity": null
327
-
})
328
-
.to_string();
329
-
330
-
let mut ingestors: HashMap<
331
-
String,
332
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
333
-
> = HashMap::new();
334
-
ingestors.insert("ns_error".to_string(), Box::new(ErrorIngestor));
335
-
336
-
// Even though ingestion fails, handle_message returns Ok(()).
337
-
let result = handle_message(
338
-
Message::Text(event_json),
339
-
&ingestors,
340
-
reconnect_tx,
341
-
cursor.clone(),
342
-
)
343
-
.await;
344
-
assert!(result.is_ok());
345
-
// Cursor should still update because it comes before the ingest call.
346
-
assert_eq!(*cursor.lock().unwrap(), Some(300));
347
-
}
348
-
349
-
#[tokio::test]
350
-
async fn test_identity_message() {
351
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
352
-
let cursor = Arc::new(Mutex::new(None));
353
-
// Valid identity event.
354
-
let event_json = json!({
355
-
"did": "did:example:123",
356
-
"time_us": 150,
357
-
"kind": "identity",
358
-
"commit": null,
359
-
"identity": {
360
-
"did": "did:example:123",
361
-
"handle": "user",
362
-
"seq": 1,
363
-
"time": "2025-01-01T00:00:00Z"
364
-
}
365
-
})
366
-
.to_string();
367
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
368
-
HashMap::new();
369
-
370
-
let result =
371
-
handle_message(Message::Text(event_json), &ingestors, reconnect_tx, cursor).await;
372
-
assert!(result.is_ok());
373
-
}
374
-
375
-
#[tokio::test]
376
-
async fn test_close_message() {
377
-
let (reconnect_tx, reconnect_rx) = setup_reconnect_channel();
378
-
let cursor = Arc::new(Mutex::new(None));
379
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
380
-
HashMap::new();
381
-
382
-
let result = handle_message(Message::Close(None), &ingestors, reconnect_tx, cursor).await;
383
-
// Should return an error due to connection close.
384
-
assert!(result.is_err());
385
-
// Verify that a reconnect signal was sent.
386
-
let signal = reconnect_rx.recv_async().await;
387
-
assert!(signal.is_ok());
388
-
}
389
-
390
-
#[tokio::test]
391
-
async fn test_invalid_json() {
392
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
393
-
let cursor = Arc::new(Mutex::new(None));
394
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
395
-
HashMap::new();
396
-
397
-
let invalid_json = "this is not json".to_string();
398
-
let result = handle_message(
399
-
Message::Text(invalid_json),
400
-
&ingestors,
401
-
reconnect_tx,
402
-
cursor,
403
-
)
404
-
.await;
405
-
assert!(result.is_err());
406
-
}
407
-
408
-
#[tokio::test]
409
-
async fn test_cursor_not_updated_if_lower() {
410
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
411
-
// Set an initial cursor value.
412
-
let cursor = Arc::new(Mutex::new(Some(300)));
413
-
let event_json = json!({
414
-
"did": "did:example:123",
415
-
"time_us": 200,
416
-
"kind": "commit",
417
-
"commit": {
418
-
"rev": "1",
419
-
"operation": "create",
420
-
"collection": "ns1",
421
-
"rkey": "rkey1",
422
-
"record": { "foo": "bar" },
423
-
"cid": "cid123"
424
-
},
425
-
"identity": null
426
-
})
427
-
.to_string();
428
-
429
-
// Use a dummy ingestor that does nothing.
430
-
let mut ingestors: HashMap<
431
-
String,
432
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
433
-
> = HashMap::new();
434
-
ingestors.insert(
435
-
"ns1".to_string(),
436
-
Box::new(DummyIngestor {
437
-
called: Arc::new(Mutex::new(false)),
438
-
}),
439
-
);
440
-
441
-
let result = handle_message(
442
-
Message::Text(event_json),
443
-
&ingestors,
444
-
reconnect_tx,
445
-
cursor.clone(),
446
-
)
447
-
.await;
448
-
assert!(result.is_ok());
449
-
// Cursor should remain unchanged.
450
-
assert_eq!(*cursor.lock().unwrap(), Some(300));
451
-
}
452
-
}
-22
services/rocketman/src/ingestion.rs
-22
services/rocketman/src/ingestion.rs
···
1
-
use anyhow::Result;
2
-
use async_trait::async_trait;
3
-
use serde_json::Value;
4
-
use tracing::info;
5
-
6
-
use crate::types::event::Event;
7
-
8
-
#[async_trait]
9
-
pub trait LexiconIngestor {
10
-
async fn ingest(&self, message: Event<Value>) -> Result<()>;
11
-
}
12
-
13
-
pub struct DefaultLexiconIngestor;
14
-
15
-
#[async_trait]
16
-
impl LexiconIngestor for DefaultLexiconIngestor {
17
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
18
-
info!("Default lexicon processing: {:?}", message);
19
-
// Process message for default lexicon.
20
-
Ok(())
21
-
}
22
-
}
-8
services/rocketman/src/lib.rs
-8
services/rocketman/src/lib.rs
-40
services/rocketman/src/options.rs
-40
services/rocketman/src/options.rs
···
1
-
use bon::Builder;
2
-
3
-
use crate::endpoints::JetstreamEndpoints;
4
-
5
-
#[derive(Builder, Debug)]
6
-
pub struct JetstreamOptions {
7
-
#[builder(default)]
8
-
pub ws_url: JetstreamEndpoints,
9
-
#[builder(default)]
10
-
pub max_retry_interval_seconds: u64,
11
-
#[builder(default)]
12
-
pub connection_success_time_seconds: u64,
13
-
#[builder(default)]
14
-
pub bound: usize,
15
-
#[builder(default)]
16
-
pub timeout_time_sec: usize,
17
-
#[cfg(feature = "zstd")]
18
-
#[builder(default = true)]
19
-
pub compress: bool,
20
-
pub wanted_collections: Option<Vec<String>>,
21
-
pub wanted_dids: Option<Vec<String>>,
22
-
pub cursor: Option<String>,
23
-
}
24
-
25
-
impl Default for JetstreamOptions {
26
-
fn default() -> Self {
27
-
Self {
28
-
ws_url: JetstreamEndpoints::default(),
29
-
max_retry_interval_seconds: 120,
30
-
connection_success_time_seconds: 60,
31
-
bound: 65536,
32
-
timeout_time_sec: 40,
33
-
#[cfg(feature = "zstd")]
34
-
compress: true,
35
-
wanted_collections: None,
36
-
wanted_dids: None,
37
-
cursor: None,
38
-
}
39
-
}
40
-
}
-11
services/rocketman/src/time/mod.rs
-11
services/rocketman/src/time/mod.rs
···
1
-
use std::time::{Duration, Instant, SystemTime};
2
-
3
-
pub mod system_time;
4
-
5
-
pub trait TimeProvider {
6
-
fn new() -> Self;
7
-
fn now(&self) -> SystemTime; // Get the current time
8
-
fn elapsed(&self, earlier: SystemTime) -> Duration; // Calculate the elapsed time.
9
-
fn instant_now(&self) -> Instant; // For compatibility with your existing code (if needed)
10
-
fn instant_elapsed(&self, earlier: Instant) -> Duration;
11
-
}
-28
services/rocketman/src/time/system_time.rs
-28
services/rocketman/src/time/system_time.rs
···
1
-
use std::time::{Duration, Instant, SystemTime};
2
-
3
-
use super::TimeProvider;
4
-
5
-
#[derive(Default, Clone, Copy)] // Add these derives for ease of use
6
-
pub struct SystemTimeProvider; // No fields needed, just a marker type
7
-
8
-
impl TimeProvider for SystemTimeProvider {
9
-
fn new() -> Self {
10
-
Self
11
-
}
12
-
13
-
fn now(&self) -> SystemTime {
14
-
SystemTime::now()
15
-
}
16
-
17
-
fn elapsed(&self, earlier: SystemTime) -> Duration {
18
-
earlier.elapsed().unwrap_or_else(|_| Duration::from_secs(0))
19
-
}
20
-
21
-
fn instant_now(&self) -> Instant {
22
-
Instant::now()
23
-
}
24
-
25
-
fn instant_elapsed(&self, earlier: Instant) -> Duration {
26
-
earlier.elapsed()
27
-
}
28
-
}
-116
services/rocketman/src/types/event.rs
-116
services/rocketman/src/types/event.rs
···
1
-
use serde::{Deserialize, Deserializer, Serialize};
2
-
3
-
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
4
-
#[serde(rename_all = "lowercase")]
5
-
pub enum Kind {
6
-
Account,
7
-
Identity,
8
-
Commit,
9
-
Unknown(String),
10
-
}
11
-
12
-
#[derive(Debug, Serialize, Deserialize)]
13
-
#[serde(rename_all = "snake_case")]
14
-
pub struct Event<T> {
15
-
pub did: String,
16
-
pub time_us: Option<u64>,
17
-
pub kind: Kind,
18
-
pub commit: Option<Commit<T>>,
19
-
pub identity: Option<Identity>,
20
-
}
21
-
22
-
#[derive(Debug, Serialize, Deserialize)]
23
-
pub struct Identity {
24
-
did: String,
25
-
handle: Option<String>,
26
-
seq: u64,
27
-
time: String,
28
-
}
29
-
30
-
#[derive(Debug, Serialize, Deserialize)]
31
-
#[serde(rename_all = "lowercase")]
32
-
enum AccountStatus {
33
-
TakenDown,
34
-
Suspended,
35
-
Deleted,
36
-
Activated,
37
-
}
38
-
39
-
#[derive(Debug, Serialize, Deserialize)]
40
-
pub struct Account {
41
-
did: String,
42
-
handle: String,
43
-
seq: u64,
44
-
time: String,
45
-
status: AccountStatus,
46
-
}
47
-
48
-
#[derive(Debug, Serialize)]
49
-
#[serde(rename_all = "camelCase")]
50
-
pub struct Commit<T> {
51
-
pub rev: String,
52
-
pub operation: Operation,
53
-
pub collection: String,
54
-
pub rkey: String,
55
-
pub record: Option<T>,
56
-
pub cid: Option<String>,
57
-
}
58
-
59
-
#[derive(Debug, Serialize, Deserialize)]
60
-
#[serde(rename_all = "lowercase")]
61
-
pub enum Operation {
62
-
Create,
63
-
Update,
64
-
Delete,
65
-
}
66
-
67
-
/// Enforce that record is None only when operation is 'delete'
68
-
impl<'de, T> Deserialize<'de> for Commit<T>
69
-
where
70
-
T: Deserialize<'de>,
71
-
{
72
-
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
73
-
where
74
-
D: Deserializer<'de>,
75
-
{
76
-
// Helper struct to perform the deserialization.
77
-
#[derive(Deserialize)]
78
-
#[serde(rename_all = "camelCase")]
79
-
struct Helper<T> {
80
-
rev: String,
81
-
operation: Operation,
82
-
collection: String,
83
-
rkey: String,
84
-
record: Option<T>,
85
-
cid: Option<String>,
86
-
}
87
-
88
-
let helper = Helper::deserialize(deserializer)?;
89
-
90
-
match helper.operation {
91
-
Operation::Delete => {
92
-
if helper.record.is_some() || helper.cid.is_some() {
93
-
return Err(<D::Error as serde::de::Error>::custom(
94
-
"record and cid must be null when operation is delete",
95
-
));
96
-
}
97
-
}
98
-
_ => {
99
-
if helper.record.is_none() || helper.cid.is_none() {
100
-
return Err(<D::Error as serde::de::Error>::custom(
101
-
"record and cid must be present unless operation is delete",
102
-
));
103
-
}
104
-
}
105
-
}
106
-
107
-
Ok(Commit {
108
-
rev: helper.rev,
109
-
operation: helper.operation,
110
-
collection: helper.collection,
111
-
rkey: helper.rkey,
112
-
record: helper.record,
113
-
cid: helper.cid,
114
-
})
115
-
}
116
-
}
-1
services/rocketman/src/types/mod.rs
-1
services/rocketman/src/types/mod.rs
···
1
-
pub mod event;
services/rocketman/zstd/dictionary
services/rocketman/zstd/dictionary
This is a binary file and will not be displayed.
+1
-6
services/satellite/src/counts.rs
+1
-6
services/satellite/src/counts.rs
···
3
3
http::StatusCode,
4
4
Json,
5
5
};
6
-
use chrono::{DateTime, Utc};
7
6
use serde::{Deserialize, Serialize};
8
7
use sqlx::FromRow;
9
8
use uuid::Uuid;
···
43
42
pub limit: i64,
44
43
}
45
44
46
-
#[derive(FromRow, Debug, Deserialize, Serialize)]
45
+
#[derive(FromRow, Debug)]
47
46
pub struct Play {
48
47
pub did: String,
49
48
pub track_name: String,
···
51
50
pub release_name: Option<String>,
52
51
pub release_mbid: Option<Uuid>,
53
52
pub duration: Option<i32>,
54
-
pub played_time: Option<DateTime<Utc>>,
55
53
pub uri: Option<String>,
56
54
// MASSIVE HUGE HACK
57
55
pub artists: Option<String>,
···
65
63
pub release_name: Option<String>,
66
64
pub release_mbid: Option<Uuid>,
67
65
pub duration: Option<i32>,
68
-
pub played_time: Option<DateTime<Utc>>,
69
66
pub uri: Option<String>,
70
67
pub artists: Vec<Artist>,
71
68
}
···
92
89
-- TODO: replace with actual
93
90
STRING_AGG(pa.artist_name || '|' || TEXT(pa.artist_mbid), ',') AS artists,
94
91
p.release_name,
95
-
p.played_time,
96
92
p.duration,
97
93
p.uri,
98
94
p.recording_mbid,
···
138
134
release_name: play.release_name,
139
135
release_mbid: play.release_mbid,
140
136
duration: play.duration,
141
-
played_time: play.played_time,
142
137
uri: play.uri,
143
138
artists,
144
139
}
+31
services/types/Cargo.toml
+31
services/types/Cargo.toml
···
1
+
[package]
2
+
name = "types"
3
+
version = "0.1.0"
4
+
edition = "2021"
5
+
6
+
[dependencies]
7
+
atrium-api.workspace = true
8
+
atrium-xrpc = "0.12.1"
9
+
chrono = "0.4.39"
10
+
http = "1.2.0"
11
+
ipld-core = { version = "0.4.2", features = ["serde"] }
12
+
langtag = { version = "0.3", features = ["serde"] }
13
+
regex = "1.11.1"
14
+
serde = { workspace = true, features = ["derive"] }
15
+
serde_bytes = "0.11.15"
16
+
serde_ipld_dagcbor = "0.6.2"
17
+
serde_json.workspace = true
18
+
thiserror = "2.0.11"
19
+
20
+
# features
21
+
[features]
22
+
default = [
23
+
"namespace-fmteal",
24
+
"namespace-appbsky",
25
+
"namespace-toolsozone",
26
+
"namespace-chatbsky",
27
+
]
28
+
namespace-fmteal = []
29
+
namespace-appbsky = []
30
+
namespace-toolsozone = []
31
+
namespace-chatbsky = []
+10
services/types/readme.md
+10
services/types/readme.md
···
1
+
## Types
2
+
Rust lexicons for teal.fm and others.
3
+
4
+
### Generate lexicons
5
+
You will need to install [esquema-cli](https://github.com/fatfingers23/esquema), a fork of the [atrium codegen tool](https://github.com/sugyan/atrium).
6
+
7
+
Currently it can be installed directly from the repo:
8
+
`cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git`
9
+
10
+
Then regenerate the types with `esquema-cli generate local --lexdir ./lexicons --outdir ./src` from this directory.
+55
target.sh
+55
target.sh
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Debug: Print all available build variables
5
+
echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM"
6
+
echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM"
7
+
echo "DEBUG: TARGETARCH=$TARGETARCH"
8
+
echo "DEBUG: TARGETOS=$TARGETOS"
9
+
10
+
# Use TARGETARCH directly (more reliable than TARGETPLATFORM)
11
+
TARGET_ARCH_VAR="${TARGETARCH:-}"
12
+
13
+
# If TARGETARCH is not set, try to extract from TARGETPLATFORM
14
+
if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then
15
+
TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2)
16
+
echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM"
17
+
fi
18
+
19
+
# Final fallback: detect from uname
20
+
if [ -z "$TARGET_ARCH_VAR" ]; then
21
+
ARCH=$(uname -m)
22
+
case "$ARCH" in
23
+
"x86_64")
24
+
TARGET_ARCH_VAR="amd64"
25
+
;;
26
+
"aarch64")
27
+
TARGET_ARCH_VAR="arm64"
28
+
;;
29
+
*)
30
+
echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH"
31
+
echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM"
32
+
exit 1
33
+
;;
34
+
esac
35
+
echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname"
36
+
fi
37
+
38
+
# Map architecture to Rust target
39
+
case "$TARGET_ARCH_VAR" in
40
+
"amd64")
41
+
export RUST_TARGET="x86_64-unknown-linux-gnu"
42
+
export TARGET_ARCH="amd64"
43
+
;;
44
+
"arm64")
45
+
export RUST_TARGET="aarch64-unknown-linux-gnu"
46
+
export TARGET_ARCH="arm64"
47
+
;;
48
+
*)
49
+
echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR"
50
+
echo "Supported architectures: amd64, arm64"
51
+
exit 1
52
+
;;
53
+
esac
54
+
55
+
echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
+92
-62
tools/lexicon-cli/src/commands/generate.ts
+92
-62
tools/lexicon-cli/src/commands/generate.ts
···
1
-
import { execa } from 'execa';
2
-
import { existsSync } from 'fs';
3
-
import { join } from 'path';
4
-
import pc from 'picocolors';
5
-
import { findWorkspaceRoot } from '../utils/workspace.js';
1
+
import { existsSync } from "fs";
2
+
import { join } from "path";
3
+
import { execa } from "execa";
4
+
import pc from "picocolors";
5
+
6
+
import { findWorkspaceRoot } from "../utils/workspace.js";
6
7
7
8
interface GenerateOptions {
8
9
tsOnly?: boolean;
···
12
13
13
14
export async function generate(options: GenerateOptions = {}) {
14
15
const workspaceRoot = findWorkspaceRoot();
15
-
16
-
console.log(pc.blue('๐ง Generating lexicon types...'));
17
-
16
+
17
+
console.log(pc.blue("๐ง Generating lexicon types..."));
18
+
18
19
try {
19
20
if (!options.rustOnly) {
20
21
await generateTypeScript(workspaceRoot, options.force);
21
22
}
22
-
23
+
23
24
if (!options.tsOnly) {
24
25
await generateRust(workspaceRoot, options.force);
25
26
}
26
-
27
-
console.log(pc.green('โ
Lexicon generation complete!'));
27
+
28
+
console.log(pc.green("โ
Lexicon generation complete!"));
28
29
} catch (error) {
29
-
console.error(pc.red('โ Generation failed:'), error instanceof Error ? error.message : String(error));
30
+
console.error(
31
+
pc.red("โ Generation failed:"),
32
+
error instanceof Error ? error.message : String(error),
33
+
);
30
34
process.exit(1);
31
35
}
32
36
}
33
37
34
38
async function generateTypeScript(workspaceRoot: string, force?: boolean) {
35
-
const lexiconsPath = join(workspaceRoot, 'lexicons');
36
-
39
+
const lexiconsPath = join(workspaceRoot, "lexicons");
40
+
37
41
if (!existsSync(lexiconsPath)) {
38
-
throw new Error('Lexicons directory not found at workspace root');
42
+
throw new Error("Lexicons directory not found at workspace root");
39
43
}
40
-
44
+
41
45
// Check if packages/lexicons exists for TypeScript generation
42
-
const packagesLexiconsPath = join(workspaceRoot, 'packages/lexicons');
46
+
const packagesLexiconsPath = join(workspaceRoot, "packages/lexicons");
43
47
if (!existsSync(packagesLexiconsPath)) {
44
-
console.log(pc.yellow(' โ ๏ธ TypeScript lexicons package not found, skipping TypeScript generation'));
48
+
console.log(
49
+
pc.yellow(
50
+
" โ ๏ธ TypeScript lexicons package not found, skipping TypeScript generation",
51
+
),
52
+
);
45
53
return;
46
54
}
47
-
48
-
console.log(pc.cyan(' ๐ฆ Generating TypeScript types...'));
49
-
55
+
56
+
console.log(pc.cyan(" ๐ฆ Generating TypeScript types..."));
57
+
50
58
try {
51
-
await execa('pnpm', ['lex:gen-server'], {
59
+
await execa("pnpm", ["lex:gen-server"], {
52
60
cwd: packagesLexiconsPath,
53
-
stdio: 'inherit'
61
+
stdio: "inherit",
54
62
});
55
-
console.log(pc.green(' โ TypeScript types generated'));
63
+
console.log(pc.green(" โ TypeScript types generated"));
56
64
} catch (error) {
57
-
throw new Error(`TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`);
65
+
throw new Error(
66
+
`TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`,
67
+
);
58
68
}
59
69
}
60
70
61
71
async function generateRust(workspaceRoot: string, force?: boolean) {
62
-
const typesPath = join(workspaceRoot, 'services/types');
63
-
const lexiconsPath = join(workspaceRoot, 'lexicons');
64
-
72
+
const typesPath = join(workspaceRoot, "services/types");
73
+
const lexiconsPath = join(workspaceRoot, "lexicons");
74
+
65
75
if (!existsSync(typesPath)) {
66
-
throw new Error('Rust types service not found');
76
+
throw new Error("Rust types service not found");
67
77
}
68
-
78
+
69
79
if (!existsSync(lexiconsPath)) {
70
-
throw new Error('Lexicons directory not found at workspace root');
80
+
throw new Error("Lexicons directory not found at workspace root");
71
81
}
72
-
73
-
console.log(pc.cyan(' ๐ฆ Generating Rust types...'));
74
-
82
+
83
+
console.log(pc.cyan(" ๐ฆ Generating Rust types..."));
84
+
75
85
try {
76
86
// Check if esquema-cli is available
77
87
try {
78
-
await execa('esquema-cli', ['--version'], { stdio: 'pipe' });
88
+
await execa("esquema-cli", ["--version"], { stdio: "pipe" });
79
89
} catch {
80
-
console.log(pc.yellow(' โ ๏ธ esquema-cli not found. Installing...'));
90
+
console.log(pc.yellow(" โ ๏ธ esquema-cli not found. Installing..."));
81
91
try {
82
-
await execa('cargo', [
83
-
'install',
84
-
'esquema-cli',
85
-
'--git',
86
-
'https://github.com/fatfingers23/esquema.git'
87
-
], {
88
-
stdio: 'inherit'
89
-
});
90
-
console.log(pc.green(' โ esquema-cli installed successfully'));
92
+
await execa(
93
+
"cargo",
94
+
[
95
+
"install",
96
+
"esquema-cli",
97
+
"--git",
98
+
"https://github.com/fatfingers23/esquema.git",
99
+
],
100
+
{
101
+
stdio: "inherit",
102
+
},
103
+
);
104
+
console.log(pc.green(" โ esquema-cli installed successfully"));
91
105
} catch (installError) {
92
-
throw new Error('Failed to install esquema-cli. Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git');
106
+
throw new Error(
107
+
"Failed to install esquema-cli. Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git",
108
+
);
93
109
}
94
110
}
95
-
96
-
await execa('esquema-cli', [
97
-
'generate',
98
-
'local',
99
-
'--lexdir',
100
-
lexiconsPath,
101
-
'--outdir',
102
-
join(typesPath, 'src')
103
-
], {
104
-
cwd: typesPath,
105
-
stdio: 'inherit'
106
-
});
107
-
108
-
console.log(pc.green(' โ Rust types generated'));
111
+
112
+
// create typespath/src if it doesn't exist
113
+
if (!existsSync(join(typesPath, "src"))) {
114
+
console.log(pc.yellow(" Creating src directory for Rust types..."));
115
+
await execa("mkdir", ["-p", join(typesPath, "src")], {
116
+
stdio: "inherit",
117
+
});
118
+
}
119
+
120
+
await execa(
121
+
"esquema-cli",
122
+
[
123
+
"generate",
124
+
"local",
125
+
"--lexdir",
126
+
lexiconsPath,
127
+
"--outdir",
128
+
join(typesPath, "src"),
129
+
],
130
+
{
131
+
cwd: typesPath,
132
+
stdio: "inherit",
133
+
},
134
+
);
135
+
136
+
console.log(pc.green(" โ Rust types generated"));
109
137
} catch (error) {
110
-
throw new Error(`Rust generation failed: ${error instanceof Error ? error.message : String(error)}`);
138
+
throw new Error(
139
+
`Rust generation failed: ${error instanceof Error ? error.message : String(error)}`,
140
+
);
111
141
}
112
-
}
142
+
}
+44
tools/teal-cli/Cargo.toml
+44
tools/teal-cli/Cargo.toml
···
1
+
[package]
2
+
name = "teal-cli"
3
+
version = "0.1.0"
4
+
edition = "2021"
5
+
description = "A simple management tool for teal.fm AT Protocol services"
6
+
7
+
[[bin]]
8
+
name = "teal"
9
+
path = "src/main.rs"
10
+
11
+
[dependencies]
12
+
# CLI framework
13
+
clap = { version = "4.0", features = ["derive"] }
14
+
anyhow = "1.0"
15
+
serde = { version = "1.0", features = ["derive"] }
16
+
serde_json = "1.0"
17
+
18
+
# Async runtime
19
+
tokio = { version = "1.0", features = [
20
+
"rt",
21
+
"macros",
22
+
"fs",
23
+
"rt-multi-thread",
24
+
] }
25
+
26
+
# Cryptography
27
+
k256 = { version = "0.13", features = ["ecdsa"] }
28
+
multibase = "0.9"
29
+
hex = "0.4"
30
+
rand = "0.8"
31
+
32
+
# File system and paths
33
+
dirs = "5.0"
34
+
35
+
# Utilities
36
+
chrono = { version = "0.4", features = ["serde"] }
37
+
colored = "2.0"
38
+
39
+
40
+
[features]
41
+
default = []
42
+
43
+
[dev-dependencies]
44
+
tempfile = "3.0"
+257
tools/teal-cli/README.md
+257
tools/teal-cli/README.md
···
1
+
# Teal CLI

A comprehensive management tool for Teal AT Protocol services, featuring cryptographic key management and CAR (Content Addressable aRchive) file exploration.

## Installation

From the project root:

```bash
cargo build --release --bin teal
```

The binary will be available at `target/release/teal`.

## Usage

### CAR File Explorer

Explore and analyze CAR files containing AT Protocol and Teal records.

#### Fetch CAR file from the internet

```bash
# Fetch from AT Protocol handle
teal car fetch --identity alice.bsky.social

# Fetch from DID
teal car fetch --identity did:plc:vdjlpwlhbnug4fnjodwr3vzh

# Fetch and save to specific file
teal car fetch --identity mmatt.net --output mmatt.car

# Fetch and immediately explore
teal car fetch --identity mmatt.net --explore
```

#### Explore a CAR file

```bash
# Basic exploration
teal car explore --file path/to/archive.car

# Verbose output with detailed information
teal car explore --file path/to/archive.car --verbose
```

#### Search for specific content

```bash
# Search for records containing "play"
teal car search --file path/to/archive.car --query "play"

# Search with verbose JSON output
teal car search --file path/to/archive.car --query "queen" --verbose
```

#### Export Teal records to JSON

```bash
# Export to default directory (./teal_exports)
teal car export --file path/to/archive.car

# Export to custom directory
teal car export --file path/to/archive.car --output ./my_exports
```

### Generate a new K256 key pair

```bash
# Generate with default settings (saves to ~/.teal/keys/)
teal gen-key

# Generate with custom name
teal gen-key --name production

# Generate with custom output directory
teal gen-key --output ./keys

# Overwrite existing keys
teal gen-key --force

# Output only the multibase (useful for scripts)
teal gen-key --format multibase

# Output as JSON
teal gen-key --format json
```

### Extract public key from existing private key

```bash
# Extract as multibase (default)
teal extract-pubkey --private-key ./keys/repo.key

# Extract as hex
teal extract-pubkey --private-key ./keys/repo.key --format hex

# Extract as JSON with both formats
teal extract-pubkey --private-key ./keys/repo.key --format json
```

### List available keys

```bash
# List keys in default directory
teal list

# List keys in custom directory
teal list --directory ./keys
```

### Rotate keys (backup old, generate new)

```bash
# Rotate the default 'repo' key
teal rotate --name repo

# Rotate with custom backup directory
teal rotate --name repo --backup-dir ./backups
```

## CAR File Analysis

The CAR explorer can analyze AT Protocol archives and identify:

- **Teal Records**: Music plays (`fm.teal.alpha.feed.play`), profiles (`fm.teal.alpha.actor.profile`), and status updates
- **AT Protocol Records**: Bluesky posts, likes, follows, and other social data
- **Commit Operations**: Repository changes and metadata
- **IPLD Structure**: Content addressing and linking

### Example Output

```
🔍 CAR Analysis Results
==================================================

📁 File Overview:
  File size: 10267026 bytes
  Total blocks: 30195
  Root CIDs: 1

📊 Record Types:
  app.bsky.feed.like: 11034
  app.bsky.feed.post: 7510
  fm.teal.alpha.feed.play: 2605
  fm.teal.alpha.actor.profile: 1

🎵 Teal Records Found:
  fm.teal.alpha.feed.play: 2605
  fm.teal.alpha.actor.profile: 1

📝 Sample Teal Records:
  1. fm.teal.alpha.feed.play (bafyreigmu...)
     🎵 Track: Bohemian Rhapsody
     🎤 Artists: Queen
     ⏱️ Duration: 355000ms
```

### Exported JSON Structure

```json
[
  {
    "cid": "bafyreigmuwliezhxczoxgxq5hjtsdzaj3jl54kg...",
    "data": {
      "$type": "fm.teal.alpha.feed.play",
      "track_name": "Bohemian Rhapsody",
      "artist_names": ["Queen"],
      "duration": 355000,
      "played_time": "2024-01-15T14:30:00Z"
    }
  }
]
```
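
For reference, here is a minimal serde sketch of this export shape. The struct and field names below are illustrative only (they are not types shipped by the CLI), and the optional fields are an assumption for records that omit them:

```rust
use serde::Deserialize;

/// Illustrative shape of one entry in the exported JSON array.
#[derive(Debug, Deserialize)]
struct ExportedRecord {
    cid: String,
    data: PlayRecord,
}

/// Mirrors the `fm.teal.alpha.feed.play` fields shown in the example above.
#[derive(Debug, Deserialize)]
struct PlayRecord {
    #[serde(rename = "$type")]
    record_type: String,
    track_name: String,
    artist_names: Vec<String>,
    duration: Option<u64>,
    played_time: Option<String>,
}

/// Parse a whole export file into typed records.
fn parse_exports(json: &str) -> serde_json::Result<Vec<ExportedRecord>> {
    serde_json::from_str(json)
}
```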

## Key Management

The tool generates K256 (secp256k1) keys compatible with AT Protocol:

- **Private Key**: 32-byte secp256k1 private key stored as binary
- **Public Key**: Base58-encoded multibase of the compressed public key
- **Default Location**: `~/.teal/keys/`
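
The multibase stored in `*.pub` is simply the base58btc encoding of the 33-byte compressed public key, mirroring what `src/crypto.rs` does. A minimal sketch using the same crates as the CLI:

```rust
use k256::ecdsa::SigningKey;
use multibase::Base;
use rand::rngs::OsRng;

// Generate a key and derive the AT Protocol style multibase from it.
fn demo_multibase() -> String {
    let signing_key = SigningKey::random(&mut OsRng);
    let compressed = signing_key
        .verifying_key()
        .to_encoded_point(true) // 33-byte compressed SEC1 point
        .as_bytes()
        .to_vec();
    // Base58btc multibase strings carry the leading "z"
    multibase::encode(Base::Base58Btc, compressed)
}
```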

### File Structure

```
~/.teal/keys/
├── repo.key           # Private key (32 bytes, binary)
├── repo.pub           # Public key multibase (text)
├── production.key     # Another private key
└── production.pub     # Another public key multibase
```

## Integration

Replace the hardcoded multibase in your DID document:

```rust
// Before (hardcoded)
"publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK"

// After (using generated key)
let pubkey = std::fs::read_to_string("~/.teal/keys/repo.pub")?;
// Use pubkey in your DID document
```
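
Note that `std::fs` does not expand `~`. If you load the key from a home-relative path, resolve it first; a sketch using the `dirs` crate (already a dependency of the CLI), with the helper names here being illustrative:

```rust
use std::path::PathBuf;

// Resolve "~/.teal/keys/repo.pub" to an absolute path before reading it.
fn repo_pubkey_path() -> PathBuf {
    dirs::home_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join(".teal")
        .join("keys")
        .join("repo.pub")
}

fn read_repo_pubkey() -> std::io::Result<String> {
    // Trim the trailing newline the CLI may write with the multibase.
    Ok(std::fs::read_to_string(repo_pubkey_path())?.trim().to_string())
}
```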

## Examples

### CAR File Analysis

```bash
# Fetch CAR file from a user's handle
teal car fetch --identity mmatt.net --output mmatt.car

# Fetch and immediately explore
teal car fetch --identity alice.bsky.social --explore

# Analyze a local CAR export
teal car explore --file nat.car

# Search for specific tracks
teal car search --file nat.car --query "bohemian rhapsody"

# Export all Teal records for data analysis
teal car export --file nat.car --output ./music_data

# View exported play records
cat ./music_data/fm_teal_alpha_feed_play.json | jq '.[0]'
```

### Quick setup

```bash
# Generate a key for development
teal gen-key --name dev

# Get the multibase for your DID document
teal extract-pubkey --private-key ~/.teal/keys/dev.key
```

### Production deployment

```bash
# Generate production keys in a secure location
teal gen-key --name production --output /secure/keys

# Extract multibase for configuration
PUBKEY=$(teal extract-pubkey --private-key /secure/keys/production.key)
echo "Public key: $PUBKEY"
```

## Security Notes

- Private keys are stored as raw 32-byte files with restrictive permissions (600 on Unix)
- Keys are generated using cryptographically secure random number generation
- Never commit private keys to version control
- Consider using secure key management systems in production
+104
tools/teal-cli/rkey_example.md
···
# How to Extract rkey from AT Protocol CAR Files

The **rkey** (record key) is not stored inside the IPLD record data itself. Instead, it's found in **commit operations** that map collection paths to record CIDs.

## AT Protocol Structure

```
Repository Structure:
├── Records (IPLD blocks)
│   ├── bafyrei123... (actual play record data)
│   ├── bafyrei456... (actual profile record data)
│   └── bafyrei789... (actual post record data)
└── Commits (IPLD blocks)
    ├── bafycommit1... (operations mapping paths to CIDs)
    └── bafycommit2... (more operations)
```

## Example: Record IPLD (without rkey)

```json
{
  "$type": "fm.teal.alpha.feed.play",
  "track_name": "Bohemian Rhapsody",
  "artist_names": ["Queen"],
  "duration": 355000,
  "played_time": "2024-01-15T14:30:00Z"
}
```

**❌ No rkey here!** The record contains the data but not its key.

## Example: Commit IPLD (with rkey mappings)

```json
{
  "ops": [
    {
      "action": "create",
      "path": "fm.teal.alpha.feed.play/3k2akjdlkjsf", // ← collection/rkey
      "cid": "bafyrei123..." // ← points to the record above
    },
    {
      "action": "create",
      "path": "fm.teal.alpha.actor.profile/self",
      "cid": "bafyrei456..."
    }
  ],
  "prev": "bafyrei...",
  "rev": "3k2bkl...",
  "time": "2024-01-15T14:35:00Z"
}
```

**✅ rkey is here!** Extract it from the `path` field: `"3k2akjdlkjsf"`

## Extraction Algorithm

```rust
use std::collections::HashMap;

fn extract_rkeys_from_commits(commits: &[CommitInfo]) -> HashMap<String, String> {
    let mut cid_to_rkey = HashMap::new();

    for commit in commits {
        for operation in &commit.operations {
            // Path format: "collection/rkey"
            if let Some(rkey) = operation.path.split('/').last() {
                if let Some(ref record_cid) = operation.record_cid {
                    cid_to_rkey.insert(record_cid.clone(), rkey.to_string());
                }
            }
        }
    }

    cid_to_rkey
}
```

## Complete Example

1. **Find commit blocks** in CAR file
2. **Extract operations** from commit IPLD
3. **Parse paths** like `"fm.teal.alpha.feed.play/3k2akjdlkjsf"`
4. **Map CID → rkey**: `bafyrei123... → 3k2akjdlkjsf`
5. **Use rkey** when processing records

## Why This Matters

The rkey is essential for:

- **AT URI construction**: `at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf` (see the sketch after this list)
- **Record identity**: Uniquely identifies the record within the collection
- **Data integrity**: Maintains proper AT Protocol addressing
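
A minimal sketch of that AT URI construction, using the commit `path` (`collection/rkey`) and the placeholder DID from the example above:

```rust
/// Build an AT URI from a repo DID and a commit operation path ("collection/rkey").
fn at_uri(did: &str, op_path: &str) -> String {
    format!("at://{}/{}", did, op_path)
}

fn main() {
    let uri = at_uri("did:plc:user123", "fm.teal.alpha.feed.play/3k2akjdlkjsf");
    assert_eq!(uri, "at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf");
}
```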

## CLI Usage

```bash
# Explore CAR file and show rkey extraction
teal car explore --file archive.car --verbose

# The verbose output will show:
# 🔑 rkey Extraction Examples:
#   1. bafyrei123... → rkey: 3k2akjdlkjsf
#   2. bafyrei456... → rkey: self
```

**Note**: Some CAR files may not contain commit operations with rkey mappings, especially if they're partial exports or contain only raw records without repository structure.
+116
tools/teal-cli/src/commands/dev.rs
···
use anyhow::Result;
use colored::*;

use crate::config::TealConfig;
use crate::DevCommands;

pub async fn run(cmd: DevCommands, config: &TealConfig) -> Result<()> {
    match cmd {
        DevCommands::Setup {
            skip_docker,
            skip_db,
        } => setup_dev_environment(skip_docker, skip_db, config).await,
        DevCommands::Clean { all } => clean_dev_artifacts(all).await,
        DevCommands::Dev { port, watch } => run_dev_server(port, watch, config).await,
        DevCommands::Seed { count, data_type } => generate_seed_data(count, data_type, config).await,
    }
}

async fn setup_dev_environment(
    skip_docker: bool,
    skip_db: bool,
    config: &TealConfig,
) -> Result<()> {
    println!("{} Setting up development environment...", "🛠️".blue());
    println!();

    if !skip_docker {
        println!("{} Docker Setup:", "🐳".blue());
        println!(" {} Checking Docker...", "•".bold());

        // TODO: Check if Docker is installed and running
        println!(" {} Docker check not implemented", "⚠️".yellow());
        println!(" {} Manually ensure Docker is running", "💡".blue());
        println!();
    }

    if !skip_db {
        println!("{} Database Setup:", "🗄️".blue());
        println!(" {} Database URL: {}", "•".bold(), mask_db_url(&config.database.url));

        // TODO: Run database initialization and migrations
        println!(" {} Database setup not implemented", "⚠️".yellow());
        println!(" {} Run: teal database init", "💡".blue());
        println!(" {} Run: teal database migrate", "💡".blue());
        println!();
    }

    println!("{} Keys Setup:", "🔑".blue());
    let key_path = config.get_key_path(&config.crypto.default_key_name);
    if key_path.exists() {
        println!(" {} Default key already exists", "✅".green());
    } else {
        println!(" {} Generating default key...", "•".bold());
        // TODO: Auto-generate key
        println!(" {} Run: teal crypto gen-key", "💡".blue());
    }
    println!();

    println!("{} Development environment setup complete!", "✅".green());
    println!();
    println!("{} Next steps:", "💡".yellow());
    println!(" 1. teal crypto gen-key --name dev");
    println!(" 2. teal database init");
    println!(" 3. teal dev dev --watch");

    Ok(())
}

async fn clean_dev_artifacts(all: bool) -> Result<()> {
    println!("{} Cleaning development artifacts...", "🧹".blue());
    println!();

    let mut cleaned_items = Vec::new();

    // Clean logs
    if let Ok(entries) = std::fs::read_dir("logs") {
        let mut log_count = 0;
        for entry in entries.flatten() {
            if entry.path().extension().map_or(false, |ext| ext == "log") {
                // TODO: Actually delete log files
                log_count += 1;
            }
        }
        if log_count > 0 {
            cleaned_items.push(format!("{} log files", log_count));
        }
    }

    // Clean temporary files
    if let Ok(entries) = std::fs::read_dir(".") {
        let mut temp_count = 0;
        for entry in entries.flatten() {
            let path = entry.path();
            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                if name.starts_with("tmp_") || name.ends_with(".tmp") {
                    temp_count += 1;
                }
            }
        }
        if temp_count > 0 {
            cleaned_items.push(format!("{} temporary files", temp_count));
        }
    }

    if all {
        // Clean build artifacts
        cleaned_items.push("build artifacts".to_string());
        println!(" {} Would clean: target/ directory", "•".bold());

        // Clean Docker artifacts
        cleaned_items.push("Docker artifacts".to_string());
        println!(" {} Would clean: Docker images and containers", "•".bold());
    }

    if cleaned_items.is_empty() {
        println!("{} No artifacts to clean", "ℹ️".blue());
+349
tools/teal-cli/src/crypto.rs
···
use anyhow::{Context, Result};
use colored::*;
use k256::ecdsa::{SigningKey, VerifyingKey};
use k256::SecretKey;
use multibase::Base;
use rand::rngs::OsRng;
use serde_json::json;
use std::path::PathBuf;
use tokio::fs;

/// Generate a new K256 private key
pub fn generate_private_key() -> SigningKey {
    SigningKey::random(&mut OsRng)
}

/// Load a private key from a file
pub async fn load_private_key(path: &PathBuf) -> Result<SigningKey> {
    let key_bytes = fs::read(path)
        .await
        .with_context(|| format!("Failed to read private key from {:?}", path))?;

    if key_bytes.len() != 32 {
        anyhow::bail!(
            "Invalid private key length. Expected 32 bytes, got {}",
            key_bytes.len()
        );
    }

    let secret_key = SecretKey::from_slice(&key_bytes).context("Failed to parse private key")?;

    Ok(SigningKey::from(secret_key))
}

/// Save a private key to a file
pub async fn save_private_key(key: &SigningKey, path: &PathBuf) -> Result<()> {
    let key_bytes = key.as_nonzero_scalar().to_bytes();

    // Create parent directory if it doesn't exist
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)
            .await
            .with_context(|| format!("Failed to create key directory: {:?}", parent))?;
    }

    fs::write(path, key_bytes)
        .await
        .with_context(|| format!("Failed to write private key to {:?}", path))?;

    // Set restrictive permissions on Unix systems
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(path).await?.permissions();
        perms.set_mode(0o600); // rw-------
        fs::set_permissions(path, perms).await?;
    }

    Ok(())
}

/// Convert a public key to AT Protocol compatible multibase format
pub fn public_key_to_multibase(public_key: &VerifyingKey) -> Result<String> {
    // Get the compressed public key bytes (33 bytes)
    let public_key_bytes = public_key.to_encoded_point(true).as_bytes().to_vec();

    // Encode as multibase with base58btc (z prefix)
    let multibase_string = multibase::encode(Base::Base58Btc, &public_key_bytes);

    Ok(multibase_string)
}

/// Generate a new key pair and save to files
pub async fn generate_key(
    name: String,
    keys_dir: PathBuf,
    force: bool,
    format: String,
) -> Result<()> {
    let private_key_path = keys_dir.join(format!("{}.key", name));
    let public_key_path = keys_dir.join(format!("{}.pub", name));

    // Check if files already exist
    if !force && (private_key_path.exists() || public_key_path.exists()) {
        anyhow::bail!(
            "Key files already exist for '{}'. Use --force to overwrite.\n Private: {:?}\n Public: {:?}",
            name,
            private_key_path,
            public_key_path
        );
    }

    println!(
        "{} Generating K256 key pair for '{}'...",
        "🔑".blue(),
        name.bold()
    );

    // Generate new private key
    let private_key = generate_private_key();
    let public_key = private_key.verifying_key();

    // Save private key
    save_private_key(&private_key, &private_key_path)
        .await
        .with_context(|| format!("Failed to save private key to {:?}", private_key_path))?;

    // Generate public key multibase
    let public_key_multibase =
        public_key_to_multibase(public_key).context("Failed to generate public key multibase")?;

    // Output based on format
    match format.as_str() {
        "json" => {
            let output = json!({
                "keyName": name,
                "privateKeyPath": private_key_path,
                "publicKeyPath": public_key_path,
                "publicKeyMultibase": public_key_multibase,
                "publicKeyHex": hex::encode(public_key.to_encoded_point(false).as_bytes()),
            });
            println!("{}", serde_json::to_string_pretty(&output)?);
        }
        "multibase" => {
            println!("{}", public_key_multibase);
        }
        _ => {
            // includes "files"
            // Save public key multibase to file
            fs::write(&public_key_path, &public_key_multibase)
                .await
                .with_context(|| format!("Failed to write public key to {:?}", public_key_path))?;

            println!("{} Key pair generated successfully!", "✅".green());
            println!(" {} {}", "Name:".bold(), name);
            println!(" {} {:?}", "Private key:".bold(), private_key_path);
            println!(" {} {:?}", "Public key:".bold(), public_key_path);
            println!(
                " {} {}",
                "Multibase:".bold(),
                public_key_multibase.bright_blue()
            );
            println!();
            println!("{} Add this to your DID document:", "💡".yellow());
            println!(" \"publicKeyMultibase\": \"{}\"", public_key_multibase);
        }
    }

    Ok(())
}

/// Extract public key from private key file
pub async fn extract_pubkey(private_key_path: PathBuf, format: String) -> Result<()> {
    println!(
        "{} Extracting public key from {:?}...",
        "🔑".blue(),
        private_key_path
    );

    let private_key = load_private_key(&private_key_path)
        .await
        .with_context(|| format!("Failed to load private key from {:?}", private_key_path))?;

    let public_key = private_key.verifying_key();

    match format.as_str() {
        "multibase" => {
            let multibase = public_key_to_multibase(public_key)?;
            println!("{}", multibase);
        }
        "hex" => {
            let hex = hex::encode(public_key.to_encoded_point(false).as_bytes());
            println!("{}", hex);
        }
        "compressed-hex" => {
            let hex = hex::encode(public_key.to_encoded_point(true).as_bytes());
            println!("{}", hex);
        }
        "json" => {
            let multibase = public_key_to_multibase(public_key)?;
            let hex_uncompressed = hex::encode(public_key.to_encoded_point(false).as_bytes());
            let hex_compressed = hex::encode(public_key.to_encoded_point(true).as_bytes());

            let output = json!({
                "publicKeyMultibase": multibase,
                "publicKeyHex": hex_uncompressed,
                "publicKeyHexCompressed": hex_compressed,
            });
            println!("{}", serde_json::to_string_pretty(&output)?);
        }
        _ => {
            anyhow::bail!(
                "Invalid format '{}'. Use: multibase, hex, compressed-hex, or json",
                format
            );
        }
    }

    Ok(())
}

/// List available keys in directory
pub async fn list_keys(keys_dir: PathBuf) -> Result<()> {
    if !keys_dir.exists() {
        println!("{} No keys directory found at {:?}", "ℹ️".blue(), keys_dir);
        println!("Run 'teal gen-key' to create your first key.");
        return Ok(());
    }

    let mut keys = Vec::new();
    let mut entries = fs::read_dir(&keys_dir).await?;

    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if let Some(extension) = path.extension() {
            if extension == "key" {
                if let Some(stem) = path.file_stem() {
                    if let Some(name) = stem.to_str() {
                        keys.push(name.to_string());
                    }
                }
            }
        }
    }

    if keys.is_empty() {
        println!("{} No keys found in {:?}", "ℹ️".blue(), keys_dir);
        println!("Run 'teal gen-key' to create your first key.");
        return Ok(());
    }

    keys.sort();

    println!("{} Available keys in {:?}:", "🔑".blue(), keys_dir);
    println!();

    let keys_count = keys.len();

    for key_name in keys {
        let private_path = keys_dir.join(format!("{}.key", key_name));
        let public_path = keys_dir.join(format!("{}.pub", key_name));

        let mut status_parts = Vec::new();

        if private_path.exists() {
            status_parts.push("private".green().to_string());
        }

        if public_path.exists() {
            status_parts.push("public".cyan().to_string());

            // Try to read and display the multibase
            if let Ok(multibase) = fs::read_to_string(&public_path).await {
                let multibase = multibase.trim();
                println!(
                    " {} {} ({})",
                    "•".bold(),
                    key_name.bold(),
                    status_parts.join(", ")
                );
                println!(" {}: {}", "Multibase".dimmed(), multibase.bright_blue());
            } else {
                println!(
                    " {} {} ({})",
                    "•".bold(),
                    key_name.bold(),
                    status_parts.join(", ")
                );
            }
        } else {
            println!(
                " {} {} ({})",
                "•".bold(),
                key_name.bold(),
                status_parts.join(", ")
            );
        }

        // Show file modification times
        if let Ok(metadata) = fs::metadata(&private_path).await {
            if let Ok(modified) = metadata.modified() {
                let datetime = chrono::DateTime::<chrono::Local>::from(modified);
                println!(
                    " {}: {}",
                    "Created".dimmed(),
                    datetime.format("%Y-%m-%d %H:%M:%S").to_string().dimmed()
                );
            }
        }
        println!();
    }

    println!(
        "{} Total: {} key(s)",
        "📊".blue(),
        keys_count.to_string().bold()
    );

    Ok(())
}

/// Rotate a key (backup old, generate new)
pub async fn rotate_key(
    keys_dir: PathBuf,
    name: String,
    backup_dir: Option<PathBuf>,
) -> Result<()> {
    let private_key_path = keys_dir.join(format!("{}.key", name));

    if !private_key_path.exists() {
        anyhow::bail!("Key '{}' does not exist in {:?}", name, keys_dir);
    }

    println!("{} Rotating key '{}'...", "🔄".blue(), name.bold());

    // Backup existing key
    let backup_location = backup_dir.unwrap_or_else(|| keys_dir.join("backups"));

    fs::create_dir_all(&backup_location).await?;

    let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
    let backup_private = backup_location.join(format!("{}_{}.key", name, timestamp));
    let backup_public = backup_location.join(format!("{}_{}.pub", name, timestamp));

    fs::copy(&private_key_path, &backup_private).await?;

    let public_key_path = keys_dir.join(format!("{}.pub", name));
    if public_key_path.exists() {
        fs::copy(&public_key_path, &backup_public).await?;
    }

    println!("Backed up existing key to: {:?}", backup_private);

    // Generate new key
    let new_key = generate_private_key();
    save_private_key(&new_key, &private_key_path).await?;

    // Save new public key multibase
    let public_key = new_key.verifying_key();
    let multibase = public_key_to_multibase(public_key)?;
    fs::write(&public_key_path, &multibase).await?;

    println!("{} Key rotation completed!", "✅".green());
    println!(" {} {}", "New multibase:".bold(), multibase.bright_blue());
    println!();
    println!("{} Update your DID document with:", "💡".yellow());
    println!(" \"publicKeyMultibase\": \"{}\"", multibase);

    Ok(())
}
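
A minimal sketch of how these helpers compose, written as a test against a temporary directory. This test is illustrative commentary and not part of the file above; it assumes only the `tempfile` dev-dependency already declared in Cargo.toml:

```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn roundtrips_key_and_multibase() -> Result<()> {
        let dir = tempfile::tempdir()?;
        let key_path = dir.path().join("repo.key");

        // Generate, persist, then reload the private key.
        let key = generate_private_key();
        save_private_key(&key, &key_path).await?;
        let reloaded = load_private_key(&key_path).await?;

        // The multibase derived from the reloaded key matches the original.
        let original = public_key_to_multibase(key.verifying_key())?;
        let reloaded_mb = public_key_to_multibase(reloaded.verifying_key())?;
        assert_eq!(original, reloaded_mb);
        assert!(original.starts_with('z')); // base58btc multibase prefix

        Ok(())
    }
}
```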
+102
tools/teal-cli/src/main.rs
···
use anyhow::Result;
use clap::{Parser, Subcommand};

use std::path::PathBuf;

mod crypto;

#[derive(Parser)]
#[command(name = "teal")]
#[command(about = "Teal management utilities")]
#[command(version = "0.1.0")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    /// Generate a new K256 key pair
    GenKey {
        /// Key name/identifier
        #[arg(short, long, default_value = "repo")]
        name: String,

        /// Output directory (defaults to ~/.teal/keys)
        #[arg(short, long)]
        output: Option<PathBuf>,

        /// Overwrite existing keys
        #[arg(short, long)]
        force: bool,

        /// Output format: json, multibase, or files
        #[arg(long, default_value = "files")]
        format: String,
    },

    /// Extract public key multibase from private key
    ExtractPubkey {
        /// Path to private key file
        #[arg(short, long)]
        private_key: PathBuf,

        /// Output format
        #[arg(short, long, default_value = "multibase")]
        format: String,
    },

    /// List available keys
    List {
        /// Keys directory (defaults to ~/.teal/keys)
        #[arg(short, long)]
        directory: Option<PathBuf>,
    },

    /// Rotate keys (generate new, backup old)
    Rotate {
        /// Key name to rotate
        #[arg(short, long)]
        name: String,

        /// Backup directory
        #[arg(short, long)]
        backup_dir: Option<PathBuf>,
    },
}

fn get_default_keys_dir() -> PathBuf {
    dirs::home_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join(".teal")
        .join("keys")
}

#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();

    match cli.command {
        Commands::GenKey {
            name,
            output,
            force,
            format,
        } => {
            let keys_dir = output.unwrap_or_else(get_default_keys_dir);
            crypto::generate_key(name, keys_dir, force, format).await
        }
        Commands::ExtractPubkey {
            private_key,
            format,
        } => crypto::extract_pubkey(private_key, format).await,
        Commands::List { directory } => {
            let keys_dir = directory.unwrap_or_else(get_default_keys_dir);
            crypto::list_keys(keys_dir).await
        }
        Commands::Rotate { name, backup_dir } => {
            let keys_dir = get_default_keys_dir();
            crypto::rotate_key(keys_dir, name, backup_dir).await
        }
    }
}
+13
-1
turbo.json
···
    },
    "lex:gen-server": {
      "dependsOn": [],
-     "outputs": ["./src/types/**"]
+     "outputs": ["./src/**"]
    },
    "lex:gen": {
      "dependsOn": [],
···
    },
    "db:migrate": {
      "cache": false
+   },
+   "@teal/amethyst#build": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
+   },
+   "@teal/amethyst#build:web": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
+   },
+   "@teal/amethyst#build:ios": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
    }
  }
}