Changed files
+9645 -2994
.github
.sqlx
.vscode
apps
lexicons
migrations
packages
scripts
services
tools
lexicon-cli
src
commands
teal-cli
+51
.dockerignore
···
# Rust build artifacts
target/
**/target/
services/target/
apps/*/target/

# Node.js dependencies and build artifacts
node_modules/
**/node_modules/
.turbo/
**/.turbo/
build/
dist/
.next/

# Development and cache files
.gitignore
**/.DS_Store
*.log
*.tmp
*.temp

# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~

# Environment and config files
.env
.env.local
.env.*.local

# Database files
*.db
*.sqlite
*.sqlite3

# Test coverage
coverage/
**/coverage/

# Temporary files
tmp/
temp/

# SQLx offline query cache
# Include workspace-level cache for monorepo builds
# Uncomment the line below if you want to force online compilation
# .sqlx/
+22
.env.development
···
# Test Database Environment Configuration
# This file provides database credentials for testing discriminant improvements

# Database Configuration
DB_USER=postgres
DB_PASSWORD=testpass123
DB_NAME=teal_test

# Docker Database URL (used by services in compose)
DOCKER_DB_URL=postgres://postgres:testpass123@postgres:5432/teal_test

# Local Database URL (used by migration tools and local testing)
DATABASE_URL=postgres://postgres:testpass123@localhost:5433/teal_test

# Redis Configuration (if needed)
REDIS_URL=redis://garnet:6379

# AT Protocol Configuration (placeholder for testing)
AT_PROTOCOL_JWT_SECRET=test-jwt-secret-for-development-only

# Client Configuration
CLIENT_ADDRESS=localhost
+10 -7
.env.template
  NODE_ENV=development
  PORT=3000
  HOST=0.0.0.0
- PUBLIC_URL=A publicly accessible url for aqua
+ PUBLIC_URL= # A publicly accessible url for aqua
  DB_USER=postgres
  DB_PASSWORD=supersecurepassword123987
  DB_NAME=teal
  DATABASE_URL="postgresql://${DB_USER}:${DB_PASSWORD}@localhost:5432/${DB_NAME}"
  DOCKER_DB_URL="postgresql://${DB_USER}:${DB_PASSWORD}@host.docker.internal:5432/${DB_NAME}"
- #This is not currently being used fully so can just use this default pubkey for now
+ # `cargo run --bin teal gen-key` to generate a new pubkey
  DID_WEB_PUBKEY=zQ3sheEnMKhEK87PSu4P2mjAevViqHcjKmgxBWsDQPjLRM9wP
- CLIENT_ADDRESS=A publicly accessible host for amethyst like amethyst.teal.fm
- PUBLIC_DID_WEB=did:web:{aqua's PUBLIC_URL goes here after did:web:}
- #amethyst
- EXPO_PUBLIC_DID_WEB=same as PUBLIC_DID_WEB
- EXPO_PUBLIC_BASE_URL=same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm
+ CLIENT_ADDRESS= # A publicly accessible host for amethyst like amethyst.teal.fm
+ PUBLIC_DID_WEB= # did:web:{aqua's PUBLIC_URL goes here after did:web:}
+
+ # amethyst
+ EXPO_PUBLIC_DID_WEB= # same as PUBLIC_DID_WEB
+ EXPO_PUBLIC_BASE_URL= # same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm

+ SQLX_OFFLINE=true
+ SQLX_OFFLINE_DIR="./.sqlx"
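At build time, `SQLX_OFFLINE=true` tells sqlx's compile-time macros to read the checked-in `.sqlx` query cache instead of contacting a live database; at run time the services still connect through `DATABASE_URL`. A minimal sketch of how a service might wire these variables up, assuming the backend uses sqlx's `PgPool` (the function and variable names below are illustrative, not the project's actual code):

```rust
// Minimal sketch, assuming sqlx + PostgreSQL (implied by the .sqlx cache in
// this diff) and a tokio runtime. Names are illustrative only.
use sqlx::postgres::PgPoolOptions;

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    // DATABASE_URL comes from .env.template / .env.development.
    // SQLX_OFFLINE only affects compile-time macro checking, not this call.
    let url = std::env::var("DATABASE_URL").expect("DATABASE_URL must be set");

    let pool = PgPoolOptions::new()
        .max_connections(5)
        .connect(&url)
        .await?;

    // Sanity check that the pool works.
    let one: i32 = sqlx::query_scalar("SELECT 1").fetch_one(&pool).await?;
    assert_eq!(one, 1);
    Ok(())
}
```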
+201
.github/WORKFLOWS.md
···
# GitHub Actions Workflows Documentation

This document describes the CI/CD workflows configured for the Teal project.

## Overview

The project uses GitHub Actions for continuous integration, deployment, and security scanning. The workflows are designed to handle a polyglot codebase with Rust services, Node.js packages, and a React Native application.

## Workflows

### 🔧 CI (`ci.yml`)

**Triggers:** Push/PR to `main` or `develop` branches

**Purpose:** Primary continuous integration workflow that runs tests, linting, and type checking.

**Jobs:**
- **rust-check**: Formats, lints (clippy), and tests all Rust code in both `services/` and `apps/`
- **node-check**: Type checking, linting, building, and testing Node.js packages
- **lexicon-check**: Validates lexicon files and ensures generated code is up to date

**Key Features:**
- Caches Rust and Node.js dependencies for faster builds
- Runs in parallel for optimal performance
- Fails fast if any check fails

### 🚀 Aqua (`aqua.yml`)

**Triggers:** Push/PR to `main` with changes to `apps/aqua/**`

**Purpose:** Builds and pushes the Aqua Rust application Docker image.

**Features:**
- Multi-platform builds (linux/amd64, linux/arm64)
- Pushes to GitHub Container Registry (ghcr.io)
- Only pushes on main branch (not PRs)
- Uses GitHub Actions cache for Docker layers

### 🤖 Cadet (`cadet.yml`)

**Triggers:** Push/PR to `main` with changes to `services/cadet/**`

**Purpose:** Builds and pushes the Cadet Rust service Docker image.

**Features:**
- Multi-platform builds (linux/amd64, linux/arm64)
- Pushes to GitHub Container Registry (ghcr.io)
- Only pushes on main branch (not PRs)
- Uses GitHub Actions cache for Docker layers

### 🔮 Amethyst (`amethyst.yml`)

**Triggers:** Push/PR to `main` with changes to `apps/amethyst/**`

**Purpose:** Builds the React Native/Expo application for different platforms.

**Jobs:**
- **build-web**: Builds web version and uploads artifacts
- **build-ios**: Builds iOS version (only on main branch pushes, requires macOS runner)
- **lint-and-test**: Type checking and testing

**Features:**
- Generates lexicons before building
- Platform-specific builds
- Artifact uploads for build assets

### 🛠️ Services (`services.yml`)

**Triggers:** Push/PR to `main` with changes to `services/**`

**Purpose:** Dynamically detects and builds all services with Dockerfiles.

**Jobs:**
- **detect-services**: Scans for services with Dockerfiles
- **build-service**: Matrix build for each detected service
- **test-services**: Runs tests for all services

**Features:**
- Dynamic service detection
- Skips special directories (target, migrations, types, .sqlx)
- Per-service Docker caching
- Multi-platform builds

### 🎉 Release (`release.yml`)

**Triggers:**
- Push to tags matching `v*`
- Manual workflow dispatch

**Purpose:** Creates GitHub releases and builds production Docker images.

**Jobs:**
- **create-release**: Creates GitHub release with changelog
- **build-and-release-aqua**: Builds and tags Aqua for release
- **build-and-release-cadet**: Builds and tags Cadet for release
- **release-other-services**: Builds other services (rocketman, satellite)
- **build-and-release-amethyst**: Builds Amethyst and uploads to release

**Features:**
- Automatic changelog extraction
- Production Docker tags (latest + version)
- Release artifact uploads
- Support for pre-releases (tags with `-`)

### 🔒 Security (`security.yml`)

**Triggers:**
- Push/PR to `main` or `develop`
- Daily at 2 AM UTC
- Manual dispatch

**Purpose:** Comprehensive security scanning and vulnerability detection.

**Jobs:**
- **rust-security-audit**: Uses `cargo audit` for Rust dependencies
- **node-security-audit**: Uses `pnpm audit` for Node.js dependencies
- **codeql-analysis**: GitHub's semantic code analysis
- **docker-security-scan**: Trivy vulnerability scanning for Docker images
- **secrets-scan**: TruffleHog for secrets detection

**Features:**
- Fails on high/critical vulnerabilities
- SARIF upload for security tab integration
- Historical scanning with git history

## Configuration Files

### Dependabot (`dependabot.yml`)

Automated dependency updates for:
- **npm**: Weekly updates for Node.js dependencies
- **cargo**: Weekly updates for Rust dependencies (services + apps)
- **github-actions**: Weekly updates for workflow actions
- **docker**: Weekly updates for Docker base images

**Schedule:** Monday-Tuesday mornings, staggered to avoid conflicts

## Container Registry

All Docker images are pushed to GitHub Container Registry:
- `ghcr.io/[owner]/[repo]/aqua`
- `ghcr.io/[owner]/[repo]/cadet`
- `ghcr.io/[owner]/[repo]/[service-name]`

**Tags:**
- `latest`: Latest build from main branch
- `sha-[commit]`: Specific commit builds
- `v[version]`: Release builds
- `pr-[number]`: Pull request builds (for testing)

## Secrets and Permissions

**Required secrets:**
- `GITHUB_TOKEN`: Automatically provided (for registry access and releases)

**Permissions used:**
- `contents: read`: Read repository contents
- `packages: write`: Push to GitHub Container Registry
- `security-events: write`: Upload security scan results
- `actions: read`: Access workflow information

## Best Practices

1. **Path-based triggers**: Workflows only run when relevant files change
2. **Caching**: Aggressive caching for Rust, Node.js, and Docker layers
3. **Multi-platform**: Docker images built for amd64 and arm64
4. **Security-first**: Regular vulnerability scanning and secrets detection
5. **Fail-fast**: Early termination on critical issues
6. **Artifact preservation**: Build outputs stored for debugging/deployment

## Usage Examples

### Manual Release
```bash
# Tag and push for automatic release
git tag v1.0.0
git push origin v1.0.0

# Or use workflow dispatch in GitHub UI
```

### Local Development
```bash
# Run the same checks locally
pnpm rust:fmt
pnpm rust:clippy
pnpm typecheck
pnpm test
```

### Debugging Failed Builds
1. Check the Actions tab for detailed logs
2. Download artifacts from successful builds
3. Use the same commands locally with cached dependencies

## Maintenance

- **Weekly**: Review Dependabot PRs
- **Monthly**: Update action versions if not auto-updated
- **Quarterly**: Review and update security scanning tools
- **As needed**: Add new services to release workflow matrix
+77
.github/actions/setup/action.yml
···
name: "Setup Teal Environment"
description: "Sets up the common environment for Teal builds including Node.js, Rust, pnpm, and lexicons"

inputs:
  setup-rust:
    description: "Whether to setup Rust toolchain"
    required: false
    default: "false"
  rust-components:
    description: 'Rust components to install (e.g., "rustfmt,clippy")'
    required: false
    default: "rustfmt,clippy"
  setup-node:
    description: "Whether to setup Node.js and pnpm"
    required: false
    default: "true"
  node-version:
    description: "Node.js version to use"
    required: false
    default: "20"
  lexicons-only-rust:
    description: "Generate only Rust lexicons"
    required: false
    default: "false"
  cache-key-suffix:
    description: "Additional suffix for cache keys"
    required: false
    default: ""

runs:
  using: "composite"
  steps:
    - name: Setup lexicons
      shell: bash
      run: ./scripts/setup-lexicons.sh

    - name: Install pnpm
      if: inputs.setup-node == 'true'
      uses: pnpm/action-setup@v4

    - name: Setup Node.js
      if: inputs.setup-node == 'true'
      uses: actions/setup-node@v4
      with:
        node-version: ${{ inputs.node-version }}
        cache: "pnpm"

    - name: Install Node dependencies
      if: inputs.setup-node == 'true'
      shell: bash
      run: pnpm install --frozen-lockfile

    - name: Generate lexicons
      if: inputs.setup-node == 'true'
      shell: bash
      run: |
        cd tools/lexicon-cli && pnpm i && pnpm build && cd ..
        if [ "${{ inputs.lexicons-only-rust }}" = "true" ]; then
          pnpm lex:gen --rust-only
        else
          pnpm lex:gen
        fi

    - name: Install Rust toolchain
      if: inputs.setup-rust == 'true'
      uses: dtolnay/rust-toolchain@stable
      with:
        components: ${{ inputs.rust-components }}

    - name: Cache Rust dependencies
      if: inputs.setup-rust == 'true'
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: |
          services
          apps/aqua
        key: ${{ inputs.cache-key-suffix }}
+91
.github/workflows/amethyst.yml
···
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Build Amethyst

on:
  push:
    branches: [main]
    paths:
      - "apps/amethyst/**"
      - "packages/**"
      - "lexicons/**"
      - "package.json"
      - "pnpm-lock.yaml"
      - ".github/workflows/amethyst.yml"
  pull_request:
    branches: [main]
    paths:
      - "apps/amethyst/**"
      - "packages/**"
      - "lexicons/**"
      - "package.json"
      - "pnpm-lock.yaml"
      - ".github/workflows/amethyst.yml"

jobs:
  build:
    name: Build Amethyst
    runs-on: ubuntu-latest
    outputs:
      build-cache-key: ${{ steps.cache-key.outputs.key }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-node: "true"

      - name: Generate cache key
        id: cache-key
        run: echo "key=amethyst-build-${{ hashFiles('apps/amethyst/**', 'packages/**', 'lexicons/**') }}" >> $GITHUB_OUTPUT

      - name: Build lex tool
        run: cd tools/lexicon-cli && pnpm i && pnpm build

      - name: Build web
        run: pnpm turbo build:web --filter=@teal/amethyst

      - name: Type check
        run: pnpm turbo check-types --filter=@teal/amethyst

      - name: Run tests
        run: pnpm turbo test --filter=@teal/amethyst

      - name: Upload web build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: amethyst-web-build
          path: apps/amethyst/build/
          retention-days: 7

  build-ios:
    name: Build iOS
    runs-on: macos-latest
    needs: build
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-node: "true"

      - name: Build lex tool
        run: cd tools/lexicon-cli && pnpm i && pnpm build

      - name: Setup Expo CLI
        run: npm install -g @expo/cli

      - name: Build iOS
        run: pnpm turbo build:ios --filter=@teal/amethyst

      - name: Upload iOS build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: amethyst-ios-build
          path: apps/amethyst/build/
          retention-days: 7
+70
.github/workflows/aqua.yml
···
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Build and Push Aqua

on:
  push:
    branches:
      - main
    paths:
      - "apps/aqua/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/aqua.yml"
  pull_request:
    branches:
      - main
    paths:
      - "apps/aqua/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/aqua.yml"

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/aqua

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./apps/aqua/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
+68
.github/workflows/cadet.yml
···
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Build and Push Cadet

on:
  push:
    branches: [main]
    paths:
      - "services/cadet/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/cadet.yml"
  pull_request:
    branches: [main]
    paths:
      - "services/cadet/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/cadet.yml"

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/cadet

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./services/cadet/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
+217
.github/workflows/ci.yml
···
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

env:
  CARGO_TERM_COLOR: always
  SQLX_OFFLINE: true
  SQLX_OFFLINE_DIR: "./.sqlx"

jobs:
  setup-and-build:
    name: Setup and Build All
    runs-on: ubuntu-latest
    outputs:
      rust-cache-key: ${{ steps.rust-cache.outputs.cache-hit }}
      node-cache-key: ${{ steps.node-cache.outputs.cache-hit }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-rust: "true"
          setup-node: "true"
          cache-key-suffix: "ci-build"

      - name: Setup SQLx offline files
        run: ./scripts/setup-sqlx-offline.sh

      - name: Build Node packages
        run: pnpm build

      - name: Build Rust services (x86_64)
        run: |
          cargo build --release --all-features

      - name: Build Rust apps (x86_64)
        run: |
          cd apps/aqua
          cargo build --release --all-features

      - name: Collect executables (x86_64)
        run: |
          mkdir -p artifacts/x86_64
          # Copy service executables
          if [ -d "services/target/release" ]; then
            find services/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
          fi
          # Copy app executables
          if [ -d "apps/aqua/target/release" ]; then
            find apps/aqua/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
          fi
          echo "x86_64 executables:"
          ls -la artifacts/x86_64/ || echo "No executables found"

      - name: Upload Node build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: node-builds
          path: |
            packages/*/dist/
            apps/amethyst/build/
          retention-days: 1

      - name: Upload Rust build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: rust-builds-x86_64
          path: |
            artifacts/x86_64/
          retention-days: 1

  rust-cross-compile:
    name: Cross-compile Rust
    runs-on: ubuntu-latest
    needs: setup-and-build
    strategy:
      matrix:
        target: [aarch64-unknown-linux-gnu]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-rust: "true"
          setup-node: "true"
          lexicons-only-rust: "true"
          cache-key-suffix: "cross-${{ matrix.target }}"

      - name: Setup SQLx offline files
        run: ./scripts/setup-sqlx-offline.sh

      - name: Install cross-compilation tools
        run: |
          cargo install cross --git https://github.com/cross-rs/cross
          rustup target add ${{ matrix.target }}
          # Set up environment for cross-compilation
          echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
          echo "CROSS_NO_WARNINGS=0" >> $GITHUB_ENV

      - name: Cross-compile services
        run: |
          cross build --release --all-features --target ${{ matrix.target }}

      - name: Collect cross-compiled executables
        run: |
          mkdir -p artifacts/${{ matrix.target }}
          # Copy service executables
          if [ -d "services/target/${{ matrix.target }}/release" ]; then
            find services/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
          fi
          # Copy app executables
          if [ -d "apps/aqua/target/${{ matrix.target }}/release" ]; then
            find apps/aqua/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
          fi
          echo "Cross-compiled executables for ${{ matrix.target }}:"
          ls -la artifacts/${{ matrix.target }}/ || echo "No executables found"

      - name: Upload cross-compiled artifacts
        uses: actions/upload-artifact@v4
        with:
          name: rust-builds-${{ matrix.target }}
          path: |
            artifacts/${{ matrix.target }}/
          retention-days: 1

  # disabled b/c it's triggered on autogenerated content
  # and can't find a way around it rn

  # rust-quality:
  #   name: Rust Quality Checks
  #   runs-on: ubuntu-latest
  #   needs: setup-and-build
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v4

  #     - name: Setup environment
  #       uses: ./.github/actions/setup
  #       with:
  #         setup-rust: "true"
  #         setup-node: "true"
  #         lexicons-only-rust: "true"
  #         cache-key-suffix: "ci-build"

  #     - name: Setup SQLx offline files
  #       run: ./scripts/setup-sqlx-offline.sh

  #     # - name: Check Rust formatting
  #     #   run: |
  #     #     cargo fmt --all -- --check

  #     - name: Run Clippy
  #       run: |
  #         cargo clippy --all-targets --all-features --workspace --exclude types -- -D warnings

  #     - name: Run Rust tests
  #       run: |
  #         cargo test --all-features

  # node-quality:
  #   name: Node.js Quality Checks
  #   runs-on: ubuntu-latest
  #   needs: setup-and-build
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v4

  #     - name: Setup environment
  #       uses: ./.github/actions/setup
  #       with:
  #         setup-node: "true"
  #         cache-key-suffix: "ci-build"

  #     - name: Download Node build artifacts
  #       uses: actions/download-artifact@v4
  #       with:
  #         name: node-builds
  #         path: .

  #     # - name: Type check
  #     #   run: pnpm typecheck

  #     - name: Lint and format check
  #       run: pnpm fix --check

  #     - name: Run tests
  #       run: pnpm test

  lexicon-validation:
    name: Lexicon Validation
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-node: "true"

      - name: Validate lexicons
        run: pnpm lex:validate

      - name: Check lexicon generation consistency
        run: |
          pnpm lex:gen
          git diff --exit-code || (echo "Lexicon files are out of sync. Run 'pnpm lex:gen' locally." && exit 1)
+236
.github/workflows/release.yml
···
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Release

on:
  push:
    tags:
      - "v*"
  workflow_dispatch:
    inputs:
      tag:
        description: "Release tag"
        required: true
        type: string

env:
  REGISTRY: ghcr.io
  CARGO_TERM_COLOR: always
  SQLX_OFFLINE: true

jobs:
  create-release:
    name: Create Release
    runs-on: ubuntu-latest
    outputs:
      release_id: ${{ steps.create_release.outputs.id }}
      upload_url: ${{ steps.create_release.outputs.upload_url }}
      tag: ${{ steps.tag.outputs.tag }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Get tag name
        id: tag
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
          else
            echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
          fi

      - name: Generate changelog
        id: changelog
        run: |
          if [ -f "CHANGELOG.md" ]; then
            # Extract changelog for this version
            awk '/^## \[${{ steps.tag.outputs.tag }}\]/{flag=1; next} /^## \[/{flag=0} flag' CHANGELOG.md > release_notes.md
          else
            echo "Release ${{ steps.tag.outputs.tag }}" > release_notes.md
          fi

      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.tag }}
          release_name: Release ${{ steps.tag.outputs.tag }}
          body_path: release_notes.md
          draft: false
          prerelease: ${{ contains(steps.tag.outputs.tag, '-') }}

  build-all:
    name: Build All Artifacts
    runs-on: ubuntu-latest
    needs: create-release
    outputs:
      rust-artifacts: ${{ steps.upload-rust.outputs.artifact-id }}
      node-artifacts: ${{ steps.upload-node.outputs.artifact-id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-rust: "true"
          setup-node: "true"
          cache-key-suffix: "release-${{ needs.create-release.outputs.tag }}"

      - name: Install cross-compilation tools
        run: |
          cargo install cross
          rustup target add aarch64-unknown-linux-gnu

      - name: Build Node.js artifacts
        run: |
          pnpm build
          cd apps/amethyst && pnpm build

      - name: Build Rust services (x86_64)
        run: |
          cd services
          cargo build --release --all-features

      - name: Build Rust services (aarch64)
        run: |
          cd services
          cross build --release --all-features --target aarch64-unknown-linux-gnu

      - name: Build Rust apps (x86_64)
        run: |
          cd apps/aqua
          cargo build --release --all-features

      - name: Build Rust apps (aarch64)
        run: |
          cd apps/aqua
          cross build --release --all-features --target aarch64-unknown-linux-gnu

      - name: Create Amethyst build archive
        run: |
          cd apps/amethyst
          tar -czf amethyst-${{ needs.create-release.outputs.tag }}.tar.gz build/

      - name: Upload Rust build artifacts
        id: upload-rust
        uses: actions/upload-artifact@v4
        with:
          name: rust-release-builds
          path: |
            target/release/
            target/aarch64-unknown-linux-gnu/release/
            apps/aqua/target/release/
            apps/aqua/target/aarch64-unknown-linux-gnu/release/
          retention-days: 7

      - name: Upload Node build artifacts
        id: upload-node
        uses: actions/upload-artifact@v4
        with:
          name: node-release-builds
          path: |
            packages/*/dist/
            apps/amethyst/build/
            apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
          retention-days: 7

      - name: Upload Amethyst build to release
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-release.outputs.upload_url }}
          asset_path: ./apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
          asset_name: amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
          asset_content_type: application/gzip

  release-services:
    name: Release Services
    runs-on: ubuntu-latest
    needs: [create-release, build-all]
    permissions:
      contents: read
      packages: write
    strategy:
      matrix:
        service:
          - name: aqua
            dockerfile: apps/aqua/Dockerfile
            context: .
          - name: cadet
            dockerfile: services/cadet/Dockerfile
            context: .
          - name: rocketman
            dockerfile: services/rocketman/Dockerfile
            context: .
          - name: satellite
            dockerfile: services/satellite/Dockerfile
            context: .
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Check if service has Dockerfile
        id: check
        run: |
          if [ -f "${{ matrix.service.dockerfile }}" ]; then
            echo "has_dockerfile=true" >> $GITHUB_OUTPUT
            echo "Service ${{ matrix.service.name }} has Dockerfile"
          else
            echo "has_dockerfile=false" >> $GITHUB_OUTPUT
            echo "Service ${{ matrix.service.name }} does not have Dockerfile, skipping"
          fi

      - name: Setup environment
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: ./.github/actions/setup
        with:
          setup-node: "true"
          lexicons-only-rust: "true"

      - name: Download build artifacts
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: actions/download-artifact@v4
        with:
          name: rust-release-builds
          path: .

      - name: Log in to Container Registry
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        if: steps.check.outputs.has_dockerfile == 'true'
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ github.repository }}/${{ matrix.service.name }}
          tags: |
            type=raw,value=latest
            type=raw,value=${{ needs.create-release.outputs.tag }}

      - name: Set up Docker Buildx
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: docker/build-push-action@v5
        with:
          context: ${{ matrix.service.context }}
          file: ${{ matrix.service.dockerfile }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha,scope=${{ matrix.service.name }}
          cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}
          build-args: |
            BUILDKIT_INLINE_CACHE=1
+4 -17
.gitignore
  # generated lexicons
  # js lexicons
- */**/lexicons
+ packages/lexicons/src
  # rust lexicons (types :)))
- */**/types
+ services/types/src

  # vendor directory for submodules
  !vendor/
···
  vendor/**/dist/
  vendor/**/node_modules/

- # lexicons directory structure
- !lexicons/
- # Track our custom lexicons
- !lexicons/fm.teal.alpha/
- !lexicons/fm.teal.alpha/**/*.json
- # Track the symlinks to atproto lexicons
- !lexicons/app
- !lexicons/chat
- !lexicons/com
- !lexicons/tools
- # But ignore any generated files within lexicons
- lexicons/**/*.js
- lexicons/**/*.d.ts
- lexicons/**/dist/
- lexicons/**/node_modules/
+ # claude
+ .claude
+126
.pre-commit-config.yaml
···
# Pre-commit configuration for Teal project
# Install with: pip install pre-commit && pre-commit install
# Run manually with: pre-commit run --all-files

repos:
  # General file checks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-json
      - id: check-toml
      - id: check-merge-conflict
      - id: check-added-large-files
        args: ["--maxkb=500"]
      - id: mixed-line-ending
        args: ["--fix=lf"]

  # TypeScript/JavaScript formatting and linting
  - repo: local
    hooks:
      - id: prettier
        name: Prettier
        entry: pnpm prettier --write
        language: system
        files: \.(ts|tsx|js|jsx|json|md|yaml|yml)$
        pass_filenames: true

      - id: biome-check
        name: Biome Check
        entry: pnpm biome check --apply
        language: system
        files: \.(ts|tsx|js|jsx)$
        pass_filenames: false

      # TypeScript check temporarily disabled due to vendor compilation issues
      # - id: typescript-check
      #   name: TypeScript Check
      #   entry: pnpm typecheck
      #   language: system
      #   files: \.(ts|tsx)$
      #   pass_filenames: false

  # Rust formatting and linting
  - repo: local
    hooks:
      - id: cargo-fmt-services
        name: Cargo Format (Services Workspace)
        entry: bash -c 'cd services && cargo fmt'
        language: system
        files: services/.*\.rs$
        pass_filenames: false

      - id: cargo-clippy-services
        name: Cargo Clippy (Services Workspace)
        entry: bash -c 'cd services && cargo clippy -- -D warnings'
        language: system
        files: services/.*\.rs$
        pass_filenames: false

      - id: cargo-fmt-apps
        name: Cargo Format (Apps)
        entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo fmt && cd ../..; fi; done'
        language: system
        files: apps/.*\.rs$
        pass_filenames: false

      - id: cargo-clippy-apps
        name: Cargo Clippy (Apps)
        entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo clippy -- -D warnings && cd ../..; fi; done'
        language: system
        files: apps/.*\.rs$
        pass_filenames: false

  # Lexicon validation and generation
  - repo: local
    hooks:
      - id: lexicon-validate
        name: Validate Lexicons
        entry: pnpm lex:validate
        language: system
        files: lexicons/.*\.json$
        pass_filenames: false

      - id: lexicon-generate
        name: Generate Lexicons (files ignored by .gitignore)
        entry: pnpm lex:gen-server
        language: system
        files: lexicons/.*\.json$
        pass_filenames: false
        always_run: false

  # Optional: Additional checks
  - repo: local
    hooks:
      - id: no-console-log
        name: Check for console.log
        entry: bash -c 'if grep -r "console\.log" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" .; then echo "Found console.log statements. Please remove them."; exit 1; fi'
        language: system
        files: \.(ts|tsx|js|jsx)$
        pass_filenames: false

      - id: check-todos
        name: Check for TODO/FIXME
        entry: bash -c 'if grep -r -i "TODO\|FIXME" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" --include="*.rs" .; then echo "Found TODO/FIXME comments. Consider addressing them."; fi'
        language: system
        files: \.(ts|tsx|js|jsx|rs)$
        pass_filenames: false
        verbose: true

# Global settings
default_language_version:
  node: system
  python: python3

# Skip certain hooks for specific file patterns
exclude: |
  (?x)^(
    vendor/.*|
    node_modules/.*|
    target/.*|
    .git/.*|
    .*\.lock$
  )$
+46
.sqlx/query-00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT DISTINCT\n ae1.name as synthetic_name,\n ae2.name as target_name,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score,\n COUNT(ptae1.play_uri) as synthetic_plays,\n COUNT(ptae2.play_uri) as target_plays\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id\n LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score\n ORDER BY similarity_score DESC\n LIMIT 10\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "synthetic_name", 9 + "type_info": "Text" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "target_name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "similarity_score", 19 + "type_info": "Float4" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "synthetic_plays", 24 + "type_info": "Int8" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "target_plays", 29 + "type_info": "Int8" 30 + } 31 + ], 32 + "parameters": { 33 + "Left": [ 34 + "Float4" 35 + ] 36 + }, 37 + "nullable": [ 38 + false, 39 + false, 40 + null, 41 + null, 42 + null 43 + ] 44 + }, 45 + "hash": "00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75" 46 + }
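The cached query above pairs "synthetic" artists (no MusicBrainz ID yet) with real MusicBrainz artists using the `pg_trgm` `similarity()` function and a caller-supplied threshold. A hedged sketch of how such a cached statement is typically executed with sqlx at run time; the function name and the trimmed column list below are assumptions for illustration, not the project's actual code:

```rust
// Sketch only: preview candidate artist merges above a similarity threshold.
// Requires the pg_trgm extension on the database side for similarity().
use sqlx::{PgPool, Row};

pub async fn preview_artist_merges(pool: &PgPool, min_similarity: f32) -> Result<(), sqlx::Error> {
    let rows = sqlx::query(
        "SELECT ae1.name AS synthetic_name, ae2.name AS target_name, \
         similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) AS similarity_score \
         FROM artists_extended ae1 \
         CROSS JOIN artists_extended ae2 \
         WHERE ae1.id != ae2.id \
           AND ae1.mbid_type = 'synthetic' \
           AND ae2.mbid_type = 'musicbrainz' \
           AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1 \
         ORDER BY similarity_score DESC LIMIT 10",
    )
    .bind(min_similarity)
    .fetch_all(pool)
    .await?;

    for row in rows {
        let synthetic: String = row.get("synthetic_name");
        let target: String = row.get("target_name");
        let score: f32 = row.get("similarity_score");
        println!("{synthetic} -> {target} ({score:.2})");
    }
    Ok(())
}
```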
+12
.sqlx/query-0d7c3ef80c20dac6efd0fe3c430d7f41b1c90368ff99ce8a09f66bca63864d1e.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "REFRESH MATERIALIZED VIEW mv_release_play_counts;", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [] 8 + }, 9 + "nullable": [] 10 + }, 11 + "hash": "0d7c3ef80c20dac6efd0fe3c430d7f41b1c90368ff99ce8a09f66bca63864d1e" 12 + }
+35
.sqlx/query-0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE p.did = $1\n AND pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $2\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "mbid", 9 + "type_info": "Uuid" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "play_count", 19 + "type_info": "Int8" 20 + } 21 + ], 22 + "parameters": { 23 + "Left": [ 24 + "Text", 25 + "Int8" 26 + ] 27 + }, 28 + "nullable": [ 29 + false, 30 + true, 31 + null 32 + ] 33 + }, 34 + "hash": "0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273" 35 + }
+14
.sqlx/query-0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "DELETE FROM artists_extended WHERE id = $1", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Int4" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec" 14 + }
+112
.sqlx/query-0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE did = ANY($1)\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "uri", 9 + "type_info": "Text" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "did", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "rkey", 19 + "type_info": "Text" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "cid", 24 + "type_info": "Text" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "isrc", 29 + "type_info": "Text" 30 + }, 31 + { 32 + "ordinal": 5, 33 + "name": "duration", 34 + "type_info": "Int4" 35 + }, 36 + { 37 + "ordinal": 6, 38 + "name": "track_name", 39 + "type_info": "Text" 40 + }, 41 + { 42 + "ordinal": 7, 43 + "name": "played_time", 44 + "type_info": "Timestamptz" 45 + }, 46 + { 47 + "ordinal": 8, 48 + "name": "processed_time", 49 + "type_info": "Timestamptz" 50 + }, 51 + { 52 + "ordinal": 9, 53 + "name": "release_mbid", 54 + "type_info": "Uuid" 55 + }, 56 + { 57 + "ordinal": 10, 58 + "name": "release_name", 59 + "type_info": "Text" 60 + }, 61 + { 62 + "ordinal": 11, 63 + "name": "recording_mbid", 64 + "type_info": "Uuid" 65 + }, 66 + { 67 + "ordinal": 12, 68 + "name": "submission_client_agent", 69 + "type_info": "Text" 70 + }, 71 + { 72 + "ordinal": 13, 73 + "name": "music_service_base_domain", 74 + "type_info": "Text" 75 + }, 76 + { 77 + "ordinal": 14, 78 + "name": "origin_url", 79 + "type_info": "Text" 80 + }, 81 + { 82 + "ordinal": 15, 83 + "name": "artists", 84 + "type_info": "Json" 85 + } 86 + ], 87 + "parameters": { 88 + "Left": [ 89 + "TextArray" 90 + ] 91 + }, 92 + "nullable": [ 93 + false, 94 + false, 95 + false, 96 + false, 97 + true, 98 + true, 99 + false, 100 + true, 101 + true, 102 + true, 103 + true, 104 + true, 105 + true, 106 + true, 107 + true, 108 + null 109 + ] 110 + }, 111 + "hash": "0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1" 112 + }
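The `json_agg(...) AS artists` column in the query above folds the play-to-artist join into a single JSON array per play. On the Rust side that column usually comes back as a JSON value that can be deserialized into a small struct. The sketch below assumes a hypothetical `ArtistRef` type and sqlx's `json` and `uuid` features; it is not the project's real model code:

```rust
// Sketch only: read the aggregated `artists` JSON column for one play.
use serde::Deserialize;
use sqlx::{types::Json, PgPool, Row};

#[derive(Debug, Deserialize)]
struct ArtistRef {
    artist_mbid: Option<uuid::Uuid>, // MBIDs arrive as strings in the JSON
    artist_name: String,
}

async fn artists_for_play(pool: &PgPool, uri: &str) -> Result<Vec<ArtistRef>, sqlx::Error> {
    let row = sqlx::query(
        "SELECT COALESCE(json_agg(json_build_object(
             'artist_mbid', pta.artist_mbid,
             'artist_name', pta.artist_name
         )) FILTER (WHERE pta.artist_name IS NOT NULL), '[]') AS artists
         FROM plays LEFT JOIN play_to_artists pta ON uri = pta.play_uri
         WHERE uri = $1",
    )
    .bind(uri)
    .fetch_one(pool)
    .await?;

    let Json(artists): Json<Vec<ArtistRef>> = row.get("artists");
    Ok(artists)
}
```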
+22
.sqlx/query-193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "SELECT extract_discriminant($1)", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "extract_discriminant", 9 + "type_info": "Text" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Text" 15 + ] 16 + }, 17 + "nullable": [ 18 + null 19 + ] 20 + }, 21 + "hash": "193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7" 22 + }
+14
.sqlx/query-1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "DELETE FROM releases WHERE mbid = $1", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Uuid" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c" 14 + }
+14
.sqlx/query-1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "DELETE FROM play_to_artists WHERE play_uri = $1", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363" 14 + }
+22
.sqlx/query-28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "SELECT extract_edition_discriminant($1)", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "extract_edition_discriminant", 9 + "type_info": "Text" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Text" 15 + ] 16 + }, 17 + "nullable": [ 18 + null 19 + ] 20 + }, 21 + "hash": "28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e" 22 + }
+52
.sqlx/query-2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT DISTINCT\n r1.mbid as release1_mbid,\n r1.name as release1_name,\n r2.mbid as release2_mbid,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "release1_mbid", 9 + "type_info": "Uuid" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "release1_name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "release2_mbid", 19 + "type_info": "Uuid" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "release2_name", 24 + "type_info": "Text" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "similarity_score", 29 + "type_info": "Float4" 30 + }, 31 + { 32 + "ordinal": 5, 33 + "name": "shared_artists", 34 + "type_info": "Int8" 35 + } 36 + ], 37 + "parameters": { 38 + "Left": [ 39 + "Float4" 40 + ] 41 + }, 42 + "nullable": [ 43 + false, 44 + false, 45 + false, 46 + false, 47 + null, 48 + null 49 + ] 50 + }, 51 + "hash": "2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149" 52 + }
+14
.sqlx/query-2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "DELETE FROM play_to_artists_extended WHERE artist_id = $1", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Int4" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a" 14 + }
+12
.sqlx/query-3d84a9e1ed05846bc931eea9b90fd88cae8b636968af4bd2f9b1a9927d15379d.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "REFRESH MATERIALIZED VIEW mv_global_play_count;", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [] 8 + }, 9 + "nullable": [] 10 + }, 11 + "hash": "3d84a9e1ed05846bc931eea9b90fd88cae8b636968af4bd2f9b1a9927d15379d" 12 + }
+22
.sqlx/query-413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "SELECT COUNT(*) FROM plays WHERE recording_mbid = $1", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "count", 9 + "type_info": "Int8" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Uuid" 15 + ] 16 + }, 17 + "nullable": [ 18 + null 19 + ] 20 + }, 21 + "hash": "413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0" 22 + }
+14
.sqlx/query-5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n DELETE FROM profiles WHERE did = $1\n ", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1" 14 + }
+112
.sqlx/query-651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE uri = $1\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "uri", 9 + "type_info": "Text" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "did", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "rkey", 19 + "type_info": "Text" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "cid", 24 + "type_info": "Text" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "isrc", 29 + "type_info": "Text" 30 + }, 31 + { 32 + "ordinal": 5, 33 + "name": "duration", 34 + "type_info": "Int4" 35 + }, 36 + { 37 + "ordinal": 6, 38 + "name": "track_name", 39 + "type_info": "Text" 40 + }, 41 + { 42 + "ordinal": 7, 43 + "name": "played_time", 44 + "type_info": "Timestamptz" 45 + }, 46 + { 47 + "ordinal": 8, 48 + "name": "processed_time", 49 + "type_info": "Timestamptz" 50 + }, 51 + { 52 + "ordinal": 9, 53 + "name": "release_mbid", 54 + "type_info": "Uuid" 55 + }, 56 + { 57 + "ordinal": 10, 58 + "name": "release_name", 59 + "type_info": "Text" 60 + }, 61 + { 62 + "ordinal": 11, 63 + "name": "recording_mbid", 64 + "type_info": "Uuid" 65 + }, 66 + { 67 + "ordinal": 12, 68 + "name": "submission_client_agent", 69 + "type_info": "Text" 70 + }, 71 + { 72 + "ordinal": 13, 73 + "name": "music_service_base_domain", 74 + "type_info": "Text" 75 + }, 76 + { 77 + "ordinal": 14, 78 + "name": "origin_url", 79 + "type_info": "Text" 80 + }, 81 + { 82 + "ordinal": 15, 83 + "name": "artists", 84 + "type_info": "Json" 85 + } 86 + ], 87 + "parameters": { 88 + "Left": [ 89 + "Text" 90 + ] 91 + }, 92 + "nullable": [ 93 + false, 94 + false, 95 + false, 96 + false, 97 + true, 98 + true, 99 + false, 100 + true, 101 + true, 102 + true, 103 + true, 104 + true, 105 + true, 106 + true, 107 + true, 108 + null 109 + ] 110 + }, 111 + "hash": "651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0" 112 + }
+16
.sqlx/query-6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n UPDATE play_to_artists_extended\n SET artist_id = $1, artist_name = $2\n WHERE artist_id = $3\n AND NOT EXISTS (\n SELECT 1 FROM play_to_artists_extended existing\n WHERE existing.play_uri = play_to_artists_extended.play_uri\n AND existing.artist_id = $1\n )\n ", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Int4", 9 + "Text", 10 + "Int4" 11 + ] 12 + }, 13 + "nullable": [] 14 + }, 15 + "hash": "6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48" 16 + }
+52
.sqlx/query-6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT DISTINCT\n ae1.id as synthetic_id,\n ae1.name as synthetic_name,\n ae2.id as target_id,\n ae2.name as target_name,\n ae2.mbid as target_mbid,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n ORDER BY similarity_score DESC\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "synthetic_id", 9 + "type_info": "Int4" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "synthetic_name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "target_id", 19 + "type_info": "Int4" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "target_name", 24 + "type_info": "Text" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "target_mbid", 29 + "type_info": "Uuid" 30 + }, 31 + { 32 + "ordinal": 5, 33 + "name": "similarity_score", 34 + "type_info": "Float4" 35 + } 36 + ], 37 + "parameters": { 38 + "Left": [ 39 + "Float4" 40 + ] 41 + }, 42 + "nullable": [ 43 + false, 44 + false, 45 + false, 46 + false, 47 + true, 48 + null 49 + ] 50 + }, 51 + "hash": "6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56" 52 + }
+23
.sqlx/query-76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "id", 9 + "type_info": "Int4" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Uuid", 15 + "Text" 16 + ] 17 + }, 18 + "nullable": [ 19 + false 20 + ] 21 + }, 22 + "hash": "76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0" 23 + }
+29
.sqlx/query-7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO plays (\n uri, cid, did, rkey, isrc, duration, track_name, played_time,\n processed_time, release_mbid, release_name, recording_mbid,\n submission_client_agent, music_service_base_domain, artist_names_raw,\n track_discriminant, release_discriminant\n ) VALUES (\n $1, $2, $3, $4, $5, $6, $7, $8,\n NOW(), $9, $10, $11, $12, $13, $14, $15, $16\n ) ON CONFLICT(uri) DO UPDATE SET\n isrc = EXCLUDED.isrc,\n duration = EXCLUDED.duration,\n track_name = EXCLUDED.track_name,\n played_time = EXCLUDED.played_time,\n processed_time = EXCLUDED.processed_time,\n release_mbid = EXCLUDED.release_mbid,\n release_name = EXCLUDED.release_name,\n recording_mbid = EXCLUDED.recording_mbid,\n submission_client_agent = EXCLUDED.submission_client_agent,\n music_service_base_domain = EXCLUDED.music_service_base_domain,\n artist_names_raw = EXCLUDED.artist_names_raw,\n track_discriminant = EXCLUDED.track_discriminant,\n release_discriminant = EXCLUDED.release_discriminant;\n ", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text", 9 + "Text", 10 + "Text", 11 + "Text", 12 + "Text", 13 + "Int4", 14 + "Text", 15 + "Timestamptz", 16 + "Uuid", 17 + "Text", 18 + "Uuid", 19 + "Text", 20 + "Text", 21 + "Jsonb", 22 + "Text", 23 + "Text" 24 + ] 25 + }, 26 + "nullable": [] 27 + }, 28 + "hash": "7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85" 29 + }
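This is the ingestion upsert: replaying the same record (same AT-URI) simply overwrites the previous row because of `ON CONFLICT (uri) DO UPDATE`. Below is a heavily trimmed sketch of the same shape, keeping only a few columns and ignoring the real table's other constraints, purely to show the conflict-handling pattern; it assumes sqlx's `time` feature for the timestamp bind and is not the project's actual code:

```rust
// Trimmed sketch of the idempotent play upsert (illustrative column subset).
use sqlx::PgPool;
use time::OffsetDateTime;

pub async fn upsert_play(
    pool: &PgPool,
    uri: &str,
    did: &str,
    track_name: &str,
    played_time: OffsetDateTime,
) -> Result<(), sqlx::Error> {
    sqlx::query(
        "INSERT INTO plays (uri, did, track_name, played_time, processed_time)
         VALUES ($1, $2, $3, $4, NOW())
         ON CONFLICT (uri) DO UPDATE SET
             track_name = EXCLUDED.track_name,
             played_time = EXCLUDED.played_time,
             processed_time = EXCLUDED.processed_time",
    )
    .bind(uri)
    .bind(did)
    .bind(track_name)
    .bind(played_time)
    .execute(pool)
    .await?;
    Ok(())
}
```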
+16
.sqlx/query-7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Uuid", 9 + "Text", 10 + "Uuid" 11 + ] 12 + }, 13 + "nullable": [] 14 + }, 15 + "hash": "7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10" 16 + }
+18
.sqlx/query-8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO statii (uri, did, rkey, cid, record)\n VALUES ($1, $2, $3, $4, $5)\n ON CONFLICT (uri) DO UPDATE SET\n cid = EXCLUDED.cid,\n record = EXCLUDED.record,\n indexed_at = NOW();\n ", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text", 9 + "Text", 10 + "Text", 11 + "Text", 12 + "Jsonb" 13 + ] 14 + }, 15 + "nullable": [] 16 + }, 17 + "hash": "8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946" 18 + }
+34
.sqlx/query-97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $1\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "mbid", 9 + "type_info": "Uuid" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "play_count", 19 + "type_info": "Int8" 20 + } 21 + ], 22 + "parameters": { 23 + "Left": [ 24 + "Int8" 25 + ] 26 + }, 27 + "nullable": [ 28 + true, 29 + true, 30 + null 31 + ] 32 + }, 33 + "hash": "97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59" 34 + }
+12
.sqlx/query-9af33e4329198dee7814519573b63858eaf69f08ad2959d96ffee5c8387af0ba.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "REFRESH MATERIALIZED VIEW mv_artist_play_counts;", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [] 8 + }, 9 + "nullable": [] 10 + }, 11 + "hash": "9af33e4329198dee7814519573b63858eaf69f08ad2959d96ffee5c8387af0ba" 12 + }
+16
.sqlx/query-9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES\n ($1, $2, $3)\n ON CONFLICT (play_uri, artist_id) DO NOTHING;\n ", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text", 9 + "Int4", 10 + "Text" 11 + ] 12 + }, 13 + "nullable": [] 14 + }, 15 + "hash": "9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3" 16 + }
+24
.sqlx/query-9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)\n RETURNING mbid;\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "mbid", 9 + "type_info": "Uuid" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Uuid", 15 + "Text", 16 + "Text" 17 + ] 18 + }, 19 + "nullable": [ 20 + false 21 + ] 22 + }, 23 + "hash": "9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81" 24 + }
+14
.sqlx/query-9d4e872755f90087f64f116d8fee340218e09b40ab8f94b5d9d17b9c39bf3d4f.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "DELETE FROM plays WHERE uri = $1", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "9d4e872755f90087f64f116d8fee340218e09b40ab8f94b5d9d17b9c39bf3d4f" 14 + }
+22
.sqlx/query-ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "SELECT generate_synthetic_mbid($1)", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "generate_synthetic_mbid", 9 + "type_info": "Uuid" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Text" 15 + ] 16 + }, 17 + "nullable": [ 18 + null 19 + ] 20 + }, 21 + "hash": "ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd" 22 + }
+35
.sqlx/query-af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.did = $1\n AND p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $2\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "mbid", 9 + "type_info": "Uuid" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "play_count", 19 + "type_info": "Int8" 20 + } 21 + ], 22 + "parameters": { 23 + "Left": [ 24 + "Text", 25 + "Int8" 26 + ] 27 + }, 28 + "nullable": [ 29 + true, 30 + true, 31 + null 32 + ] 33 + }, 34 + "hash": "af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3" 35 + }
+46
.sqlx/query-b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT DISTINCT\n r1.name as recording1_name,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "recording1_name", 9 + "type_info": "Text" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "recording2_name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "similarity_score", 19 + "type_info": "Float4" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "shared_artists", 24 + "type_info": "Int8" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "artist_names", 29 + "type_info": "Text" 30 + } 31 + ], 32 + "parameters": { 33 + "Left": [ 34 + "Float4" 35 + ] 36 + }, 37 + "nullable": [ 38 + false, 39 + false, 40 + null, 41 + null, 42 + null 43 + ] 44 + }, 45 + "hash": "b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453" 46 + }
+15
.sqlx/query-b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text", 9 + "Int4" 10 + ] 11 + }, 12 + "nullable": [] 13 + }, 14 + "hash": "b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56" 15 + }
+65
.sqlx/query-b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "SELECT\n p.avatar,\n p.banner,\n p.created_at,\n p.description,\n p.description_facets,\n p.did,\n p.display_name,\n s.record as status\n FROM profiles p\n LEFT JOIN statii s ON p.did = s.did AND s.rkey = 'self'\n WHERE (p.did = ANY($1))\n OR (p.handle = ANY($2))", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "avatar", 9 + "type_info": "Text" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "banner", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "created_at", 19 + "type_info": "Timestamptz" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "description", 24 + "type_info": "Text" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "description_facets", 29 + "type_info": "Jsonb" 30 + }, 31 + { 32 + "ordinal": 5, 33 + "name": "did", 34 + "type_info": "Text" 35 + }, 36 + { 37 + "ordinal": 6, 38 + "name": "display_name", 39 + "type_info": "Text" 40 + }, 41 + { 42 + "ordinal": 7, 43 + "name": "status", 44 + "type_info": "Jsonb" 45 + } 46 + ], 47 + "parameters": { 48 + "Left": [ 49 + "TextArray", 50 + "TextArray" 51 + ] 52 + }, 53 + "nullable": [ 54 + true, 55 + true, 56 + true, 57 + true, 58 + true, 59 + false, 60 + true, 61 + true 62 + ] 63 + }, 64 + "hash": "b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837" 65 + }
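The profile lookup above takes two text[] parameters and matches either DIDs or handles via ANY($1)/ANY($2). A hedged sketch of the binding side follows — sqlx maps PostgreSQL array parameters to Rust string slices, so a mixed batch of identifiers can be resolved in one round trip; the function name is an assumption, and the SQL is deliberately trimmed to the WHERE clause being illustrated rather than the full cached statement.

```rust
use sqlx::PgPool;

// Sketch only: bind Rust string slices to the two TextArray parameters.
async fn matching_profile_dids(
    pool: &PgPool,
    dids: &[String],
    handles: &[String],
) -> Result<Vec<String>, sqlx::Error> {
    let rows = sqlx::query!(
        r#"SELECT p.did
           FROM profiles p
           WHERE (p.did = ANY($1)) OR (p.handle = ANY($2))"#,
        dids,
        handles
    )
    .fetch_all(pool)
    .await?;
    // p.did is NOT NULL, so each row's did field is a plain String.
    Ok(rows.into_iter().map(|r| r.did).collect())
}
```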
+34
.sqlx/query-b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $1\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "mbid", 9 + "type_info": "Uuid" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "play_count", 19 + "type_info": "Int8" 20 + } 21 + ], 22 + "parameters": { 23 + "Left": [ 24 + "Int8" 25 + ] 26 + }, 27 + "nullable": [ 28 + false, 29 + true, 30 + null 31 + ] 32 + }, 33 + "hash": "b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e" 34 + }
+21
.sqlx/query-b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO profiles (did, handle, display_name, description, description_facets, avatar, banner, created_at)\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8)\n ON CONFLICT (did) DO UPDATE SET\n display_name = EXCLUDED.display_name,\n description = EXCLUDED.description,\n description_facets = EXCLUDED.description_facets,\n avatar = EXCLUDED.avatar,\n banner = EXCLUDED.banner,\n created_at = EXCLUDED.created_at;\n ", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text", 9 + "Text", 10 + "Text", 11 + "Text", 12 + "Jsonb", 13 + "Text", 14 + "Text", 15 + "Timestamptz" 16 + ] 17 + }, 18 + "nullable": [] 19 + }, 20 + "hash": "b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded" 21 + }
+22
.sqlx/query-bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "SELECT COUNT(*) FROM plays WHERE release_mbid = $1", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "count", 9 + "type_info": "Int8" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Uuid" 15 + ] 16 + }, 17 + "nullable": [ 18 + null 19 + ] 20 + }, 21 + "hash": "bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224" 22 + }
+12
.sqlx/query-bf9c6d3bf0f9594ae1c02dc85c9887b747aaa5f0c3e67d9381c3867c4f67ae6d.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "REFRESH MATERIALIZED VIEW mv_recording_play_counts;", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [] 8 + }, 9 + "nullable": [] 10 + }, 11 + "hash": "bf9c6d3bf0f9594ae1c02dc85c9887b747aaa5f0c3e67d9381c3867c4f67ae6d" 12 + }
+46
.sqlx/query-cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT DISTINCT\n r1.name as release1_name,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "release1_name", 9 + "type_info": "Text" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "release2_name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "similarity_score", 19 + "type_info": "Float4" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "shared_artists", 24 + "type_info": "Int8" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "artist_names", 29 + "type_info": "Text" 30 + } 31 + ], 32 + "parameters": { 33 + "Left": [ 34 + "Float4" 35 + ] 36 + }, 37 + "nullable": [ 38 + false, 39 + false, 40 + null, 41 + null, 42 + null 43 + ] 44 + }, 45 + "hash": "cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c" 46 + }
+15
.sqlx/query-cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Uuid", 9 + "Uuid" 10 + ] 11 + }, 12 + "nullable": [] 13 + }, 14 + "hash": "cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2" 15 + }
+14
.sqlx/query-d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "DELETE FROM recordings WHERE mbid = $1", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Uuid" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45" 14 + }
+14
.sqlx/query-d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n DELETE FROM statii WHERE uri = $1\n ", 4 + "describe": { 5 + "columns": [], 6 + "parameters": { 7 + "Left": [ 8 + "Text" 9 + ] 10 + }, 11 + "nullable": [] 12 + }, 13 + "hash": "d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961" 14 + }
+112
.sqlx/query-f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays p\n LEFT JOIN play_to_artists as pta ON p.uri = pta.play_uri\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time DESC\n LIMIT $1\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "uri", 9 + "type_info": "Text" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "did", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "rkey", 19 + "type_info": "Text" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "cid", 24 + "type_info": "Text" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "isrc", 29 + "type_info": "Text" 30 + }, 31 + { 32 + "ordinal": 5, 33 + "name": "duration", 34 + "type_info": "Int4" 35 + }, 36 + { 37 + "ordinal": 6, 38 + "name": "track_name", 39 + "type_info": "Text" 40 + }, 41 + { 42 + "ordinal": 7, 43 + "name": "played_time", 44 + "type_info": "Timestamptz" 45 + }, 46 + { 47 + "ordinal": 8, 48 + "name": "processed_time", 49 + "type_info": "Timestamptz" 50 + }, 51 + { 52 + "ordinal": 9, 53 + "name": "release_mbid", 54 + "type_info": "Uuid" 55 + }, 56 + { 57 + "ordinal": 10, 58 + "name": "release_name", 59 + "type_info": "Text" 60 + }, 61 + { 62 + "ordinal": 11, 63 + "name": "recording_mbid", 64 + "type_info": "Uuid" 65 + }, 66 + { 67 + "ordinal": 12, 68 + "name": "submission_client_agent", 69 + "type_info": "Text" 70 + }, 71 + { 72 + "ordinal": 13, 73 + "name": "music_service_base_domain", 74 + "type_info": "Text" 75 + }, 76 + { 77 + "ordinal": 14, 78 + "name": "origin_url", 79 + "type_info": "Text" 80 + }, 81 + { 82 + "ordinal": 15, 83 + "name": "artists", 84 + "type_info": "Json" 85 + } 86 + ], 87 + "parameters": { 88 + "Left": [ 89 + "Int8" 90 + ] 91 + }, 92 + "nullable": [ 93 + false, 94 + false, 95 + false, 96 + false, 97 + true, 98 + true, 99 + false, 100 + true, 101 + true, 102 + true, 103 + true, 104 + true, 105 + true, 106 + true, 107 + true, 108 + null 109 + ] 110 + }, 111 + "hash": "f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0" 112 + }
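The large query above returns one row per play together with an artists column assembled by json_agg(json_build_object(...)); its describe block types that column as Json, which sqlx hands back as a serde_json::Value. Below is a small hedged sketch of decoding that value into typed records — the struct and function names are assumptions, but the field names mirror the keys used in json_build_object, and the COALESCE(..., '[]') in the query guarantees an array even for plays with no linked artists.

```rust
use serde::Deserialize;

// Sketch only: shape of one element of the aggregated `artists` JSON array.
#[derive(Debug, Deserialize)]
struct PlayArtist {
    // artist_mbid may be NULL in play_to_artists, so keep it optional;
    // artist_name is restricted to non-null rows by the FILTER clause.
    artist_mbid: Option<String>,
    artist_name: String,
}

fn decode_artists(artists: serde_json::Value) -> Result<Vec<PlayArtist>, serde_json::Error> {
    serde_json::from_value(artists)
}
```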
+23
.sqlx/query-f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "id", 9 + "type_info": "Int4" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Uuid", 15 + "Text" 16 + ] 17 + }, 18 + "nullable": [ 19 + false 20 + ] 21 + }, 22 + "hash": "f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734" 23 + }
+24
.sqlx/query-f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant)\n RETURNING mbid;\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "mbid", 9 + "type_info": "Uuid" 10 + } 11 + ], 12 + "parameters": { 13 + "Left": [ 14 + "Uuid", 15 + "Text", 16 + "Text" 17 + ] 18 + }, 19 + "nullable": [ 20 + false 21 + ] 22 + }, 23 + "hash": "f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672" 24 + }
+28
.sqlx/query-fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT\n ae.id,\n ae.name\n FROM artists_extended ae\n WHERE ae.mbid_type = 'musicbrainz'\n AND (\n LOWER(TRIM(ae.name)) = $1\n OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%'\n OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%'\n OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6\n )\n ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC\n LIMIT 10\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "id", 9 + "type_info": "Int4" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "name", 14 + "type_info": "Text" 15 + } 16 + ], 17 + "parameters": { 18 + "Left": [ 19 + "Text" 20 + ] 21 + }, 22 + "nullable": [ 23 + false, 24 + false 25 + ] 26 + }, 27 + "hash": "fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7" 28 + }
+52
.sqlx/query-ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0.json
···
··· 1 + { 2 + "db_name": "PostgreSQL", 3 + "query": "\n SELECT DISTINCT\n r1.mbid as recording1_mbid,\n r1.name as recording1_name,\n r2.mbid as recording2_mbid,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ", 4 + "describe": { 5 + "columns": [ 6 + { 7 + "ordinal": 0, 8 + "name": "recording1_mbid", 9 + "type_info": "Uuid" 10 + }, 11 + { 12 + "ordinal": 1, 13 + "name": "recording1_name", 14 + "type_info": "Text" 15 + }, 16 + { 17 + "ordinal": 2, 18 + "name": "recording2_mbid", 19 + "type_info": "Uuid" 20 + }, 21 + { 22 + "ordinal": 3, 23 + "name": "recording2_name", 24 + "type_info": "Text" 25 + }, 26 + { 27 + "ordinal": 4, 28 + "name": "similarity_score", 29 + "type_info": "Float4" 30 + }, 31 + { 32 + "ordinal": 5, 33 + "name": "shared_artists", 34 + "type_info": "Int8" 35 + } 36 + ], 37 + "parameters": { 38 + "Left": [ 39 + "Float4" 40 + ] 41 + }, 42 + "nullable": [ 43 + false, 44 + false, 45 + false, 46 + false, 47 + null, 48 + null 49 + ] 50 + }, 51 + "hash": "ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0" 52 + }
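Taken together, the files under .sqlx/ are SQLx's offline query cache: each JSON file is keyed by a hash of one query and records its SQL text, parameter types, and column nullability, so the query!/query_as! macros can type-check at build time without a live database (the cache is typically regenerated with `cargo sqlx prepare` from sqlx-cli after queries change). As one more hedged illustration — function and variable names are assumptions, and sqlx's uuid feature is presumed enabled — this is how the MusicBrainz artist upsert cached earlier in this list would normally be invoked:

```rust
use sqlx::PgPool;

// Sketch only: upsert an artist row keyed by its MusicBrainz MBID and get the
// internal integer id back via RETURNING id (non-null, hence plain i32).
async fn upsert_musicbrainz_artist(
    pool: &PgPool,
    mbid: uuid::Uuid,
    name: &str,
) -> Result<i32, sqlx::Error> {
    let rec = sqlx::query!(
        r#"
        INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')
        ON CONFLICT (mbid) DO UPDATE SET
            name = EXCLUDED.name,
            updated_at = NOW()
        RETURNING id;
        "#,
        mbid,
        name
    )
    .fetch_one(pool)
    .await?;
    Ok(rec.id)
}
```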
-3
.vscode/settings.json
··· 1 - { 2 - "deno.enable": false 3 - }
···
+616 -241
Cargo.lock
··· 121 dependencies = [ 122 "anyhow", 123 "async-trait", 124 "atrium-api", 125 "axum", 126 - "base64", 127 "chrono", 128 "clap", 129 "dotenvy", ··· 165 checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" 166 167 [[package]] 168 name = "async-lock" 169 version = "3.4.0" 170 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 187 ] 188 189 [[package]] 190 name = "atoi" 191 version = "2.0.0" 192 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 210 "atrium-common", 211 "atrium-xrpc", 212 "chrono", 213 - "http", 214 "ipld-core", 215 "langtag", 216 "regex", ··· 243 source = "registry+https://github.com/rust-lang/crates.io-index" 244 checksum = "0216ad50ce34e9ff982e171c3659e65dedaa2ed5ac2994524debdc9a9647ffa8" 245 dependencies = [ 246 - "http", 247 "serde", 248 "serde_html_form", 249 "serde_json", ··· 259 260 [[package]] 261 name = "aws-lc-rs" 262 - version = "1.13.2" 263 source = "registry+https://github.com/rust-lang/crates.io-index" 264 - checksum = "08b5d4e069cbc868041a64bd68dc8cb39a0d79585cd6c5a24caa8c2d622121be" 265 dependencies = [ 266 "aws-lc-sys", 267 "zeroize", ··· 291 "bytes", 292 "form_urlencoded", 293 "futures-util", 294 - "http", 295 "http-body", 296 "http-body-util", 297 "hyper", ··· 324 dependencies = [ 325 "bytes", 326 "futures-core", 327 - "http", 328 "http-body", 329 "http-body-util", 330 "mime", ··· 348 ] 349 350 [[package]] 351 name = "backtrace" 352 version = "0.3.75" 353 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 369 checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" 370 371 [[package]] 372 name = "base64" 373 version = "0.22.1" 374 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 397 "proc-macro2", 398 "quote", 399 "regex", 400 - "rustc-hash", 401 "shlex", 402 "syn 2.0.104", 403 "which", ··· 504 version = "1.10.1" 505 source = "registry+https://github.com/rust-lang/crates.io-index" 506 checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 507 508 [[package]] 509 name = "cadet" ··· 511 dependencies = [ 512 "anyhow", 513 "async-trait", 514 "atrium-api", 515 - "base64", 516 "chrono", 517 "cid 0.11.1", 518 "dotenvy", 519 "flume", 520 "iroh-car", 521 "libipld", 522 "metrics 0.23.1", ··· 528 "reqwest", 529 "rocketman", 530 "serde", 531 "serde_json", 532 "sqlx", 533 "time", 534 "tokio", 535 - "tokio-tungstenite", 536 "tracing", 537 "tracing-subscriber", 538 "types", ··· 583 584 [[package]] 585 name = "cc" 586 - version = "1.2.30" 587 source = "registry+https://github.com/rust-lang/crates.io-index" 588 - checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" 589 dependencies = [ 590 "jobserver", 591 "libc", ··· 608 checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" 609 610 [[package]] 611 name = "chrono" 612 version = "0.4.41" 613 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 662 663 [[package]] 664 name = "clap" 665 - version = "4.5.41" 666 source = "registry+https://github.com/rust-lang/crates.io-index" 667 - checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" 668 dependencies = [ 669 "clap_builder", 670 "clap_derive", ··· 672 673 [[package]] 674 name = "clap_builder" 675 - version = "4.5.41" 676 source = "registry+https://github.com/rust-lang/crates.io-index" 677 - checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" 678 dependencies = [ 679 "anstream", 680 "anstyle", ··· 716 checksum = 
"b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 717 718 [[package]] 719 name = "combine" 720 version = "4.6.7" 721 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 810 checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" 811 812 [[package]] 813 name = "crossbeam-channel" 814 version = "0.5.15" 815 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 841 version = "0.8.21" 842 source = "registry+https://github.com/rust-lang/crates.io-index" 843 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 844 845 [[package]] 846 name = "crypto-common" ··· 1026 ] 1027 1028 [[package]] 1029 name = "displaydoc" 1030 version = "0.2.5" 1031 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1049 checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" 1050 1051 [[package]] 1052 name = "either" 1053 version = "1.15.0" 1054 source = "registry+https://github.com/rust-lang/crates.io-index" 1055 checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 1056 dependencies = [ 1057 "serde", 1058 ] 1059 1060 [[package]] ··· 1121 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 1122 1123 [[package]] 1124 name = "flume" 1125 version = "0.11.1" 1126 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1145 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 1146 1147 [[package]] 1148 - name = "foreign-types" 1149 - version = "0.3.2" 1150 - source = "registry+https://github.com/rust-lang/crates.io-index" 1151 - checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" 1152 - dependencies = [ 1153 - "foreign-types-shared", 1154 - ] 1155 - 1156 - [[package]] 1157 - name = "foreign-types-shared" 1158 - version = "0.1.1" 1159 - source = "registry+https://github.com/rust-lang/crates.io-index" 1160 - checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" 1161 - 1162 - [[package]] 1163 name = "form_urlencoded" 1164 version = "1.2.1" 1165 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1296 dependencies = [ 1297 "typenum", 1298 "version_check", 1299 ] 1300 1301 [[package]] ··· 1318 checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 1319 dependencies = [ 1320 "cfg-if", 1321 "libc", 1322 "r-efi", 1323 "wasi 0.14.2+wasi-0.2.4", 1324 ] 1325 1326 [[package]] ··· 1336 checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" 1337 1338 [[package]] 1339 name = "h2" 1340 version = "0.4.11" 1341 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1346 "fnv", 1347 "futures-core", 1348 "futures-sink", 1349 - "http", 1350 "indexmap", 1351 "slab", 1352 "tokio", ··· 1421 1422 [[package]] 1423 name = "http" 1424 version = "1.3.1" 1425 source = "registry+https://github.com/rust-lang/crates.io-index" 1426 checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" ··· 1437 checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" 1438 dependencies = [ 1439 "bytes", 1440 - "http", 1441 ] 1442 1443 [[package]] ··· 1448 dependencies = [ 1449 "bytes", 1450 "futures-core", 1451 - "http", 1452 "http-body", 1453 "pin-project-lite", 1454 ] ··· 1475 "futures-channel", 1476 "futures-util", 1477 "h2", 1478 - "http", 1479 "http-body", 1480 "httparse", 1481 "httpdate", ··· 1492 source = "registry+https://github.com/rust-lang/crates.io-index" 1493 checksum = 
"e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" 1494 dependencies = [ 1495 - "http", 1496 "hyper", 1497 "hyper-util", 1498 - "rustls", 1499 - "rustls-native-certs", 1500 "rustls-pki-types", 1501 "tokio", 1502 - "tokio-rustls", 1503 "tower-service", 1504 - ] 1505 - 1506 - [[package]] 1507 - name = "hyper-tls" 1508 - version = "0.6.0" 1509 - source = "registry+https://github.com/rust-lang/crates.io-index" 1510 - checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" 1511 - dependencies = [ 1512 - "bytes", 1513 - "http-body-util", 1514 - "hyper", 1515 - "hyper-util", 1516 - "native-tls", 1517 - "tokio", 1518 - "tokio-native-tls", 1519 - "tower-service", 1520 ] 1521 1522 [[package]] 1523 name = "hyper-util" 1524 - version = "0.1.15" 1525 source = "registry+https://github.com/rust-lang/crates.io-index" 1526 - checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df" 1527 dependencies = [ 1528 - "base64", 1529 "bytes", 1530 "futures-channel", 1531 "futures-core", 1532 "futures-util", 1533 - "http", 1534 "http-body", 1535 "hyper", 1536 "ipnet", 1537 "libc", 1538 "percent-encoding", 1539 "pin-project-lite", 1540 - "socket2 0.5.10", 1541 - "system-configuration", 1542 "tokio", 1543 "tower-service", 1544 "tracing", 1545 - "windows-registry", 1546 ] 1547 1548 [[package]] ··· 1694 1695 [[package]] 1696 name = "io-uring" 1697 - version = "0.7.8" 1698 source = "registry+https://github.com/rust-lang/crates.io-index" 1699 - checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" 1700 dependencies = [ 1701 "bitflags 2.9.1", 1702 "cfg-if", ··· 1732 1733 [[package]] 1734 name = "iroh-car" 1735 - version = "0.4.0" 1736 source = "registry+https://github.com/rust-lang/crates.io-index" 1737 - checksum = "475a6f0ebd64c87ea011021c67f10b57930f6c286e0163807066bfb83553b1b6" 1738 dependencies = [ 1739 "anyhow", 1740 - "cid 0.10.1", 1741 "futures", 1742 - "libipld", 1743 "thiserror 1.0.69", 1744 "tokio", 1745 "unsigned-varint 0.7.2", ··· 1784 dependencies = [ 1785 "once_cell", 1786 "wasm-bindgen", 1787 ] 1788 1789 [[package]] ··· 1921 checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" 1922 dependencies = [ 1923 "cfg-if", 1924 - "windows-targets 0.53.2", 1925 ] 1926 1927 [[package]] ··· 1931 checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" 1932 1933 [[package]] 1934 name = "libsqlite3-sys" 1935 version = "0.30.1" 1936 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1997 ] 1998 1999 [[package]] 2000 name = "matchers" 2001 version = "0.1.0" 2002 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2053 source = "registry+https://github.com/rust-lang/crates.io-index" 2054 checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" 2055 dependencies = [ 2056 - "base64", 2057 "http-body-util", 2058 "hyper", 2059 "hyper-rustls", ··· 2079 "hashbrown 0.15.4", 2080 "metrics 0.24.2", 2081 "quanta", 2082 - "rand 0.9.1", 2083 "rand_xoshiro", 2084 "sketches-ddsketch", 2085 ] ··· 2147 "bytes", 2148 "encoding_rs", 2149 "futures-util", 2150 - "http", 2151 "httparse", 2152 "memchr", 2153 "mime", ··· 2262 ] 2263 2264 [[package]] 2265 - name = "native-tls" 2266 - version = "0.2.14" 2267 - source = "registry+https://github.com/rust-lang/crates.io-index" 2268 - checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" 2269 - dependencies = [ 2270 - "libc", 2271 - "log", 2272 - "openssl", 2273 - "openssl-probe", 2274 - 
"openssl-sys", 2275 - "schannel", 2276 - "security-framework 2.11.1", 2277 - "security-framework-sys", 2278 - "tempfile", 2279 - ] 2280 - 2281 - [[package]] 2282 name = "nom" 2283 version = "7.1.3" 2284 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2305 dependencies = [ 2306 "overload", 2307 "winapi", 2308 ] 2309 2310 [[package]] ··· 2400 checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 2401 2402 [[package]] 2403 - name = "openssl" 2404 - version = "0.10.73" 2405 - source = "registry+https://github.com/rust-lang/crates.io-index" 2406 - checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" 2407 - dependencies = [ 2408 - "bitflags 2.9.1", 2409 - "cfg-if", 2410 - "foreign-types", 2411 - "libc", 2412 - "once_cell", 2413 - "openssl-macros", 2414 - "openssl-sys", 2415 - ] 2416 - 2417 - [[package]] 2418 - name = "openssl-macros" 2419 - version = "0.1.1" 2420 - source = "registry+https://github.com/rust-lang/crates.io-index" 2421 - checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" 2422 - dependencies = [ 2423 - "proc-macro2", 2424 - "quote", 2425 - "syn 2.0.104", 2426 - ] 2427 - 2428 - [[package]] 2429 name = "openssl-probe" 2430 version = "0.1.6" 2431 source = "registry+https://github.com/rust-lang/crates.io-index" 2432 checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" 2433 2434 [[package]] 2435 - name = "openssl-sys" 2436 - version = "0.9.109" 2437 source = "registry+https://github.com/rust-lang/crates.io-index" 2438 - checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" 2439 - dependencies = [ 2440 - "cc", 2441 - "libc", 2442 - "pkg-config", 2443 - "vcpkg", 2444 - ] 2445 2446 [[package]] 2447 name = "overload" ··· 2494 checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 2495 2496 [[package]] 2497 - name = "pin-project" 2498 - version = "1.1.10" 2499 - source = "registry+https://github.com/rust-lang/crates.io-index" 2500 - checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" 2501 - dependencies = [ 2502 - "pin-project-internal", 2503 - ] 2504 - 2505 - [[package]] 2506 - name = "pin-project-internal" 2507 - version = "1.1.10" 2508 - source = "registry+https://github.com/rust-lang/crates.io-index" 2509 - checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" 2510 - dependencies = [ 2511 - "proc-macro2", 2512 - "quote", 2513 - "syn 2.0.104", 2514 - ] 2515 - 2516 - [[package]] 2517 name = "pin-project-lite" 2518 version = "0.2.16" 2519 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2584 2585 [[package]] 2586 name = "prettyplease" 2587 - version = "0.2.35" 2588 source = "registry+https://github.com/rust-lang/crates.io-index" 2589 - checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" 2590 dependencies = [ 2591 "proc-macro2", 2592 "syn 2.0.104", ··· 2669 ] 2670 2671 [[package]] 2672 name = "quote" 2673 version = "1.0.40" 2674 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2696 2697 [[package]] 2698 name = "rand" 2699 - version = "0.9.1" 2700 source = "registry+https://github.com/rust-lang/crates.io-index" 2701 - checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" 2702 dependencies = [ 2703 "rand_chacha 0.9.0", 2704 "rand_core 0.9.3", ··· 2762 2763 [[package]] 2764 name = "redis" 2765 - version = "0.24.0" 2766 source = "registry+https://github.com/rust-lang/crates.io-index" 2767 - 
checksum = "c580d9cbbe1d1b479e8d67cf9daf6a62c957e6846048408b80b43ac3f6af84cd" 2768 dependencies = [ 2769 "arc-swap", 2770 - "async-trait", 2771 "bytes", 2772 "combine", 2773 - "futures", 2774 "futures-util", 2775 "itoa", 2776 "percent-encoding", 2777 "pin-project-lite", 2778 "ryu", 2779 "sha1_smol", 2780 - "socket2 0.4.10", 2781 "tokio", 2782 - "tokio-retry", 2783 "tokio-util", 2784 "url", 2785 ] 2786 2787 [[package]] 2788 name = "redox_syscall" 2789 - version = "0.5.13" 2790 source = "registry+https://github.com/rust-lang/crates.io-index" 2791 - checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" 2792 dependencies = [ 2793 "bitflags 2.9.1", 2794 ] 2795 2796 [[package]] 2797 name = "regex" 2798 version = "1.11.1" 2799 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2843 source = "registry+https://github.com/rust-lang/crates.io-index" 2844 checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" 2845 dependencies = [ 2846 - "base64", 2847 "bytes", 2848 - "encoding_rs", 2849 "futures-core", 2850 - "h2", 2851 - "http", 2852 "http-body", 2853 "http-body-util", 2854 "hyper", 2855 "hyper-rustls", 2856 - "hyper-tls", 2857 "hyper-util", 2858 "js-sys", 2859 "log", 2860 - "mime", 2861 - "native-tls", 2862 "percent-encoding", 2863 "pin-project-lite", 2864 "rustls-pki-types", 2865 "serde", 2866 "serde_json", 2867 "serde_urlencoded", 2868 "sync_wrapper", 2869 "tokio", 2870 - "tokio-native-tls", 2871 "tower", 2872 "tower-http", 2873 "tower-service", 2874 "url", 2875 "wasm-bindgen", 2876 "wasm-bindgen-futures", 2877 "web-sys", 2878 ] 2879 2880 [[package]] ··· 2903 [[package]] 2904 name = "rocketman" 2905 version = "0.2.3" 2906 dependencies = [ 2907 "anyhow", 2908 "async-trait", ··· 2910 "derive_builder", 2911 "flume", 2912 "futures-util", 2913 - "metrics 0.23.1", 2914 "rand 0.8.5", 2915 "serde", 2916 "serde_json", 2917 "tokio", 2918 - "tokio-tungstenite", 2919 "tracing", 2920 "tracing-subscriber", 2921 "url", ··· 2944 2945 [[package]] 2946 name = "rustc-demangle" 2947 - version = "0.1.25" 2948 source = "registry+https://github.com/rust-lang/crates.io-index" 2949 - checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" 2950 2951 [[package]] 2952 name = "rustc-hash" 2953 version = "1.1.0" 2954 source = "registry+https://github.com/rust-lang/crates.io-index" 2955 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 2956 2957 [[package]] 2958 name = "rustc_version" ··· 2991 2992 [[package]] 2993 name = "rustls" 2994 - version = "0.23.29" 2995 source = "registry+https://github.com/rust-lang/crates.io-index" 2996 - checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1" 2997 dependencies = [ 2998 "aws-lc-rs", 2999 "once_cell", 3000 "rustls-pki-types", 3001 - "rustls-webpki", 3002 "subtle", 3003 "zeroize", 3004 ] 3005 3006 [[package]] 3007 name = "rustls-native-certs" 3008 version = "0.8.1" 3009 source = "registry+https://github.com/rust-lang/crates.io-index" 3010 checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" ··· 3016 ] 3017 3018 [[package]] 3019 name = "rustls-pki-types" 3020 version = "1.12.0" 3021 source = "registry+https://github.com/rust-lang/crates.io-index" 3022 checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" 3023 dependencies = [ 3024 "zeroize", 3025 ] 3026 3027 [[package]] ··· 3070 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 3071 3072 [[package]] 3073 name = 
"security-framework" 3074 version = "2.11.1" 3075 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3169 ] 3170 3171 [[package]] 3172 name = "serde_json" 3173 - version = "1.0.141" 3174 source = "registry+https://github.com/rust-lang/crates.io-index" 3175 - checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" 3176 dependencies = [ 3177 "itoa", 3178 "memchr", ··· 3256 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 3257 3258 [[package]] 3259 name = "signature" 3260 version = "2.2.0" 3261 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3288 3289 [[package]] 3290 name = "socket2" 3291 - version = "0.4.10" 3292 source = "registry+https://github.com/rust-lang/crates.io-index" 3293 - checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" 3294 dependencies = [ 3295 "libc", 3296 - "winapi", 3297 ] 3298 3299 [[package]] 3300 name = "socket2" 3301 - version = "0.5.10" 3302 source = "registry+https://github.com/rust-lang/crates.io-index" 3303 - checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" 3304 dependencies = [ 3305 "libc", 3306 - "windows-sys 0.52.0", 3307 ] 3308 3309 [[package]] ··· 3344 source = "registry+https://github.com/rust-lang/crates.io-index" 3345 checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" 3346 dependencies = [ 3347 - "base64", 3348 "bytes", 3349 "crc", 3350 "crossbeam-queue", ··· 3361 "memchr", 3362 "once_cell", 3363 "percent-encoding", 3364 "serde", 3365 "serde_json", 3366 "sha2", ··· 3372 "tracing", 3373 "url", 3374 "uuid", 3375 ] 3376 3377 [[package]] ··· 3419 checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" 3420 dependencies = [ 3421 "atoi", 3422 - "base64", 3423 "bitflags 2.9.1", 3424 "byteorder", 3425 "bytes", ··· 3463 checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" 3464 dependencies = [ 3465 "atoi", 3466 - "base64", 3467 "bitflags 2.9.1", 3468 "byteorder", 3469 "crc", ··· 3641 ] 3642 3643 [[package]] 3644 - name = "system-configuration" 3645 - version = "0.6.1" 3646 source = "registry+https://github.com/rust-lang/crates.io-index" 3647 - checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" 3648 - dependencies = [ 3649 - "bitflags 2.9.1", 3650 - "core-foundation 0.9.4", 3651 - "system-configuration-sys", 3652 - ] 3653 3654 [[package]] 3655 - name = "system-configuration-sys" 3656 - version = "0.6.0" 3657 - source = "registry+https://github.com/rust-lang/crates.io-index" 3658 - checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" 3659 dependencies = [ 3660 - "core-foundation-sys", 3661 - "libc", 3662 ] 3663 - 3664 - [[package]] 3665 - name = "tagptr" 3666 - version = "0.2.0" 3667 - source = "registry+https://github.com/rust-lang/crates.io-index" 3668 - checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" 3669 3670 [[package]] 3671 name = "tempfile" ··· 3789 3790 [[package]] 3791 name = "tokio" 3792 - version = "1.46.1" 3793 source = "registry+https://github.com/rust-lang/crates.io-index" 3794 - checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" 3795 dependencies = [ 3796 "backtrace", 3797 "bytes", 3798 "io-uring", 3799 "libc", 3800 "mio", 3801 "pin-project-lite", 3802 "slab", 3803 - "socket2 0.5.10", 3804 "tokio-macros", 3805 - "windows-sys 0.52.0", 3806 ] 3807 3808 [[package]] ··· 3817 ] 3818 3819 [[package]] 3820 - name = "tokio-native-tls" 
3821 - version = "0.3.1" 3822 - source = "registry+https://github.com/rust-lang/crates.io-index" 3823 - checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" 3824 - dependencies = [ 3825 - "native-tls", 3826 - "tokio", 3827 - ] 3828 - 3829 - [[package]] 3830 - name = "tokio-retry" 3831 - version = "0.3.0" 3832 source = "registry+https://github.com/rust-lang/crates.io-index" 3833 - checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" 3834 dependencies = [ 3835 - "pin-project", 3836 - "rand 0.8.5", 3837 "tokio", 3838 ] 3839 ··· 3843 source = "registry+https://github.com/rust-lang/crates.io-index" 3844 checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" 3845 dependencies = [ 3846 - "rustls", 3847 "tokio", 3848 ] 3849 ··· 3860 3861 [[package]] 3862 name = "tokio-tungstenite" 3863 version = "0.24.0" 3864 source = "registry+https://github.com/rust-lang/crates.io-index" 3865 checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9" 3866 dependencies = [ 3867 "futures-util", 3868 "log", 3869 "tokio", 3870 - "tungstenite", 3871 ] 3872 3873 [[package]] ··· 3934 "bitflags 2.9.1", 3935 "bytes", 3936 "futures-util", 3937 - "http", 3938 "http-body", 3939 "iri-string", 3940 "pin-project-lite", ··· 4036 4037 [[package]] 4038 name = "tungstenite" 4039 version = "0.24.0" 4040 source = "registry+https://github.com/rust-lang/crates.io-index" 4041 checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a" ··· 4043 "byteorder", 4044 "bytes", 4045 "data-encoding", 4046 - "http", 4047 "httparse", 4048 "log", 4049 "rand 0.8.5", 4050 "sha1", 4051 "thiserror 1.0.69", 4052 "utf-8", ··· 4065 "atrium-api", 4066 "atrium-xrpc", 4067 "chrono", 4068 - "http", 4069 "ipld-core", 4070 "langtag", 4071 "regex", ··· 4074 "serde_ipld_dagcbor", 4075 "serde_json", 4076 "thiserror 2.0.12", 4077 - "uuid", 4078 ] 4079 4080 [[package]] ··· 4331 ] 4332 4333 [[package]] 4334 name = "web-sys" 4335 version = "0.3.77" 4336 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4348 dependencies = [ 4349 "js-sys", 4350 "wasm-bindgen", 4351 ] 4352 4353 [[package]] ··· 4523 ] 4524 4525 [[package]] 4526 - name = "windows-registry" 4527 - version = "0.5.3" 4528 - source = "registry+https://github.com/rust-lang/crates.io-index" 4529 - checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" 4530 - dependencies = [ 4531 - "windows-link", 4532 - "windows-result 0.3.4", 4533 - "windows-strings", 4534 - ] 4535 - 4536 - [[package]] 4537 name = "windows-result" 4538 version = "0.1.2" 4539 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4593 source = "registry+https://github.com/rust-lang/crates.io-index" 4594 checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 4595 dependencies = [ 4596 - "windows-targets 0.53.2", 4597 ] 4598 4599 [[package]] ··· 4629 4630 [[package]] 4631 name = "windows-targets" 4632 - version = "0.53.2" 4633 source = "registry+https://github.com/rust-lang/crates.io-index" 4634 - checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" 4635 dependencies = [ 4636 "windows_aarch64_gnullvm 0.53.0", 4637 "windows_aarch64_msvc 0.53.0", 4638 "windows_i686_gnu 0.53.0",
··· 121 dependencies = [ 122 "anyhow", 123 "async-trait", 124 + "atmst", 125 "atrium-api", 126 "axum", 127 + "base64 0.22.1", 128 "chrono", 129 "clap", 130 "dotenvy", ··· 166 checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" 167 168 [[package]] 169 + name = "async-compression" 170 + version = "0.4.27" 171 + source = "registry+https://github.com/rust-lang/crates.io-index" 172 + checksum = "ddb939d66e4ae03cee6091612804ba446b12878410cfa17f785f4dd67d4014e8" 173 + dependencies = [ 174 + "flate2", 175 + "futures-core", 176 + "memchr", 177 + "pin-project-lite", 178 + "tokio", 179 + ] 180 + 181 + [[package]] 182 name = "async-lock" 183 version = "3.4.0" 184 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 201 ] 202 203 [[package]] 204 + name = "atmst" 205 + version = "0.0.1" 206 + source = "registry+https://github.com/rust-lang/crates.io-index" 207 + checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d" 208 + dependencies = [ 209 + "async-trait", 210 + "bytes", 211 + "cid 0.11.1", 212 + "dashmap", 213 + "futures", 214 + "ipld-core", 215 + "iroh-car", 216 + "log", 217 + "multihash 0.19.3", 218 + "serde", 219 + "serde_ipld_dagcbor", 220 + "serde_ipld_dagjson", 221 + "sha2", 222 + "thiserror 1.0.69", 223 + "tokio", 224 + ] 225 + 226 + [[package]] 227 name = "atoi" 228 version = "2.0.0" 229 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 247 "atrium-common", 248 "atrium-xrpc", 249 "chrono", 250 + "http 1.3.1", 251 "ipld-core", 252 "langtag", 253 "regex", ··· 280 source = "registry+https://github.com/rust-lang/crates.io-index" 281 checksum = "0216ad50ce34e9ff982e171c3659e65dedaa2ed5ac2994524debdc9a9647ffa8" 282 dependencies = [ 283 + "http 1.3.1", 284 "serde", 285 "serde_html_form", 286 "serde_json", ··· 296 297 [[package]] 298 name = "aws-lc-rs" 299 + version = "1.13.3" 300 source = "registry+https://github.com/rust-lang/crates.io-index" 301 + checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" 302 dependencies = [ 303 "aws-lc-sys", 304 "zeroize", ··· 328 "bytes", 329 "form_urlencoded", 330 "futures-util", 331 + "http 1.3.1", 332 "http-body", 333 "http-body-util", 334 "hyper", ··· 361 dependencies = [ 362 "bytes", 363 "futures-core", 364 + "http 1.3.1", 365 "http-body", 366 "http-body-util", 367 "mime", ··· 385 ] 386 387 [[package]] 388 + name = "backon" 389 + version = "1.5.2" 390 + source = "registry+https://github.com/rust-lang/crates.io-index" 391 + checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d" 392 + dependencies = [ 393 + "fastrand", 394 + ] 395 + 396 + [[package]] 397 name = "backtrace" 398 version = "0.3.75" 399 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 415 checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" 416 417 [[package]] 418 + name = "base16ct" 419 + version = "0.2.0" 420 + source = "registry+https://github.com/rust-lang/crates.io-index" 421 + checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" 422 + 423 + [[package]] 424 + name = "base64" 425 + version = "0.21.7" 426 + source = "registry+https://github.com/rust-lang/crates.io-index" 427 + checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" 428 + 429 + [[package]] 430 name = "base64" 431 version = "0.22.1" 432 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 455 "proc-macro2", 456 "quote", 457 "regex", 458 + "rustc-hash 1.1.0", 459 "shlex", 460 "syn 2.0.104", 461 
"which", ··· 562 version = "1.10.1" 563 source = "registry+https://github.com/rust-lang/crates.io-index" 564 checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 565 + dependencies = [ 566 + "serde", 567 + ] 568 569 [[package]] 570 name = "cadet" ··· 572 dependencies = [ 573 "anyhow", 574 "async-trait", 575 + "atmst", 576 "atrium-api", 577 + "base64 0.22.1", 578 "chrono", 579 "cid 0.11.1", 580 "dotenvy", 581 "flume", 582 + "futures", 583 "iroh-car", 584 "libipld", 585 "metrics 0.23.1", ··· 591 "reqwest", 592 "rocketman", 593 "serde", 594 + "serde_ipld_dagcbor", 595 "serde_json", 596 "sqlx", 597 "time", 598 "tokio", 599 + "tokio-tungstenite 0.24.0", 600 "tracing", 601 "tracing-subscriber", 602 "types", ··· 647 648 [[package]] 649 name = "cc" 650 + version = "1.2.31" 651 source = "registry+https://github.com/rust-lang/crates.io-index" 652 + checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" 653 dependencies = [ 654 "jobserver", 655 "libc", ··· 672 checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" 673 674 [[package]] 675 + name = "cfg_aliases" 676 + version = "0.2.1" 677 + source = "registry+https://github.com/rust-lang/crates.io-index" 678 + checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" 679 + 680 + [[package]] 681 name = "chrono" 682 version = "0.4.41" 683 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 732 733 [[package]] 734 name = "clap" 735 + version = "4.5.42" 736 source = "registry+https://github.com/rust-lang/crates.io-index" 737 + checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882" 738 dependencies = [ 739 "clap_builder", 740 "clap_derive", ··· 742 743 [[package]] 744 name = "clap_builder" 745 + version = "4.5.42" 746 source = "registry+https://github.com/rust-lang/crates.io-index" 747 + checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966" 748 dependencies = [ 749 "anstream", 750 "anstyle", ··· 786 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 787 788 [[package]] 789 + name = "colored" 790 + version = "2.2.0" 791 + source = "registry+https://github.com/rust-lang/crates.io-index" 792 + checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" 793 + dependencies = [ 794 + "lazy_static", 795 + "windows-sys 0.59.0", 796 + ] 797 + 798 + [[package]] 799 name = "combine" 800 version = "4.6.7" 801 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 890 checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" 891 892 [[package]] 893 + name = "crc32fast" 894 + version = "1.5.0" 895 + source = "registry+https://github.com/rust-lang/crates.io-index" 896 + checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" 897 + dependencies = [ 898 + "cfg-if", 899 + ] 900 + 901 + [[package]] 902 name = "crossbeam-channel" 903 version = "0.5.15" 904 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 930 version = "0.8.21" 931 source = "registry+https://github.com/rust-lang/crates.io-index" 932 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 933 + 934 + [[package]] 935 + name = "crypto-bigint" 936 + version = "0.5.5" 937 + source = "registry+https://github.com/rust-lang/crates.io-index" 938 + checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" 939 + dependencies = [ 940 + "generic-array", 941 + "rand_core 0.6.4", 942 + "subtle", 943 + "zeroize", 
944 + ] 945 946 [[package]] 947 name = "crypto-common" ··· 1127 ] 1128 1129 [[package]] 1130 + name = "dirs" 1131 + version = "5.0.1" 1132 + source = "registry+https://github.com/rust-lang/crates.io-index" 1133 + checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" 1134 + dependencies = [ 1135 + "dirs-sys", 1136 + ] 1137 + 1138 + [[package]] 1139 + name = "dirs-sys" 1140 + version = "0.4.1" 1141 + source = "registry+https://github.com/rust-lang/crates.io-index" 1142 + checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" 1143 + dependencies = [ 1144 + "libc", 1145 + "option-ext", 1146 + "redox_users", 1147 + "windows-sys 0.48.0", 1148 + ] 1149 + 1150 + [[package]] 1151 name = "displaydoc" 1152 version = "0.2.5" 1153 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1171 checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" 1172 1173 [[package]] 1174 + name = "ecdsa" 1175 + version = "0.16.9" 1176 + source = "registry+https://github.com/rust-lang/crates.io-index" 1177 + checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" 1178 + dependencies = [ 1179 + "der", 1180 + "digest", 1181 + "elliptic-curve", 1182 + "rfc6979", 1183 + "signature", 1184 + "spki", 1185 + ] 1186 + 1187 + [[package]] 1188 name = "either" 1189 version = "1.15.0" 1190 source = "registry+https://github.com/rust-lang/crates.io-index" 1191 checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 1192 dependencies = [ 1193 "serde", 1194 + ] 1195 + 1196 + [[package]] 1197 + name = "elliptic-curve" 1198 + version = "0.13.8" 1199 + source = "registry+https://github.com/rust-lang/crates.io-index" 1200 + checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" 1201 + dependencies = [ 1202 + "base16ct", 1203 + "crypto-bigint", 1204 + "digest", 1205 + "ff", 1206 + "generic-array", 1207 + "group", 1208 + "pkcs8", 1209 + "rand_core 0.6.4", 1210 + "sec1", 1211 + "subtle", 1212 + "zeroize", 1213 ] 1214 1215 [[package]] ··· 1276 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 1277 1278 [[package]] 1279 + name = "ff" 1280 + version = "0.13.1" 1281 + source = "registry+https://github.com/rust-lang/crates.io-index" 1282 + checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" 1283 + dependencies = [ 1284 + "rand_core 0.6.4", 1285 + "subtle", 1286 + ] 1287 + 1288 + [[package]] 1289 + name = "flate2" 1290 + version = "1.1.2" 1291 + source = "registry+https://github.com/rust-lang/crates.io-index" 1292 + checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" 1293 + dependencies = [ 1294 + "crc32fast", 1295 + "miniz_oxide", 1296 + ] 1297 + 1298 + [[package]] 1299 name = "flume" 1300 version = "0.11.1" 1301 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1320 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 1321 1322 [[package]] 1323 name = "form_urlencoded" 1324 version = "1.2.1" 1325 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1456 dependencies = [ 1457 "typenum", 1458 "version_check", 1459 + "zeroize", 1460 ] 1461 1462 [[package]] ··· 1479 checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 1480 dependencies = [ 1481 "cfg-if", 1482 + "js-sys", 1483 "libc", 1484 "r-efi", 1485 "wasi 0.14.2+wasi-0.2.4", 1486 + "wasm-bindgen", 1487 ] 1488 1489 [[package]] ··· 1499 checksum = 
"a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" 1500 1501 [[package]] 1502 + name = "group" 1503 + version = "0.13.0" 1504 + source = "registry+https://github.com/rust-lang/crates.io-index" 1505 + checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" 1506 + dependencies = [ 1507 + "ff", 1508 + "rand_core 0.6.4", 1509 + "subtle", 1510 + ] 1511 + 1512 + [[package]] 1513 name = "h2" 1514 version = "0.4.11" 1515 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1520 "fnv", 1521 "futures-core", 1522 "futures-sink", 1523 + "http 1.3.1", 1524 "indexmap", 1525 "slab", 1526 "tokio", ··· 1595 1596 [[package]] 1597 name = "http" 1598 + version = "0.2.12" 1599 + source = "registry+https://github.com/rust-lang/crates.io-index" 1600 + checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" 1601 + dependencies = [ 1602 + "bytes", 1603 + "fnv", 1604 + "itoa", 1605 + ] 1606 + 1607 + [[package]] 1608 + name = "http" 1609 version = "1.3.1" 1610 source = "registry+https://github.com/rust-lang/crates.io-index" 1611 checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" ··· 1622 checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" 1623 dependencies = [ 1624 "bytes", 1625 + "http 1.3.1", 1626 ] 1627 1628 [[package]] ··· 1633 dependencies = [ 1634 "bytes", 1635 "futures-core", 1636 + "http 1.3.1", 1637 "http-body", 1638 "pin-project-lite", 1639 ] ··· 1660 "futures-channel", 1661 "futures-util", 1662 "h2", 1663 + "http 1.3.1", 1664 "http-body", 1665 "httparse", 1666 "httpdate", ··· 1677 source = "registry+https://github.com/rust-lang/crates.io-index" 1678 checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" 1679 dependencies = [ 1680 + "http 1.3.1", 1681 "hyper", 1682 "hyper-util", 1683 + "rustls 0.23.31", 1684 + "rustls-native-certs 0.8.1", 1685 "rustls-pki-types", 1686 "tokio", 1687 + "tokio-rustls 0.26.2", 1688 "tower-service", 1689 + "webpki-roots 1.0.2", 1690 ] 1691 1692 [[package]] 1693 name = "hyper-util" 1694 + version = "0.1.16" 1695 source = "registry+https://github.com/rust-lang/crates.io-index" 1696 + checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" 1697 dependencies = [ 1698 + "base64 0.22.1", 1699 "bytes", 1700 "futures-channel", 1701 "futures-core", 1702 "futures-util", 1703 + "http 1.3.1", 1704 "http-body", 1705 "hyper", 1706 "ipnet", 1707 "libc", 1708 "percent-encoding", 1709 "pin-project-lite", 1710 + "socket2 0.6.0", 1711 "tokio", 1712 "tower-service", 1713 "tracing", 1714 ] 1715 1716 [[package]] ··· 1862 1863 [[package]] 1864 name = "io-uring" 1865 + version = "0.7.9" 1866 source = "registry+https://github.com/rust-lang/crates.io-index" 1867 + checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" 1868 dependencies = [ 1869 "bitflags 2.9.1", 1870 "cfg-if", ··· 1900 1901 [[package]] 1902 name = "iroh-car" 1903 + version = "0.5.1" 1904 source = "registry+https://github.com/rust-lang/crates.io-index" 1905 + checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 1906 dependencies = [ 1907 "anyhow", 1908 + "cid 0.11.1", 1909 "futures", 1910 + "serde", 1911 + "serde_ipld_dagcbor", 1912 "thiserror 1.0.69", 1913 "tokio", 1914 "unsigned-varint 0.7.2", ··· 1953 dependencies = [ 1954 "once_cell", 1955 "wasm-bindgen", 1956 + ] 1957 + 1958 + [[package]] 1959 + name = "k256" 1960 + version = "0.13.4" 1961 + source = "registry+https://github.com/rust-lang/crates.io-index" 1962 + 
checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b" 1963 + dependencies = [ 1964 + "cfg-if", 1965 + "ecdsa", 1966 + "elliptic-curve", 1967 + "once_cell", 1968 + "sha2", 1969 + "signature", 1970 ] 1971 1972 [[package]] ··· 2104 checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" 2105 dependencies = [ 2106 "cfg-if", 2107 + "windows-targets 0.53.3", 2108 ] 2109 2110 [[package]] ··· 2114 checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" 2115 2116 [[package]] 2117 + name = "libredox" 2118 + version = "0.1.9" 2119 + source = "registry+https://github.com/rust-lang/crates.io-index" 2120 + checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" 2121 + dependencies = [ 2122 + "bitflags 2.9.1", 2123 + "libc", 2124 + ] 2125 + 2126 + [[package]] 2127 name = "libsqlite3-sys" 2128 version = "0.30.1" 2129 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2190 ] 2191 2192 [[package]] 2193 + name = "lru-slab" 2194 + version = "0.1.2" 2195 + source = "registry+https://github.com/rust-lang/crates.io-index" 2196 + checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" 2197 + 2198 + [[package]] 2199 name = "matchers" 2200 version = "0.1.0" 2201 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2252 source = "registry+https://github.com/rust-lang/crates.io-index" 2253 checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" 2254 dependencies = [ 2255 + "base64 0.22.1", 2256 "http-body-util", 2257 "hyper", 2258 "hyper-rustls", ··· 2278 "hashbrown 0.15.4", 2279 "metrics 0.24.2", 2280 "quanta", 2281 + "rand 0.9.2", 2282 "rand_xoshiro", 2283 "sketches-ddsketch", 2284 ] ··· 2346 "bytes", 2347 "encoding_rs", 2348 "futures-util", 2349 + "http 1.3.1", 2350 "httparse", 2351 "memchr", 2352 "mime", ··· 2461 ] 2462 2463 [[package]] 2464 name = "nom" 2465 version = "7.1.3" 2466 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2487 dependencies = [ 2488 "overload", 2489 "winapi", 2490 + ] 2491 + 2492 + [[package]] 2493 + name = "num-bigint" 2494 + version = "0.4.6" 2495 + source = "registry+https://github.com/rust-lang/crates.io-index" 2496 + checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" 2497 + dependencies = [ 2498 + "num-integer", 2499 + "num-traits", 2500 ] 2501 2502 [[package]] ··· 2592 checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 2593 2594 [[package]] 2595 name = "openssl-probe" 2596 version = "0.1.6" 2597 source = "registry+https://github.com/rust-lang/crates.io-index" 2598 checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" 2599 2600 [[package]] 2601 + name = "option-ext" 2602 + version = "0.2.0" 2603 source = "registry+https://github.com/rust-lang/crates.io-index" 2604 + checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" 2605 2606 [[package]] 2607 name = "overload" ··· 2654 checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 2655 2656 [[package]] 2657 name = "pin-project-lite" 2658 version = "0.2.16" 2659 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2724 2725 [[package]] 2726 name = "prettyplease" 2727 + version = "0.2.36" 2728 source = "registry+https://github.com/rust-lang/crates.io-index" 2729 + checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" 2730 dependencies = [ 2731 "proc-macro2", 2732 "syn 2.0.104", ··· 2809 ] 2810 
2811 [[package]] 2812 + name = "quinn" 2813 + version = "0.11.8" 2814 + source = "registry+https://github.com/rust-lang/crates.io-index" 2815 + checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" 2816 + dependencies = [ 2817 + "bytes", 2818 + "cfg_aliases", 2819 + "pin-project-lite", 2820 + "quinn-proto", 2821 + "quinn-udp", 2822 + "rustc-hash 2.1.1", 2823 + "rustls 0.23.31", 2824 + "socket2 0.5.10", 2825 + "thiserror 2.0.12", 2826 + "tokio", 2827 + "tracing", 2828 + "web-time", 2829 + ] 2830 + 2831 + [[package]] 2832 + name = "quinn-proto" 2833 + version = "0.11.12" 2834 + source = "registry+https://github.com/rust-lang/crates.io-index" 2835 + checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" 2836 + dependencies = [ 2837 + "bytes", 2838 + "getrandom 0.3.3", 2839 + "lru-slab", 2840 + "rand 0.9.2", 2841 + "ring", 2842 + "rustc-hash 2.1.1", 2843 + "rustls 0.23.31", 2844 + "rustls-pki-types", 2845 + "slab", 2846 + "thiserror 2.0.12", 2847 + "tinyvec", 2848 + "tracing", 2849 + "web-time", 2850 + ] 2851 + 2852 + [[package]] 2853 + name = "quinn-udp" 2854 + version = "0.5.13" 2855 + source = "registry+https://github.com/rust-lang/crates.io-index" 2856 + checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" 2857 + dependencies = [ 2858 + "cfg_aliases", 2859 + "libc", 2860 + "once_cell", 2861 + "socket2 0.5.10", 2862 + "tracing", 2863 + "windows-sys 0.59.0", 2864 + ] 2865 + 2866 + [[package]] 2867 name = "quote" 2868 version = "1.0.40" 2869 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2891 2892 [[package]] 2893 name = "rand" 2894 + version = "0.9.2" 2895 source = "registry+https://github.com/rust-lang/crates.io-index" 2896 + checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 2897 dependencies = [ 2898 "rand_chacha 0.9.0", 2899 "rand_core 0.9.3", ··· 2957 2958 [[package]] 2959 name = "redis" 2960 + version = "0.32.4" 2961 source = "registry+https://github.com/rust-lang/crates.io-index" 2962 + checksum = "e1f66bf4cac9733a23bcdf1e0e01effbaaad208567beba68be8f67e5f4af3ee1" 2963 dependencies = [ 2964 "arc-swap", 2965 + "backon", 2966 "bytes", 2967 + "cfg-if", 2968 "combine", 2969 + "futures-channel", 2970 "futures-util", 2971 "itoa", 2972 + "num-bigint", 2973 "percent-encoding", 2974 "pin-project-lite", 2975 "ryu", 2976 "sha1_smol", 2977 + "socket2 0.6.0", 2978 "tokio", 2979 "tokio-util", 2980 "url", 2981 ] 2982 2983 [[package]] 2984 name = "redox_syscall" 2985 + version = "0.5.17" 2986 source = "registry+https://github.com/rust-lang/crates.io-index" 2987 + checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" 2988 dependencies = [ 2989 "bitflags 2.9.1", 2990 ] 2991 2992 [[package]] 2993 + name = "redox_users" 2994 + version = "0.4.6" 2995 + source = "registry+https://github.com/rust-lang/crates.io-index" 2996 + checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" 2997 + dependencies = [ 2998 + "getrandom 0.2.16", 2999 + "libredox", 3000 + "thiserror 1.0.69", 3001 + ] 3002 + 3003 + [[package]] 3004 name = "regex" 3005 version = "1.11.1" 3006 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3050 source = "registry+https://github.com/rust-lang/crates.io-index" 3051 checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" 3052 dependencies = [ 3053 + "async-compression", 3054 + "base64 0.22.1", 3055 "bytes", 3056 "futures-core", 3057 + "futures-util", 3058 + "http 1.3.1", 3059 "http-body", 
3060 "http-body-util", 3061 "hyper", 3062 "hyper-rustls", 3063 "hyper-util", 3064 "js-sys", 3065 "log", 3066 "percent-encoding", 3067 "pin-project-lite", 3068 + "quinn", 3069 + "rustls 0.23.31", 3070 "rustls-pki-types", 3071 "serde", 3072 "serde_json", 3073 "serde_urlencoded", 3074 "sync_wrapper", 3075 "tokio", 3076 + "tokio-rustls 0.26.2", 3077 + "tokio-util", 3078 "tower", 3079 "tower-http", 3080 "tower-service", 3081 "url", 3082 "wasm-bindgen", 3083 "wasm-bindgen-futures", 3084 + "wasm-streams", 3085 "web-sys", 3086 + "webpki-roots 1.0.2", 3087 + ] 3088 + 3089 + [[package]] 3090 + name = "rfc6979" 3091 + version = "0.4.0" 3092 + source = "registry+https://github.com/rust-lang/crates.io-index" 3093 + checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" 3094 + dependencies = [ 3095 + "hmac", 3096 + "subtle", 3097 ] 3098 3099 [[package]] ··· 3122 [[package]] 3123 name = "rocketman" 3124 version = "0.2.3" 3125 + source = "registry+https://github.com/rust-lang/crates.io-index" 3126 + checksum = "9928fe43979c19ff1f46f7920c30b76dfcead7a4d571c9836c4d02da8587f844" 3127 dependencies = [ 3128 "anyhow", 3129 "async-trait", ··· 3131 "derive_builder", 3132 "flume", 3133 "futures-util", 3134 + "metrics 0.24.2", 3135 "rand 0.8.5", 3136 "serde", 3137 "serde_json", 3138 "tokio", 3139 + "tokio-tungstenite 0.20.1", 3140 "tracing", 3141 "tracing-subscriber", 3142 "url", ··· 3165 3166 [[package]] 3167 name = "rustc-demangle" 3168 + version = "0.1.26" 3169 source = "registry+https://github.com/rust-lang/crates.io-index" 3170 + checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" 3171 3172 [[package]] 3173 name = "rustc-hash" 3174 version = "1.1.0" 3175 source = "registry+https://github.com/rust-lang/crates.io-index" 3176 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 3177 + 3178 + [[package]] 3179 + name = "rustc-hash" 3180 + version = "2.1.1" 3181 + source = "registry+https://github.com/rust-lang/crates.io-index" 3182 + checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 3183 3184 [[package]] 3185 name = "rustc_version" ··· 3218 3219 [[package]] 3220 name = "rustls" 3221 + version = "0.21.12" 3222 + source = "registry+https://github.com/rust-lang/crates.io-index" 3223 + checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" 3224 + dependencies = [ 3225 + "log", 3226 + "ring", 3227 + "rustls-webpki 0.101.7", 3228 + "sct", 3229 + ] 3230 + 3231 + [[package]] 3232 + name = "rustls" 3233 + version = "0.23.31" 3234 source = "registry+https://github.com/rust-lang/crates.io-index" 3235 + checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" 3236 dependencies = [ 3237 "aws-lc-rs", 3238 "once_cell", 3239 + "ring", 3240 "rustls-pki-types", 3241 + "rustls-webpki 0.103.4", 3242 "subtle", 3243 "zeroize", 3244 ] 3245 3246 [[package]] 3247 name = "rustls-native-certs" 3248 + version = "0.6.3" 3249 + source = "registry+https://github.com/rust-lang/crates.io-index" 3250 + checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" 3251 + dependencies = [ 3252 + "openssl-probe", 3253 + "rustls-pemfile", 3254 + "schannel", 3255 + "security-framework 2.11.1", 3256 + ] 3257 + 3258 + [[package]] 3259 + name = "rustls-native-certs" 3260 version = "0.8.1" 3261 source = "registry+https://github.com/rust-lang/crates.io-index" 3262 checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" ··· 3268 ] 3269 3270 [[package]] 3271 + name = 
"rustls-pemfile" 3272 + version = "1.0.4" 3273 + source = "registry+https://github.com/rust-lang/crates.io-index" 3274 + checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" 3275 + dependencies = [ 3276 + "base64 0.21.7", 3277 + ] 3278 + 3279 + [[package]] 3280 name = "rustls-pki-types" 3281 version = "1.12.0" 3282 source = "registry+https://github.com/rust-lang/crates.io-index" 3283 checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" 3284 dependencies = [ 3285 + "web-time", 3286 "zeroize", 3287 + ] 3288 + 3289 + [[package]] 3290 + name = "rustls-webpki" 3291 + version = "0.101.7" 3292 + source = "registry+https://github.com/rust-lang/crates.io-index" 3293 + checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" 3294 + dependencies = [ 3295 + "ring", 3296 + "untrusted", 3297 ] 3298 3299 [[package]] ··· 3342 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 3343 3344 [[package]] 3345 + name = "sct" 3346 + version = "0.7.1" 3347 + source = "registry+https://github.com/rust-lang/crates.io-index" 3348 + checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" 3349 + dependencies = [ 3350 + "ring", 3351 + "untrusted", 3352 + ] 3353 + 3354 + [[package]] 3355 + name = "sec1" 3356 + version = "0.7.3" 3357 + source = "registry+https://github.com/rust-lang/crates.io-index" 3358 + checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" 3359 + dependencies = [ 3360 + "base16ct", 3361 + "der", 3362 + "generic-array", 3363 + "pkcs8", 3364 + "subtle", 3365 + "zeroize", 3366 + ] 3367 + 3368 + [[package]] 3369 name = "security-framework" 3370 version = "2.11.1" 3371 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3465 ] 3466 3467 [[package]] 3468 + name = "serde_ipld_dagjson" 3469 + version = "0.2.0" 3470 + source = "registry+https://github.com/rust-lang/crates.io-index" 3471 + checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36" 3472 + dependencies = [ 3473 + "ipld-core", 3474 + "serde", 3475 + "serde_json", 3476 + ] 3477 + 3478 + [[package]] 3479 name = "serde_json" 3480 + version = "1.0.142" 3481 source = "registry+https://github.com/rust-lang/crates.io-index" 3482 + checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" 3483 dependencies = [ 3484 "itoa", 3485 "memchr", ··· 3563 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 3564 3565 [[package]] 3566 + name = "signal-hook-registry" 3567 + version = "1.4.5" 3568 + source = "registry+https://github.com/rust-lang/crates.io-index" 3569 + checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" 3570 + dependencies = [ 3571 + "libc", 3572 + ] 3573 + 3574 + [[package]] 3575 name = "signature" 3576 version = "2.2.0" 3577 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3604 3605 [[package]] 3606 name = "socket2" 3607 + version = "0.5.10" 3608 source = "registry+https://github.com/rust-lang/crates.io-index" 3609 + checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" 3610 dependencies = [ 3611 "libc", 3612 + "windows-sys 0.52.0", 3613 ] 3614 3615 [[package]] 3616 name = "socket2" 3617 + version = "0.6.0" 3618 source = "registry+https://github.com/rust-lang/crates.io-index" 3619 + checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" 3620 dependencies = [ 3621 "libc", 3622 + "windows-sys 0.59.0", 3623 ] 3624 3625 [[package]] ··· 3660 
source = "registry+https://github.com/rust-lang/crates.io-index" 3661 checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" 3662 dependencies = [ 3663 + "base64 0.22.1", 3664 "bytes", 3665 "crc", 3666 "crossbeam-queue", ··· 3677 "memchr", 3678 "once_cell", 3679 "percent-encoding", 3680 + "rustls 0.23.31", 3681 "serde", 3682 "serde_json", 3683 "sha2", ··· 3689 "tracing", 3690 "url", 3691 "uuid", 3692 + "webpki-roots 0.26.11", 3693 ] 3694 3695 [[package]] ··· 3737 checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" 3738 dependencies = [ 3739 "atoi", 3740 + "base64 0.22.1", 3741 "bitflags 2.9.1", 3742 "byteorder", 3743 "bytes", ··· 3781 checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" 3782 dependencies = [ 3783 "atoi", 3784 + "base64 0.22.1", 3785 "bitflags 2.9.1", 3786 "byteorder", 3787 "crc", ··· 3959 ] 3960 3961 [[package]] 3962 + name = "tagptr" 3963 + version = "0.2.0" 3964 source = "registry+https://github.com/rust-lang/crates.io-index" 3965 + checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" 3966 3967 [[package]] 3968 + name = "teal-cli" 3969 + version = "0.1.0" 3970 dependencies = [ 3971 + "anyhow", 3972 + "chrono", 3973 + "clap", 3974 + "colored", 3975 + "dirs", 3976 + "hex", 3977 + "k256", 3978 + "multibase", 3979 + "rand 0.8.5", 3980 + "serde", 3981 + "serde_json", 3982 + "tempfile", 3983 + "tokio", 3984 ] 3985 3986 [[package]] 3987 name = "tempfile" ··· 4105 4106 [[package]] 4107 name = "tokio" 4108 + version = "1.47.1" 4109 source = "registry+https://github.com/rust-lang/crates.io-index" 4110 + checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" 4111 dependencies = [ 4112 "backtrace", 4113 "bytes", 4114 "io-uring", 4115 "libc", 4116 "mio", 4117 + "parking_lot", 4118 "pin-project-lite", 4119 + "signal-hook-registry", 4120 "slab", 4121 + "socket2 0.6.0", 4122 "tokio-macros", 4123 + "windows-sys 0.59.0", 4124 ] 4125 4126 [[package]] ··· 4135 ] 4136 4137 [[package]] 4138 + name = "tokio-rustls" 4139 + version = "0.24.1" 4140 source = "registry+https://github.com/rust-lang/crates.io-index" 4141 + checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" 4142 dependencies = [ 4143 + "rustls 0.21.12", 4144 "tokio", 4145 ] 4146 ··· 4150 source = "registry+https://github.com/rust-lang/crates.io-index" 4151 checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" 4152 dependencies = [ 4153 + "rustls 0.23.31", 4154 "tokio", 4155 ] 4156 ··· 4167 4168 [[package]] 4169 name = "tokio-tungstenite" 4170 + version = "0.20.1" 4171 + source = "registry+https://github.com/rust-lang/crates.io-index" 4172 + checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c" 4173 + dependencies = [ 4174 + "futures-util", 4175 + "log", 4176 + "rustls 0.21.12", 4177 + "rustls-native-certs 0.6.3", 4178 + "tokio", 4179 + "tokio-rustls 0.24.1", 4180 + "tungstenite 0.20.1", 4181 + "webpki-roots 0.25.4", 4182 + ] 4183 + 4184 + [[package]] 4185 + name = "tokio-tungstenite" 4186 version = "0.24.0" 4187 source = "registry+https://github.com/rust-lang/crates.io-index" 4188 checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9" 4189 dependencies = [ 4190 "futures-util", 4191 "log", 4192 + "rustls 0.23.31", 4193 + "rustls-pki-types", 4194 "tokio", 4195 + "tokio-rustls 0.26.2", 4196 + "tungstenite 0.24.0", 4197 + "webpki-roots 0.26.11", 4198 ] 4199 4200 [[package]] ··· 4261 "bitflags 2.9.1", 4262 "bytes", 4263 
"futures-util", 4264 + "http 1.3.1", 4265 "http-body", 4266 "iri-string", 4267 "pin-project-lite", ··· 4363 4364 [[package]] 4365 name = "tungstenite" 4366 + version = "0.20.1" 4367 + source = "registry+https://github.com/rust-lang/crates.io-index" 4368 + checksum = "9e3dac10fd62eaf6617d3a904ae222845979aec67c615d1c842b4002c7666fb9" 4369 + dependencies = [ 4370 + "byteorder", 4371 + "bytes", 4372 + "data-encoding", 4373 + "http 0.2.12", 4374 + "httparse", 4375 + "log", 4376 + "rand 0.8.5", 4377 + "rustls 0.21.12", 4378 + "sha1", 4379 + "thiserror 1.0.69", 4380 + "url", 4381 + "utf-8", 4382 + ] 4383 + 4384 + [[package]] 4385 + name = "tungstenite" 4386 version = "0.24.0" 4387 source = "registry+https://github.com/rust-lang/crates.io-index" 4388 checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a" ··· 4390 "byteorder", 4391 "bytes", 4392 "data-encoding", 4393 + "http 1.3.1", 4394 "httparse", 4395 "log", 4396 "rand 0.8.5", 4397 + "rustls 0.23.31", 4398 + "rustls-pki-types", 4399 "sha1", 4400 "thiserror 1.0.69", 4401 "utf-8", ··· 4414 "atrium-api", 4415 "atrium-xrpc", 4416 "chrono", 4417 + "http 1.3.1", 4418 "ipld-core", 4419 "langtag", 4420 "regex", ··· 4423 "serde_ipld_dagcbor", 4424 "serde_json", 4425 "thiserror 2.0.12", 4426 ] 4427 4428 [[package]] ··· 4679 ] 4680 4681 [[package]] 4682 + name = "wasm-streams" 4683 + version = "0.4.2" 4684 + source = "registry+https://github.com/rust-lang/crates.io-index" 4685 + checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" 4686 + dependencies = [ 4687 + "futures-util", 4688 + "js-sys", 4689 + "wasm-bindgen", 4690 + "wasm-bindgen-futures", 4691 + "web-sys", 4692 + ] 4693 + 4694 + [[package]] 4695 name = "web-sys" 4696 version = "0.3.77" 4697 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4709 dependencies = [ 4710 "js-sys", 4711 "wasm-bindgen", 4712 + ] 4713 + 4714 + [[package]] 4715 + name = "webpki-roots" 4716 + version = "0.25.4" 4717 + source = "registry+https://github.com/rust-lang/crates.io-index" 4718 + checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" 4719 + 4720 + [[package]] 4721 + name = "webpki-roots" 4722 + version = "0.26.11" 4723 + source = "registry+https://github.com/rust-lang/crates.io-index" 4724 + checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" 4725 + dependencies = [ 4726 + "webpki-roots 1.0.2", 4727 + ] 4728 + 4729 + [[package]] 4730 + name = "webpki-roots" 4731 + version = "1.0.2" 4732 + source = "registry+https://github.com/rust-lang/crates.io-index" 4733 + checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2" 4734 + dependencies = [ 4735 + "rustls-pki-types", 4736 ] 4737 4738 [[package]] ··· 4908 ] 4909 4910 [[package]] 4911 name = "windows-result" 4912 version = "0.1.2" 4913 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4967 source = "registry+https://github.com/rust-lang/crates.io-index" 4968 checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" 4969 dependencies = [ 4970 + "windows-targets 0.53.3", 4971 ] 4972 4973 [[package]] ··· 5003 5004 [[package]] 5005 name = "windows-targets" 5006 + version = "0.53.3" 5007 source = "registry+https://github.com/rust-lang/crates.io-index" 5008 + checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" 5009 dependencies = [ 5010 + "windows-link", 5011 "windows_aarch64_gnullvm 0.53.0", 5012 "windows_aarch64_msvc 0.53.0", 5013 "windows_i686_gnu 0.53.0",
+30 -8
Cargo.toml
··· 1 [workspace] 2 - members = ["apps/aqua", "services/cadet", "services/rocketman"] 3 resolver = "2" 4 5 [workspace.dependencies] 6 # Shared dependencies 7 - tokio = { version = "1.0", features = ["rt-multi-thread", "macros"] } 8 axum = { version = "0.8", features = ["macros"] } 9 tower-http = { version = "0.6", features = ["cors"] } 10 - sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "uuid"] } 11 serde = { version = "1.0", features = ["derive"] } 12 anyhow = "1.0" 13 serde_json = "1.0" 14 tracing = "0.1" 15 tracing-subscriber = "0.3" 16 metrics = "0.23" 17 - reqwest = { version = "0.12", features = ["json"] } 18 url = "2.5" 19 rand = "0.8" 20 flume = "0.11" 21 async-trait = "0.1" 22 time = "0.3" 23 dotenvy = "0.15" 24 - tokio-tungstenite = "0.24" 25 atrium-api = "0.25" 26 chrono = "0.4" 27 uuid = { version = "1.0", features = ["v4", "serde"] } 28 types = { path = "services/types" } 29 - rocketman = { path = "services/rocketman" } 30 31 # CAR and IPLD dependencies 32 - iroh-car = "0.4" 33 libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] } 34 cid = "0.11" 35 base64 = "0.22" 36 37 # Redis for job queues and caching 38 - redis = { version = "0.24", features = ["tokio-comp", "connection-manager"] }
··· 1 [workspace] 2 + members = ["apps/aqua", "services/cadet", "tools/teal-cli"] 3 + default-members = ["services/types"] 4 resolver = "2" 5 6 [workspace.dependencies] 7 # Shared dependencies 8 + tokio = { version = "1.0", features = [ 9 + "rt-multi-thread", 10 + "macros", 11 + "time", 12 + "net", 13 + "sync", 14 + ] } 15 axum = { version = "0.8", features = ["macros"] } 16 tower-http = { version = "0.6", features = ["cors"] } 17 + sqlx = { version = "0.8", features = [ 18 + "runtime-tokio", 19 + "postgres", 20 + "uuid", 21 + "tls-rustls", 22 + ] } 23 serde = { version = "1.0", features = ["derive"] } 24 anyhow = "1.0" 25 serde_json = "1.0" 26 tracing = "0.1" 27 tracing-subscriber = "0.3" 28 metrics = "0.23" 29 + reqwest = { version = "0.12", default-features = false, features = [ 30 + "json", 31 + "rustls-tls", 32 + "stream", 33 + "gzip", 34 + ] } 35 url = "2.5" 36 rand = "0.8" 37 flume = "0.11" 38 async-trait = "0.1" 39 time = "0.3" 40 dotenvy = "0.15" 41 + tokio-tungstenite = { version = "*", default-features = false, features = [ 42 + "rustls-tls-webpki-roots", 43 + "connect", 44 + "handshake", 45 + ] } 46 atrium-api = "0.25" 47 chrono = "0.4" 48 uuid = { version = "1.0", features = ["v4", "serde"] } 49 types = { path = "services/types" } 50 + rocketman = "0.2.3" 51 52 # CAR and IPLD dependencies 53 + iroh-car = "0.5" 54 libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] } 55 cid = "0.11" 56 base64 = "0.22" 57 + atmst = "0.0.1" 58 59 # Redis for job queues and caching 60 + redis = { version = "0.32", features = ["tokio-comp", "connection-manager"] }
+18
Cross.toml
···
··· 1 + [build.env] 2 + passthrough = [ 3 + "CARGO_HOME", 4 + "CARGO_TARGET_DIR", 5 + "SQLX_OFFLINE", 6 + "PKG_CONFIG_ALLOW_CROSS", 7 + ] 8 + 9 + [target.aarch64-unknown-linux-gnu] 10 + image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main" 11 + 12 + [target.aarch64-unknown-linux-gnu.env] 13 + passthrough = [ 14 + "CARGO_HOME", 15 + "CARGO_TARGET_DIR", 16 + "SQLX_OFFLINE", 17 + "PKG_CONFIG_ALLOW_CROSS", 18 + ]
+14 -9
README.md
··· 29 # Install all dependencies (Node.js and Rust) 30 pnpm install 31 32 # Set up environment configuration 33 cp apps/aqua/.env.example apps/aqua/.env 34 ··· 90 - **Format**: `YYYYMMDDHHMMSS_description.sql` (timestamped SQL files) 91 - **Type**: Forward-only SQL migrations managed by SQLx 92 93 - #### Database Schema 94 - 95 - The database includes tables for: 96 - - **Music data**: `artists`, `releases`, `recordings`, `plays` 97 - - **User data**: `profiles`, `statii` (status records), `featured_items` 98 - - **CAR imports**: `car_import_requests`, `car_blocks`, `car_extracted_records` 99 - - **Analytics**: Materialized views for play counts and top charts 100 - 101 ## Development 102 103 To start the development server run: ··· 106 turbo dev --filter=@teal/aqua 107 ``` 108 109 - Open http://localhost:3000/ with your browser to see the home page. You will need to login with Bluesky to test the posting functionality of the app. Note: if the redirect back to the app after you login isn't working correctly, you may need to replace the `127.0.0.1` with `localhost`. 110 111 ### Running the full stack in docker for development 112 ··· 153 154 # Show lexicon change impact 155 pnpm lex:diff 156 ``` 157 158 See [`tools/lexicon-cli/README.md`](tools/lexicon-cli/README.md) for detailed documentation.
··· 29 # Install all dependencies (Node.js and Rust) 30 pnpm install 31 32 + # Clone submodules 33 + git submodule update --init --recursive 34 + 35 # Set up environment configuration 36 cp apps/aqua/.env.example apps/aqua/.env 37 ··· 93 - **Format**: `YYYYMMDDHHMMSS_description.sql` (timestamped SQL files) 94 - **Type**: Forward-only SQL migrations managed by SQLx 95 96 ## Development 97 98 To start the development server run: ··· 101 turbo dev --filter=@teal/aqua 102 ``` 103 104 + Open http://localhost:3000/ with your browser to see the home page. Note: if the redirect back to the app after you login isn't working correctly, you may need to replace the `127.0.0.1` with `localhost`, or you may need to set up a publicly accessible endpoint for the app to post to (see below). 105 106 ### Running the full stack in docker for development 107 ··· 148 149 # Show lexicon change impact 150 pnpm lex:diff 151 + ``` 152 + 153 + ## Updating Vendored Lexicons 154 + To update vendored lexicons (anything that's not under fm.teal), follow these steps: 155 + ```bash 156 + cd vendor/atproto 157 + git pull origin main 158 + cd ../.. 159 + git add vendor/atproto 160 + git commit -m "Update atproto lexicons to latest" 161 ``` 162 163 See [`tools/lexicon-cli/README.md`](tools/lexicon-cli/README.md) for detailed documentation.
+24 -14
apps/amethyst/Dockerfile
··· 18 COPY packages/lexicons/ ./packages/lexicons/ 19 COPY packages/tsconfig/ ./packages/tsconfig/ 20 21 # Copy the aqua app 22 COPY apps/amethyst/ ./apps/amethyst/ 23 24 # Copy .env 25 COPY ../../.env ./apps/amethyst/.env 26 27 - # Build the aqua app 28 WORKDIR /app/apps/amethyst 29 - RUN pnpm install 30 RUN pnpm run build:web 31 32 #create the client-json 33 RUN echo '{ \ 34 - "redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \ 35 - "response_types": ["code"], \ 36 - "grant_types": ["authorization_code", "refresh_token"], \ 37 - "scope": "atproto transition:generic", \ 38 - "token_endpoint_auth_method": "none", \ 39 - "application_type": "web", \ 40 - "client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \ 41 - "client_name": "teal", \ 42 - "client_uri": "https://'"${CLIENT_ADDRESS}"'", \ 43 - "dpop_bound_access_tokens": true \ 44 - }' > /app/client-metadata.json 45 46 47 FROM caddy:2.1.0-alpine AS caddy ··· 50 EXPOSE 443/udp 51 COPY /apps/amethyst/Caddyfile /etc/caddy/Caddyfile 52 COPY --from=builder /app/apps/amethyst/build /srv 53 - COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
··· 18 COPY packages/lexicons/ ./packages/lexicons/ 19 COPY packages/tsconfig/ ./packages/tsconfig/ 20 21 + # Copy lexicons source data 22 + COPY lexicons/ ./lexicons/ 23 + 24 # Copy the aqua app 25 COPY apps/amethyst/ ./apps/amethyst/ 26 27 # Copy .env 28 COPY ../../.env ./apps/amethyst/.env 29 30 + # Install dependencies and generate lexicons 31 + RUN cd tools/lexicon-cli && pnpm build 32 + 33 + # Generate lexicons before building amethyst 34 + RUN pnpm lex:gen-server 35 + 36 + RUN pnpm install 37 + 38 + # Build the amethyst app 39 WORKDIR /app/apps/amethyst 40 RUN pnpm run build:web 41 42 #create the client-json 43 RUN echo '{ \ 44 + "redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \ 45 + "response_types": ["code"], \ 46 + "grant_types": ["authorization_code", "refresh_token"], \ 47 + "scope": "atproto transition:generic", \ 48 + "token_endpoint_auth_method": "none", \ 49 + "application_type": "web", \ 50 + "client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \ 51 + "client_name": "teal", \ 52 + "client_uri": "https://'"${CLIENT_ADDRESS}"'", \ 53 + "dpop_bound_access_tokens": true \ 54 + }' > /app/client-metadata.json 55 56 57 FROM caddy:2.1.0-alpine AS caddy ··· 60 EXPOSE 443/udp 61 COPY /apps/amethyst/Caddyfile /etc/caddy/Caddyfile 62 COPY --from=builder /app/apps/amethyst/build /srv 63 + COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
+2 -2
apps/aqua/Cargo.toml
··· 19 tracing-subscriber.workspace = true 20 sqlx = { workspace = true, features = ["time"] } 21 dotenvy.workspace = true 22 - 23 types.workspace = true 24 - chrono = "0.4.41" 25 26 # CAR import functionality 27 iroh-car.workspace = true ··· 29 reqwest.workspace = true 30 url.workspace = true 31 clap = { version = "4.0", features = ["derive"] } 32 33 # Redis for job queues 34 redis.workspace = true
··· 19 tracing-subscriber.workspace = true 20 sqlx = { workspace = true, features = ["time"] } 21 dotenvy.workspace = true 22 types.workspace = true 23 + chrono.workspace = true 24 25 # CAR import functionality 26 iroh-car.workspace = true ··· 28 reqwest.workspace = true 29 url.workspace = true 30 clap = { version = "4.0", features = ["derive"] } 31 + atmst.workspace = true 32 33 # Redis for job queues 34 redis.workspace = true
+20
apps/aqua/Cross.toml
···
··· 1 + [build.env] 2 + passthrough = [ 3 + "CARGO_HOME", 4 + "CARGO_TARGET_DIR", 5 + "SQLX_OFFLINE", 6 + "PKG_CONFIG_ALLOW_CROSS", 7 + ] 8 + 9 + [target.aarch64-unknown-linux-gnu] 10 + image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main" 11 + 12 + [target.aarch64-unknown-linux-gnu.env] 13 + passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"] 14 + # Allow cross-compilation of native dependencies 15 + PKG_CONFIG_ALLOW_CROSS = "1" 16 + # Use static linking to reduce runtime dependencies 17 + RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s" 18 + # Disable problematic features that might require OpenSSL 19 + CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc" 20 + CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
+90
apps/aqua/Dockerfile
···
··· 1 + # Docker build args for cross-platform builds (must be at the top) 2 + ARG TARGETPLATFORM 3 + ARG BUILDPLATFORM 4 + ARG TARGETARCH 5 + ARG TARGETOS 6 + 7 + FROM --platform=${BUILDPLATFORM} rust:latest AS buildah 8 + 9 + # Create appuser 10 + ENV USER=app 11 + ENV UID=10001 12 + 13 + RUN adduser \ 14 + --disabled-password \ 15 + --gecos "" \ 16 + --home "/nonexistent" \ 17 + --shell "/sbin/nologin" \ 18 + --no-create-home \ 19 + --uid "${UID}" \ 20 + "${USER}" 21 + 22 + WORKDIR /buildah 23 + 24 + # Re-declare ARGs after FROM (Docker requirement) 25 + ARG TARGETPLATFORM 26 + ARG BUILDPLATFORM 27 + ARG TARGETARCH 28 + ARG TARGETOS 29 + 30 + # Debug platform detection before copying files 31 + RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM" 32 + 33 + COPY ./ . 34 + 35 + # Setup lexicons and install dependencies 36 + RUN ./scripts/setup-lexicons.sh 37 + 38 + # Install Node.js and pnpm for lexicon generation 39 + RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/* 40 + RUN npm install -g pnpm 41 + 42 + # Install dependencies and generate lexicons 43 + RUN pnpm install 44 + RUN cd tools/lexicon-cli && pnpm build 45 + RUN pnpm lex:gen --rust-only 46 + 47 + # Install cross-compilation toolchains 48 + RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu 49 + 50 + # Enable ARM64 architecture and install cross-compilation tools 51 + RUN dpkg --add-architecture arm64 && \ 52 + apt-get update && \ 53 + apt-get install -y \ 54 + gcc-aarch64-linux-gnu \ 55 + libssl-dev:arm64 \ 56 + libssl-dev \ 57 + pkg-config \ 58 + && rm -rf /var/lib/apt/lists/* 59 + 60 + # Set up cross-compilation environment 61 + ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc 62 + ENV PKG_CONFIG_ALLOW_CROSS=1 63 + ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig 64 + ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr 65 + ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu 66 + ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl 67 + 68 + 69 + # Debug platform detection and run build 70 + RUN . ./target.sh && \ 71 + touch apps/aqua/src/main.rs && \ 72 + echo "Building for $TARGET_ARCH" && \ 73 + cargo build --release --target $RUST_TARGET --package aqua && \ 74 + cp target/$RUST_TARGET/release/aqua target/aqua 75 + 76 + FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc 77 + 78 + # Import from builder. 79 + COPY --from=buildah /etc/passwd /etc/passwd 80 + COPY --from=buildah /etc/group /etc/group 81 + 82 + WORKDIR /app 83 + 84 + # Copy our build 85 + COPY --from=buildah /buildah/target/aqua ./ 86 + 87 + # Use an unprivileged user. 88 + USER app:app 89 + 90 + CMD ["/app/aqua"]
+62
apps/aqua/examples/did_demo.rs
···
··· 1 + use serde_json::json; 2 + 3 + /// Generate a DID document for did:web 4 + fn generate_did_document(host: &str) -> serde_json::Value { 5 + json!({ 6 + "@context": [ 7 + "https://www.w3.org/ns/did/v1", 8 + "https://w3id.org/security/multikey/v1", 9 + "https://w3id.org/security/suites/secp256k1-2019/v1" 10 + ], 11 + "id": format!("did:web:{}", host), 12 + "alsoKnownAs": [ 13 + format!("at://{}", host) 14 + ], 15 + "service": [ 16 + { 17 + "id": "#bsky_fg", 18 + "type": "BskyFeedGenerator", 19 + "serviceEndpoint": format!("https://{}", host) 20 + }, 21 + { 22 + "id": "#atproto_pds", 23 + "type": "AtprotoPersonalDataServer", 24 + "serviceEndpoint": format!("https://{}", host) 25 + } 26 + ], 27 + "verificationMethod": [ 28 + { 29 + "id": format!("did:web:{}#atproto", host), 30 + "type": "Multikey", 31 + "controller": format!("did:web:{}", host), 32 + "publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK" 33 + } 34 + ] 35 + }) 36 + } 37 + 38 + fn main() { 39 + println!("DID Document Generation Demo"); 40 + println!("===========================\n"); 41 + 42 + let test_hosts = vec![ 43 + "localhost:3000", 44 + "bsky.social", 45 + "my-atproto-service.com", 46 + "example.org:8080", 47 + ]; 48 + 49 + for host in test_hosts { 50 + println!("DID Document for host: {}", host); 51 + println!("URL: https://{}/.well-known/did.json", host); 52 + println!("DID: did:web:{}", host); 53 + println!(); 54 + 55 + let did_doc = generate_did_document(host); 56 + println!("{}", serde_json::to_string_pretty(&did_doc).unwrap()); 57 + println!("\n{}\n", "=".repeat(80)); 58 + } 59 + 60 + println!("The well-known endpoint /.well-known/did.json will serve this JSON structure"); 61 + println!("when accessed via HTTP GET request to your Aqua server."); 62 + }
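The demo above ends by noting that aqua serves this document at `/.well-known/did.json`. A minimal sketch of fetching and inspecting that endpoint from a running local instance, assuming the server listens on `localhost:3000` and using the workspace's `reqwest`, `serde_json`, `tokio`, and `anyhow` crates (illustrative only, not part of this diff):

```rust
use serde_json::Value;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Fetch the DID document from the well-known path served by aqua.
    let doc: Value = reqwest::get("http://localhost:3000/.well-known/did.json")
        .await?
        .json()
        .await?;

    // `id` holds the did:web identifier derived from the configured host.
    println!("DID: {}", doc["id"]);
    println!("{}", serde_json::to_string_pretty(&doc)?);
    Ok(())
}
```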
+275 -82
apps/aqua/src/api/mod.rs
··· 1 use axum::{Extension, Json, extract::Multipart, extract::Path, http::StatusCode}; 2 use serde::{Deserialize, Serialize}; 3 - use tracing::{info, error}; 4 - use anyhow::Result; 5 - use uuid; 6 - 7 - use sys_info; 8 9 use crate::ctx::Context; 10 use crate::redis_client::RedisClient; 11 12 #[derive(Debug, Serialize, Deserialize)] 13 pub struct MetaOsInfo { ··· 61 /// Get CAR import job status 62 pub async fn get_car_import_job_status( 63 Path(job_id): Path<String>, 64 - ) -> Result<Json<types::jobs::CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> { 65 - use types::jobs::queue_keys; 66 - 67 info!("Getting status for job: {}", job_id); 68 - 69 // Parse job ID 70 let job_uuid = match uuid::Uuid::parse_str(&job_id) { 71 Ok(uuid) => uuid, ··· 77 return Err((StatusCode::BAD_REQUEST, Json(error_response))); 78 } 79 }; 80 - 81 // Connect to Redis 82 - let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string()); 83 let redis_client = match RedisClient::new(&redis_url) { 84 Ok(client) => client, 85 Err(e) => { ··· 91 return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response))); 92 } 93 }; 94 - 95 // Get job status 96 - match redis_client.get_job_status(&queue_keys::job_status_key(&job_uuid)).await { 97 - Ok(Some(status_data)) => { 98 - match serde_json::from_str::<types::jobs::CarImportJobStatus>(&status_data) { 99 - Ok(status) => Ok(Json(status)), 100 - Err(e) => { 101 - error!("Failed to parse job status: {}", e); 102 - let error_response = ErrorResponse { 103 - error: "Failed to parse job status".to_string(), 104 - details: Some(e.to_string()), 105 - }; 106 - Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response))) 107 - } 108 } 109 - } 110 Ok(None) => { 111 let error_response = ErrorResponse { 112 error: "Job not found".to_string(), ··· 165 mut multipart: Multipart, 166 ) -> Result<Json<CarImportResponse>, StatusCode> { 167 info!("Received CAR file upload request"); 168 - 169 let mut car_data: Option<Vec<u8>> = None; 170 let mut import_id: Option<String> = None; 171 let mut description: Option<String> = None; 172 - 173 // Process multipart form data 174 - while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? 
{ 175 let name = field.name().unwrap_or("").to_string(); 176 - 177 match name.as_str() { 178 "car_file" => { 179 let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?; ··· 192 } 193 } 194 } 195 - 196 let car_bytes = car_data.ok_or(StatusCode::BAD_REQUEST)?; 197 let final_import_id = import_id.unwrap_or_else(|| { 198 // Generate a unique import ID 199 format!("car-import-{}", chrono::Utc::now().timestamp()) 200 }); 201 - 202 // Validate CAR file format 203 match validate_car_file(&car_bytes).await { 204 Ok(_) => { 205 - info!("CAR file validation successful for import {}", final_import_id); 206 } 207 Err(e) => { 208 error!("CAR file validation failed: {}", e); 209 return Err(StatusCode::BAD_REQUEST); 210 } 211 } 212 - 213 // Store CAR import request in database for processing 214 - match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await { 215 Ok(_) => { 216 - info!("CAR import request stored successfully: {}", final_import_id); 217 Ok(Json(CarImportResponse { 218 import_id: final_import_id, 219 status: "queued".to_string(), ··· 232 axum::extract::Path(import_id): axum::extract::Path<String>, 233 ) -> Result<Json<CarImportResponse>, StatusCode> { 234 match get_import_status(&ctx, &import_id).await { 235 - Ok(Some(status)) => { 236 - Ok(Json(CarImportResponse { 237 - import_id, 238 - status: status.status, 239 - message: status.message, 240 - })) 241 - } 242 Ok(None) => Err(StatusCode::NOT_FOUND), 243 Err(e) => { 244 error!("Failed to get import status: {}", e); ··· 248 } 249 250 async fn validate_car_file(car_data: &[u8]) -> Result<()> { 251 - use std::io::Cursor; 252 use iroh_car::CarReader; 253 - 254 let cursor = Cursor::new(car_data); 255 let reader = CarReader::new(cursor).await?; 256 let header = reader.header(); 257 - 258 // Basic validation - ensure we have at least one root CID 259 if header.roots().is_empty() { 260 return Err(anyhow::anyhow!("CAR file has no root CIDs")); 261 } 262 - 263 info!("CAR file validated: {} root CIDs", header.roots().len()); 264 Ok(()) 265 } ··· 293 Extension(ctx): Extension<Context>, 294 Json(request): Json<FetchCarRequest>, 295 ) -> Result<Json<FetchCarResponse>, (StatusCode, Json<ErrorResponse>)> { 296 - info!("Received CAR fetch request for user: {}", request.user_identifier); 297 - 298 // Resolve user identifier to DID and PDS 299 let (user_did, pds_host) = match resolve_user_to_pds(&request.user_identifier).await { 300 Ok(result) => result, ··· 302 error!("Failed to resolve user {}: {}", request.user_identifier, e); 303 let error_response = ErrorResponse { 304 error: "Failed to resolve user".to_string(), 305 - details: if request.debug.unwrap_or(false) { Some(e.to_string()) } else { None }, 306 }; 307 return Err((StatusCode::BAD_REQUEST, Json(error_response))); 308 } 309 }; 310 - 311 - info!("Resolved {} to DID {} on PDS {}", request.user_identifier, user_did, pds_host); 312 - 313 // Generate import ID 314 - let import_id = format!("pds-fetch-{}-{}", 315 - user_did.replace(":", "-"), 316 chrono::Utc::now().timestamp() 317 ); 318 - 319 // Fetch CAR file from PDS 320 match fetch_car_from_pds(&pds_host, &user_did, request.since.as_deref()).await { 321 Ok(car_data) => { 322 - info!("Successfully fetched CAR file for {} ({} bytes)", user_did, car_data.len()); 323 - 324 // Store the fetched CAR file for processing 325 - let description = Some(format!("Fetched from PDS {} for user {}", pds_host, request.user_identifier)); 326 - match store_car_import_request(&ctx, &import_id, &car_data, 
description.as_deref()).await { 327 Ok(_) => { 328 info!("CAR import request stored successfully: {}", import_id); 329 Ok(Json(FetchCarResponse { ··· 371 372 /// Resolve a handle to a DID using com.atproto.identity.resolveHandle 373 async fn resolve_handle_to_did(handle: &str) -> Result<String> { 374 - let url = format!("https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}", handle); 375 - 376 let response = reqwest::get(&url).await?; 377 if !response.status().is_success() { 378 - return Err(anyhow::anyhow!("Failed to resolve handle {}: {}", handle, response.status())); 379 } 380 - 381 let json: serde_json::Value = response.json().await?; 382 - let did = json["did"].as_str() 383 .ok_or_else(|| anyhow::anyhow!("No DID found in response for handle {}", handle))?; 384 - 385 Ok(did.to_string()) 386 } 387 ··· 390 // For DID:plc, use the PLC directory 391 if did.starts_with("did:plc:") { 392 let url = format!("https://plc.directory/{}", did); 393 - 394 let response = reqwest::get(&url).await?; 395 if !response.status().is_success() { 396 - return Err(anyhow::anyhow!("Failed to resolve DID {}: {}", did, response.status())); 397 } 398 - 399 let doc: serde_json::Value = response.json().await?; 400 - 401 // Find the PDS service endpoint 402 if let Some(services) = doc["service"].as_array() { 403 for service in services { ··· 405 if let Some(endpoint) = service["serviceEndpoint"].as_str() { 406 // Extract hostname from URL 407 let url = url::Url::parse(endpoint)?; 408 - let host = url.host_str() 409 - .ok_or_else(|| anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint))?; 410 return Ok(host.to_string()); 411 } 412 } 413 } 414 } 415 - 416 - Err(anyhow::anyhow!("No PDS service found in DID document for {}", did)) 417 } else { 418 Err(anyhow::anyhow!("Unsupported DID method: {}", did)) 419 } ··· 421 422 /// Fetch CAR file from PDS using com.atproto.sync.getRepo 423 pub async fn fetch_car_from_pds(pds_host: &str, did: &str, since: Option<&str>) -> Result<Vec<u8>> { 424 - let mut url = format!("https://{}/xrpc/com.atproto.sync.getRepo?did={}", pds_host, did); 425 - 426 if let Some(since_rev) = since { 427 url.push_str(&format!("&since={}", since_rev)); 428 } 429 - 430 info!("Fetching CAR file from: {}", url); 431 - 432 let response = reqwest::get(&url).await?; 433 if !response.status().is_success() { 434 - return Err(anyhow::anyhow!("Failed to fetch CAR from PDS {}: {}", pds_host, response.status())); 435 } 436 - 437 // Verify content type 438 - let content_type = response.headers() 439 .get("content-type") 440 .and_then(|h| h.to_str().ok()) 441 .unwrap_or(""); 442 - 443 if !content_type.contains("application/vnd.ipld.car") { 444 return Err(anyhow::anyhow!("Unexpected content type: {}", content_type)); 445 } 446 - 447 let car_data = response.bytes().await?; 448 Ok(car_data.to_vec()) 449 }
··· 1 + use anyhow::Result; 2 use axum::{Extension, Json, extract::Multipart, extract::Path, http::StatusCode}; 3 use serde::{Deserialize, Serialize}; 4 + use serde_json::{Value, json}; 5 + use tracing::{error, info}; 6 7 use crate::ctx::Context; 8 use crate::redis_client::RedisClient; 9 + use crate::types::CarImportJobStatus; 10 11 #[derive(Debug, Serialize, Deserialize)] 12 pub struct MetaOsInfo { ··· 60 /// Get CAR import job status 61 pub async fn get_car_import_job_status( 62 Path(job_id): Path<String>, 63 + ) -> Result<Json<CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> { 64 + use crate::types::queue_keys; 65 + 66 info!("Getting status for job: {}", job_id); 67 + 68 // Parse job ID 69 let job_uuid = match uuid::Uuid::parse_str(&job_id) { 70 Ok(uuid) => uuid, ··· 76 return Err((StatusCode::BAD_REQUEST, Json(error_response))); 77 } 78 }; 79 + 80 // Connect to Redis 81 + let redis_url = 82 + std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string()); 83 let redis_client = match RedisClient::new(&redis_url) { 84 Ok(client) => client, 85 Err(e) => { ··· 91 return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response))); 92 } 93 }; 94 + 95 // Get job status 96 + match redis_client 97 + .get_job_status(&queue_keys::job_status_key(&job_uuid)) 98 + .await 99 + { 100 + Ok(Some(status_data)) => match serde_json::from_str::<CarImportJobStatus>(&status_data) { 101 + Ok(status) => Ok(Json(status)), 102 + Err(e) => { 103 + error!("Failed to parse job status: {}", e); 104 + let error_response = ErrorResponse { 105 + error: "Failed to parse job status".to_string(), 106 + details: Some(e.to_string()), 107 + }; 108 + Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response))) 109 } 110 + }, 111 Ok(None) => { 112 let error_response = ErrorResponse { 113 error: "Job not found".to_string(), ··· 166 mut multipart: Multipart, 167 ) -> Result<Json<CarImportResponse>, StatusCode> { 168 info!("Received CAR file upload request"); 169 + 170 let mut car_data: Option<Vec<u8>> = None; 171 let mut import_id: Option<String> = None; 172 let mut description: Option<String> = None; 173 + 174 // Process multipart form data 175 + while let Some(field) = multipart 176 + .next_field() 177 + .await 178 + .map_err(|_| StatusCode::BAD_REQUEST)? 
179 + { 180 let name = field.name().unwrap_or("").to_string(); 181 + 182 match name.as_str() { 183 "car_file" => { 184 let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?; ··· 197 } 198 } 199 } 200 + 201 let car_bytes = car_data.ok_or(StatusCode::BAD_REQUEST)?; 202 let final_import_id = import_id.unwrap_or_else(|| { 203 // Generate a unique import ID 204 format!("car-import-{}", chrono::Utc::now().timestamp()) 205 }); 206 + 207 // Validate CAR file format 208 match validate_car_file(&car_bytes).await { 209 Ok(_) => { 210 + info!( 211 + "CAR file validation successful for import {}", 212 + final_import_id 213 + ); 214 } 215 Err(e) => { 216 error!("CAR file validation failed: {}", e); 217 return Err(StatusCode::BAD_REQUEST); 218 } 219 } 220 + 221 // Store CAR import request in database for processing 222 + match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await 223 + { 224 Ok(_) => { 225 + info!( 226 + "CAR import request stored successfully: {}", 227 + final_import_id 228 + ); 229 Ok(Json(CarImportResponse { 230 import_id: final_import_id, 231 status: "queued".to_string(), ··· 244 axum::extract::Path(import_id): axum::extract::Path<String>, 245 ) -> Result<Json<CarImportResponse>, StatusCode> { 246 match get_import_status(&ctx, &import_id).await { 247 + Ok(Some(status)) => Ok(Json(CarImportResponse { 248 + import_id, 249 + status: status.status, 250 + message: status.message, 251 + })), 252 Ok(None) => Err(StatusCode::NOT_FOUND), 253 Err(e) => { 254 error!("Failed to get import status: {}", e); ··· 258 } 259 260 async fn validate_car_file(car_data: &[u8]) -> Result<()> { 261 use iroh_car::CarReader; 262 + use std::io::Cursor; 263 + 264 let cursor = Cursor::new(car_data); 265 let reader = CarReader::new(cursor).await?; 266 let header = reader.header(); 267 + 268 // Basic validation - ensure we have at least one root CID 269 if header.roots().is_empty() { 270 return Err(anyhow::anyhow!("CAR file has no root CIDs")); 271 } 272 + 273 info!("CAR file validated: {} root CIDs", header.roots().len()); 274 Ok(()) 275 } ··· 303 Extension(ctx): Extension<Context>, 304 Json(request): Json<FetchCarRequest>, 305 ) -> Result<Json<FetchCarResponse>, (StatusCode, Json<ErrorResponse>)> { 306 + info!( 307 + "Received CAR fetch request for user: {}", 308 + request.user_identifier 309 + ); 310 + 311 // Resolve user identifier to DID and PDS 312 let (user_did, pds_host) = match resolve_user_to_pds(&request.user_identifier).await { 313 Ok(result) => result, ··· 315 error!("Failed to resolve user {}: {}", request.user_identifier, e); 316 let error_response = ErrorResponse { 317 error: "Failed to resolve user".to_string(), 318 + details: if request.debug.unwrap_or(false) { 319 + Some(e.to_string()) 320 + } else { 321 + None 322 + }, 323 }; 324 return Err((StatusCode::BAD_REQUEST, Json(error_response))); 325 } 326 }; 327 + 328 + info!( 329 + "Resolved {} to DID {} on PDS {}", 330 + request.user_identifier, user_did, pds_host 331 + ); 332 + 333 // Generate import ID 334 + let import_id = format!( 335 + "pds-fetch-{}-{}", 336 + user_did.replace(":", "-"), 337 chrono::Utc::now().timestamp() 338 ); 339 + 340 // Fetch CAR file from PDS 341 match fetch_car_from_pds(&pds_host, &user_did, request.since.as_deref()).await { 342 Ok(car_data) => { 343 + info!( 344 + "Successfully fetched CAR file for {} ({} bytes)", 345 + user_did, 346 + car_data.len() 347 + ); 348 + 349 // Store the fetched CAR file for processing 350 + let description = Some(format!( 351 + "Fetched 
from PDS {} for user {}", 352 + pds_host, request.user_identifier 353 + )); 354 + match store_car_import_request(&ctx, &import_id, &car_data, description.as_deref()) 355 + .await 356 + { 357 Ok(_) => { 358 info!("CAR import request stored successfully: {}", import_id); 359 Ok(Json(FetchCarResponse { ··· 401 402 /// Resolve a handle to a DID using com.atproto.identity.resolveHandle 403 async fn resolve_handle_to_did(handle: &str) -> Result<String> { 404 + let url = format!( 405 + "https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}", 406 + handle 407 + ); 408 + 409 let response = reqwest::get(&url).await?; 410 if !response.status().is_success() { 411 + return Err(anyhow::anyhow!( 412 + "Failed to resolve handle {}: {}", 413 + handle, 414 + response.status() 415 + )); 416 } 417 + 418 let json: serde_json::Value = response.json().await?; 419 + let did = json["did"] 420 + .as_str() 421 .ok_or_else(|| anyhow::anyhow!("No DID found in response for handle {}", handle))?; 422 + 423 Ok(did.to_string()) 424 } 425 ··· 428 // For DID:plc, use the PLC directory 429 if did.starts_with("did:plc:") { 430 let url = format!("https://plc.directory/{}", did); 431 + 432 let response = reqwest::get(&url).await?; 433 if !response.status().is_success() { 434 + return Err(anyhow::anyhow!( 435 + "Failed to resolve DID {}: {}", 436 + did, 437 + response.status() 438 + )); 439 } 440 + 441 let doc: serde_json::Value = response.json().await?; 442 + 443 // Find the PDS service endpoint 444 if let Some(services) = doc["service"].as_array() { 445 for service in services { ··· 447 if let Some(endpoint) = service["serviceEndpoint"].as_str() { 448 // Extract hostname from URL 449 let url = url::Url::parse(endpoint)?; 450 + let host = url.host_str().ok_or_else(|| { 451 + anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint) 452 + })?; 453 return Ok(host.to_string()); 454 } 455 } 456 } 457 } 458 + 459 + Err(anyhow::anyhow!( 460 + "No PDS service found in DID document for {}", 461 + did 462 + )) 463 } else { 464 Err(anyhow::anyhow!("Unsupported DID method: {}", did)) 465 } ··· 467 468 /// Fetch CAR file from PDS using com.atproto.sync.getRepo 469 pub async fn fetch_car_from_pds(pds_host: &str, did: &str, since: Option<&str>) -> Result<Vec<u8>> { 470 + let mut url = format!( 471 + "https://{}/xrpc/com.atproto.sync.getRepo?did={}", 472 + pds_host, did 473 + ); 474 + 475 if let Some(since_rev) = since { 476 url.push_str(&format!("&since={}", since_rev)); 477 } 478 + 479 info!("Fetching CAR file from: {}", url); 480 + 481 let response = reqwest::get(&url).await?; 482 if !response.status().is_success() { 483 + return Err(anyhow::anyhow!( 484 + "Failed to fetch CAR from PDS {}: {}", 485 + pds_host, 486 + response.status() 487 + )); 488 } 489 + 490 // Verify content type 491 + let content_type = response 492 + .headers() 493 .get("content-type") 494 .and_then(|h| h.to_str().ok()) 495 .unwrap_or(""); 496 + 497 if !content_type.contains("application/vnd.ipld.car") { 498 return Err(anyhow::anyhow!("Unexpected content type: {}", content_type)); 499 } 500 + 501 let car_data = response.bytes().await?; 502 Ok(car_data.to_vec()) 503 } 504 + 505 + /// Generate a DID document for did:web 506 + fn generate_did_document(host: &str, pubkey: &str) -> Value { 507 + json!({ 508 + "@context": [ 509 + "https://www.w3.org/ns/did/v1", 510 + "https://w3id.org/security/multikey/v1", 511 + "https://w3id.org/security/suites/secp256k1-2019/v1" 512 + ], 513 + "id": format!("did:web:{}", host), 514 + "alsoKnownAs": [ 515 + 
format!("at://{}", host) 516 + ], 517 + "service": [ 518 + { 519 + "id": "#bsky_fg", 520 + "type": "BskyFeedGenerator", 521 + "serviceEndpoint": format!("https://{}", host) 522 + }, 523 + { 524 + "id": "#atproto_pds", 525 + "type": "AtprotoPersonalDataServer", 526 + "serviceEndpoint": format!("https://{}", host) 527 + } 528 + ], 529 + "verificationMethod": [ 530 + { 531 + "id": format!("did:web:{}#atproto", host), 532 + "type": "Multikey", 533 + "controller": format!("did:web:{}", host), 534 + "publicKeyMultibase": pubkey 535 + } 536 + ] 537 + }) 538 + } 539 + 540 + /// Handler for /.well-known/did.json endpoint 541 + pub async fn get_did_document( 542 + Extension(_ctx): Extension<Context>, 543 + ) -> impl axum::response::IntoResponse { 544 + // Get the host from environment variable or use default 545 + let host = std::env::var("APP_HOST") 546 + .or_else(|_| std::env::var("HOST")) 547 + .unwrap_or_else(|_| "localhost:3000".to_string()); 548 + 549 + // get pubkey from environment variable or use default 550 + let pubkey = std::env::var("TEST_PUBKEY").unwrap_or_else(|_| { 551 + "z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i".to_string() 552 + }); 553 + 554 + let did_doc = generate_did_document(&host, &pubkey); 555 + 556 + ( 557 + StatusCode::OK, 558 + [("Content-Type", "application/json")], 559 + Json(did_doc), 560 + ) 561 + } 562 + 563 + #[cfg(test)] 564 + mod tests { 565 + use super::*; 566 + 567 + const TEST_PUBKEY: &str = "z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i"; 568 + 569 + #[test] 570 + fn test_generate_did_document() { 571 + let host = "example.com"; 572 + let did_doc = generate_did_document(host, TEST_PUBKEY); 573 + 574 + // Verify the structure of the generated DID document 575 + assert_eq!(did_doc["id"], format!("did:web:{}", host)); 576 + assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host)); 577 + 578 + // Check services 579 + let services = did_doc["service"].as_array().unwrap(); 580 + assert_eq!(services.len(), 2); 581 + 582 + let bsky_fg = &services[0]; 583 + assert_eq!(bsky_fg["id"], "#bsky_fg"); 584 + assert_eq!(bsky_fg["type"], "BskyFeedGenerator"); 585 + assert_eq!(bsky_fg["serviceEndpoint"], format!("https://{}", host)); 586 + 587 + let atproto_pds = &services[1]; 588 + assert_eq!(atproto_pds["id"], "#atproto_pds"); 589 + assert_eq!(atproto_pds["type"], "AtprotoPersonalDataServer"); 590 + assert_eq!(atproto_pds["serviceEndpoint"], format!("https://{}", host)); 591 + 592 + // Check verification method 593 + let verification_methods = did_doc["verificationMethod"].as_array().unwrap(); 594 + assert_eq!(verification_methods.len(), 1); 595 + 596 + let vm = &verification_methods[0]; 597 + assert_eq!(vm["id"], format!("did:web:{}#atproto", host)); 598 + assert_eq!(vm["type"], "Multikey"); 599 + assert_eq!(vm["controller"], format!("did:web:{}", host)); 600 + assert!(vm["publicKeyMultibase"].as_str().unwrap().starts_with("z")); 601 + } 602 + 603 + #[test] 604 + fn test_did_document_context() { 605 + let host = "test.example.org"; 606 + let did_doc = generate_did_document(host, TEST_PUBKEY); 607 + 608 + let context = did_doc["@context"].as_array().unwrap(); 609 + assert_eq!(context.len(), 3); 610 + assert_eq!(context[0], "https://www.w3.org/ns/did/v1"); 611 + assert_eq!(context[1], "https://w3id.org/security/multikey/v1"); 612 + assert_eq!( 613 + context[2], 614 + "https://w3id.org/security/suites/secp256k1-2019/v1" 615 + ); 616 + } 617 + 618 + #[test] 619 + fn test_different_hosts() { 620 + // Test with different host formats 
621 + let hosts = vec![ 622 + "localhost:3000", 623 + "bsky.social", 624 + "example.org:8080", 625 + "my-service.com", 626 + ]; 627 + 628 + for host in hosts { 629 + let did_doc = generate_did_document(host, TEST_PUBKEY); 630 + 631 + // Verify basic structure for each host 632 + assert_eq!(did_doc["id"], format!("did:web:{}", host)); 633 + assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host)); 634 + 635 + let services = did_doc["service"].as_array().unwrap(); 636 + assert_eq!(services.len(), 2); 637 + 638 + let verification_methods = did_doc["verificationMethod"].as_array().unwrap(); 639 + assert_eq!(verification_methods.len(), 1); 640 + } 641 + } 642 + }
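For reference, a minimal sketch (not part of the diff) of what the new endpoint serves. It assumes the `generate_did_document` helper shown above is in scope and that `serde_json` is available, and it reuses the same fallback host and pubkey the handler falls back to:

```rust
// Sketch only: exercises the generator shown above outside of Axum.
fn main() {
    // Fallback values used by the handler when APP_HOST/HOST and TEST_PUBKEY are unset.
    let host = "localhost:3000";
    let pubkey = "z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i";

    let did_doc = generate_did_document(host, pubkey);

    // This JSON is what GET /.well-known/did.json returns, e.g.
    // {"id":"did:web:localhost:3000","alsoKnownAs":["at://localhost:3000"],...}
    println!("{}", serde_json::to_string_pretty(&did_doc).unwrap());
}
```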
+50 -25
apps/aqua/src/main.rs
··· 1 - use axum::{Router, extract::Extension, routing::{get, post}}; 2 use std::net::SocketAddr; 3 use tower_http::cors::CorsLayer; 4 - use clap::{Arg, Command}; 5 use uuid::Uuid; 6 - use chrono::Utc; 7 8 use ctx::RawContext; 9 use repos::DataSource; 10 use repos::pg::PgDataSource; 11 - use redis_client::RedisClient; 12 13 mod api; 14 mod ctx; 15 mod db; 16 mod repos; 17 mod xrpc; 18 - mod redis_client; 19 20 #[tokio::main] 21 async fn main() -> Result<(), String> { ··· 32 .long("import-identity-car") 33 .value_name("HANDLE_OR_DID") 34 .help("Import CAR file for a specific identity (handle or DID)") 35 - .action(clap::ArgAction::Set) 36 ) 37 .get_matches(); 38 39 let db = db::init_pool().await.expect("failed to init db"); 40 let pgds = PgDataSource::new(db.clone()).boxed(); 41 - let ctx = RawContext::new(pgds).build(); 42 43 // Check if we should import a CAR file instead of starting the server 44 if let Some(identity) = matches.get_one::<String>("import-identity-car") { ··· 50 51 let app = Router::new() 52 .route("/meta_info", get(api::get_meta_info)) 53 .route("/api/car/upload", post(api::upload_car_import)) 54 .route("/api/car/fetch", post(api::fetch_car_from_user)) 55 - .route("/api/car/status/{import_id}", get(api::get_car_import_status)) 56 - .route("/api/car/job-status/{job_id}", get(api::get_car_import_job_status)) 57 .nest("/xrpc/", xrpc::actor::actor_routes()) 58 .nest("/xrpc/", xrpc::feed::feed_routes()) 59 .nest("/xrpc/", xrpc::stats::stats_routes()) ··· 69 } 70 71 async fn import_identity_car(_ctx: &ctx::Context, identity: &str) -> Result<(), String> { 72 - use tracing::{info, error}; 73 - use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys}; 74 - 75 info!("Submitting CAR import job for identity: {}", identity); 76 - 77 // Connect to Redis 78 - let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string()); 79 - let redis_client = RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?; 80 - 81 // Create job 82 let job = CarImportJob { 83 request_id: Uuid::new_v4(), ··· 86 created_at: Utc::now(), 87 description: Some(format!("CLI import request for {}", identity)), 88 }; 89 - 90 // Serialize job for queue 91 - let job_data = serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?; 92 - 93 // Initialize job status 94 let status = CarImportJobStatus { 95 status: JobStatus::Pending, ··· 99 error_message: None, 100 progress: None, 101 }; 102 - let status_data = serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?; 103 - 104 // Submit to queue and set initial status 105 - match redis_client.queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data).await { 106 Ok(_) => { 107 // Set initial status 108 - if let Err(e) = redis_client.set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data).await { 109 error!("Failed to set job status: {}", e); 110 } 111 - 112 info!("โœ… CAR import job queued successfully!"); 113 info!("Job ID: {}", job.request_id); 114 info!("Identity: {}", identity); 115 - info!("Monitor status with: curl http://localhost:3000/api/car/status/{}", job.request_id); 116 Ok(()) 117 } 118 Err(e) => {
··· 1 + use axum::{ 2 + Router, 3 + extract::Extension, 4 + routing::{get, post}, 5 + }; 6 + use chrono::Utc; 7 + use clap::{Arg, Command}; 8 use std::net::SocketAddr; 9 use tower_http::cors::CorsLayer; 10 use uuid::Uuid; 11 12 use ctx::RawContext; 13 + use redis_client::RedisClient; 14 use repos::DataSource; 15 use repos::pg::PgDataSource; 16 17 mod api; 18 mod ctx; 19 mod db; 20 + mod redis_client; 21 mod repos; 22 + mod types; 23 mod xrpc; 24 25 #[tokio::main] 26 async fn main() -> Result<(), String> { ··· 37 .long("import-identity-car") 38 .value_name("HANDLE_OR_DID") 39 .help("Import CAR file for a specific identity (handle or DID)") 40 + .action(clap::ArgAction::Set), 41 ) 42 .get_matches(); 43 44 let db = db::init_pool().await.expect("failed to init db"); 45 let pgds = PgDataSource::new(db.clone()).boxed(); 46 + let ctx = RawContext::new(pgds).build(); // Arc<RawContext> 47 48 // Check if we should import a CAR file instead of starting the server 49 if let Some(identity) = matches.get_one::<String>("import-identity-car") { ··· 55 56 let app = Router::new() 57 .route("/meta_info", get(api::get_meta_info)) 58 + .route("/.well-known/did.json", get(api::get_did_document)) 59 .route("/api/car/upload", post(api::upload_car_import)) 60 .route("/api/car/fetch", post(api::fetch_car_from_user)) 61 + .route( 62 + "/api/car/status/{import_id}", 63 + get(api::get_car_import_status), 64 + ) 65 + .route( 66 + "/api/car/job-status/{job_id}", 67 + get(api::get_car_import_job_status), 68 + ) 69 .nest("/xrpc/", xrpc::actor::actor_routes()) 70 .nest("/xrpc/", xrpc::feed::feed_routes()) 71 .nest("/xrpc/", xrpc::stats::stats_routes()) ··· 81 } 82 83 async fn import_identity_car(_ctx: &ctx::Context, identity: &str) -> Result<(), String> { 84 + use crate::types::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys}; 85 + use tracing::{error, info}; 86 + 87 info!("Submitting CAR import job for identity: {}", identity); 88 + 89 // Connect to Redis 90 + let redis_url = 91 + std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string()); 92 + let redis_client = 93 + RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?; 94 + 95 // Create job 96 let job = CarImportJob { 97 request_id: Uuid::new_v4(), ··· 100 created_at: Utc::now(), 101 description: Some(format!("CLI import request for {}", identity)), 102 }; 103 + 104 // Serialize job for queue 105 + let job_data = 106 + serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?; 107 + 108 // Initialize job status 109 let status = CarImportJobStatus { 110 status: JobStatus::Pending, ··· 114 error_message: None, 115 progress: None, 116 }; 117 + let status_data = 118 + serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?; 119 + 120 // Submit to queue and set initial status 121 + match redis_client 122 + .queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data) 123 + .await 124 + { 125 Ok(_) => { 126 // Set initial status 127 + if let Err(e) = redis_client 128 + .set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data) 129 + .await 130 + { 131 error!("Failed to set job status: {}", e); 132 } 133 + 134 info!("โœ… CAR import job queued successfully!"); 135 info!("Job ID: {}", job.request_id); 136 info!("Identity: {}", identity); 137 + info!( 138 + "Monitor status with: curl http://localhost:3000/api/car/status/{}", 139 + job.request_id 140 + ); 141 Ok(()) 142 } 143 Err(e) => {
+1 -1
apps/aqua/src/redis_client.rs
··· 36 let status: Option<String> = conn.get(status_key).await?; 37 Ok(status) 38 } 39 - }
··· 36 let status: Option<String> = conn.get(status_key).await?; 37 Ok(status) 38 } 39 + }
+7 -5
apps/aqua/src/repos/actor_profile.rs
··· 9 async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>>; 10 async fn get_multiple_actor_profiles( 11 &self, 12 - identities: &Vec<String>, 13 ) -> anyhow::Result<Vec<ProfileViewData>>; 14 } 15 ··· 30 avatar: row.avatar, 31 banner: row.banner, 32 // chrono -> atrium time 33 - created_at: row.created_at.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))), 34 description: row.description, 35 description_facets: row 36 .description_facets 37 .and_then(|v| serde_json::from_value(v).ok()), 38 did: row.did, 39 - featured_item: None, 40 display_name: row.display_name, 41 status: row.status.and_then(|v| serde_json::from_value(v).ok()), 42 } 43 } ··· 46 #[async_trait] 47 impl ActorProfileRepo for PgDataSource { 48 async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>> { 49 - self.get_multiple_actor_profiles(&vec![identity.to_string()]) 50 .await 51 .map(|p| p.first().cloned()) 52 } 53 async fn get_multiple_actor_profiles( 54 &self, 55 - identities: &Vec<String>, 56 ) -> anyhow::Result<Vec<ProfileViewData>> { 57 // split identities into dids (prefixed with "did:") and handles (not prefixed) in one iteration 58 let mut dids = Vec::new();
··· 9 async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>>; 10 async fn get_multiple_actor_profiles( 11 &self, 12 + identities: &[String], 13 ) -> anyhow::Result<Vec<ProfileViewData>>; 14 } 15 ··· 30 avatar: row.avatar, 31 banner: row.banner, 32 // chrono -> atrium time 33 + created_at: row 34 + .created_at 35 + .map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))), 36 description: row.description, 37 description_facets: row 38 .description_facets 39 .and_then(|v| serde_json::from_value(v).ok()), 40 did: row.did, 41 display_name: row.display_name, 42 + featured_item: None, 43 status: row.status.and_then(|v| serde_json::from_value(v).ok()), 44 } 45 } ··· 48 #[async_trait] 49 impl ActorProfileRepo for PgDataSource { 50 async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>> { 51 + self.get_multiple_actor_profiles(&[identity.to_string()]) 52 .await 53 .map(|p| p.first().cloned()) 54 } 55 async fn get_multiple_actor_profiles( 56 &self, 57 + identities: &[String], 58 ) -> anyhow::Result<Vec<ProfileViewData>> { 59 // split identities into dids (prefixed with "did:") and handles (not prefixed) in one iteration 60 let mut dids = Vec::new();
+22 -18
apps/aqua/src/repos/feed_play.rs
··· 8 async fn get_feed_play(&self, identity: &str) -> anyhow::Result<Option<PlayViewData>>; 9 async fn get_feed_plays_for_profile( 10 &self, 11 - identities: &Vec<String>, 12 ) -> anyhow::Result<Vec<PlayViewData>>; 13 } 14 ··· 49 }; 50 51 Ok(Some(PlayViewData { 52 artists, 53 - duration: row.duration.map(|d| d as i64), 54 isrc: row.isrc, 55 - music_service_base_domain: row.music_service_base_domain, 56 origin_url: row.origin_url, 57 - played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))), 58 - recording_mb_id: row.recording_mbid.map(|u| u.to_string()), 59 - release_mb_id: row.release_mbid.map(|u| u.to_string()), 60 - release_name: row.release_name, 61 submission_client_agent: row.submission_client_agent, 62 - track_mb_id: Some(row.rkey.clone()), 63 - track_name: row.track_name.clone(), 64 })) 65 } 66 67 async fn get_feed_plays_for_profile( 68 &self, 69 - identities: &Vec<String>, 70 ) -> anyhow::Result<Vec<PlayViewData>> { 71 let rows = sqlx::query!( 72 r#" ··· 105 }; 106 107 result.push(PlayViewData { 108 artists, 109 - duration: row.duration.map(|d| d as i64), 110 isrc: row.isrc, 111 - music_service_base_domain: row.music_service_base_domain, 112 origin_url: row.origin_url, 113 - played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))), 114 - recording_mb_id: row.recording_mbid.map(|u| u.to_string()), 115 - release_mb_id: row.release_mbid.map(|u| u.to_string()), 116 - release_name: row.release_name, 117 submission_client_agent: row.submission_client_agent, 118 - track_mb_id: Some(row.rkey.clone()), 119 - track_name: row.track_name.clone(), 120 }); 121 } 122
··· 8 async fn get_feed_play(&self, identity: &str) -> anyhow::Result<Option<PlayViewData>>; 9 async fn get_feed_plays_for_profile( 10 &self, 11 + identities: &[String], 12 ) -> anyhow::Result<Vec<PlayViewData>>; 13 } 14 ··· 49 }; 50 51 Ok(Some(PlayViewData { 52 + track_name: row.track_name.clone(), 53 + track_mb_id: row.recording_mbid.map(|u| u.to_string()), 54 + recording_mb_id: row.recording_mbid.map(|u| u.to_string()), 55 + duration: row.duration.map(|d| d as i64), 56 artists, 57 + release_name: row.release_name.clone(), 58 + release_mb_id: row.release_mbid.map(|u| u.to_string()), 59 isrc: row.isrc, 60 origin_url: row.origin_url, 61 + music_service_base_domain: row.music_service_base_domain, 62 submission_client_agent: row.submission_client_agent, 63 + played_time: row 64 + .played_time 65 + .map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))), 66 })) 67 } 68 69 async fn get_feed_plays_for_profile( 70 &self, 71 + identities: &[String], 72 ) -> anyhow::Result<Vec<PlayViewData>> { 73 let rows = sqlx::query!( 74 r#" ··· 107 }; 108 109 result.push(PlayViewData { 110 + track_name: row.track_name.clone(), 111 + track_mb_id: row.recording_mbid.map(|u| u.to_string()), 112 + recording_mb_id: row.recording_mbid.map(|u| u.to_string()), 113 + duration: row.duration.map(|d| d as i64), 114 artists, 115 + release_name: row.release_name.clone(), 116 + release_mb_id: row.release_mbid.map(|u| u.to_string()), 117 isrc: row.isrc, 118 origin_url: row.origin_url, 119 + music_service_base_domain: row.music_service_base_domain, 120 submission_client_agent: row.submission_client_agent, 121 + played_time: row 122 + .played_time 123 + .map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))), 124 }); 125 } 126
+1 -2
apps/aqua/src/repos/mod.rs
··· 27 } 28 29 pub fn time_to_chrono_utc(dt: time::OffsetDateTime) -> chrono::DateTime<chrono::Utc> { 30 - chrono::DateTime::from_timestamp(dt.unix_timestamp(), dt.nanosecond()) 31 - .unwrap_or_default() 32 }
··· 27 } 28 29 pub fn time_to_chrono_utc(dt: time::OffsetDateTime) -> chrono::DateTime<chrono::Utc> { 30 + chrono::DateTime::from_timestamp(dt.unix_timestamp(), dt.nanosecond()).unwrap_or_default() 31 }
+10 -9
apps/aqua/src/repos/stats.rs
··· 85 if let (Some(mbid), Some(name)) = (row.mbid, row.name) { 86 result.push(ReleaseViewData { 87 mbid: mbid.to_string(), 88 - 89 name, 90 play_count: row.play_count.unwrap_or(0), 91 }); ··· 217 }; 218 219 result.push(PlayViewData { 220 artists, 221 - duration: row.duration.map(|d| d as i64), 222 isrc: row.isrc, 223 - music_service_base_domain: row.music_service_base_domain, 224 origin_url: row.origin_url, 225 - played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))), 226 - recording_mb_id: row.recording_mbid.map(|u| u.to_string()), 227 - release_mb_id: row.release_mbid.map(|u| u.to_string()), 228 - release_name: row.release_name, 229 submission_client_agent: row.submission_client_agent, 230 - track_mb_id: Some(row.rkey.clone()), 231 - track_name: row.track_name.clone(), 232 }); 233 } 234
··· 85 if let (Some(mbid), Some(name)) = (row.mbid, row.name) { 86 result.push(ReleaseViewData { 87 mbid: mbid.to_string(), 88 name, 89 play_count: row.play_count.unwrap_or(0), 90 }); ··· 216 }; 217 218 result.push(PlayViewData { 219 + track_name: row.track_name.clone(), 220 + track_mb_id: row.recording_mbid.map(|u| u.to_string()), 221 + recording_mb_id: row.recording_mbid.map(|u| u.to_string()), 222 + duration: row.duration.map(|d| d as i64), 223 artists, 224 + release_name: row.release_name.clone(), 225 + release_mb_id: row.release_mbid.map(|u| u.to_string()), 226 isrc: row.isrc, 227 origin_url: row.origin_url, 228 + music_service_base_domain: row.music_service_base_domain, 229 submission_client_agent: row.submission_client_agent, 230 + played_time: row 231 + .played_time 232 + .map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))), 233 }); 234 } 235
+51
apps/aqua/src/types/jobs.rs
···
··· 1 + use chrono::{DateTime, Utc}; 2 + use serde::{Deserialize, Serialize}; 3 + use uuid::Uuid; 4 + 5 + #[derive(Debug, Clone, Serialize, Deserialize)] 6 + pub struct CarImportJob { 7 + pub request_id: Uuid, 8 + pub identity: String, 9 + pub since: Option<DateTime<Utc>>, 10 + pub created_at: DateTime<Utc>, 11 + pub description: Option<String>, 12 + } 13 + 14 + #[derive(Debug, Clone, Serialize, Deserialize)] 15 + pub struct CarImportJobStatus { 16 + pub status: JobStatus, 17 + pub created_at: DateTime<Utc>, 18 + pub started_at: Option<DateTime<Utc>>, 19 + pub completed_at: Option<DateTime<Utc>>, 20 + pub error_message: Option<String>, 21 + pub progress: Option<JobProgress>, 22 + } 23 + 24 + #[derive(Debug, Clone, Serialize, Deserialize)] 25 + pub enum JobStatus { 26 + Pending, 27 + Processing, 28 + Completed, 29 + Failed, 30 + Cancelled, 31 + } 32 + 33 + #[derive(Debug, Clone, Serialize, Deserialize)] 34 + pub struct JobProgress { 35 + step: String, 36 + pub user_did: Option<String>, 37 + pub pds_host: Option<String>, 38 + pub car_size_bytes: Option<u64>, 39 + pub blocks_processed: Option<u64>, 40 + } 41 + 42 + pub mod queue_keys { 43 + use uuid::Uuid; 44 + 45 + pub const CAR_IMPORT_JOBS: &str = "car_import_jobs"; 46 + pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status"; 47 + 48 + pub fn job_status_key(job_id: &Uuid) -> String { 49 + format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id) 50 + } 51 + }
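A rough sketch of how these types flow through the Redis queue, mirroring the CLI path in `main.rs` above. It assumes the types and `queue_keys` module from this file are in scope along with `chrono`, `serde_json`, and `uuid`; the handle is invented:

```rust
use chrono::Utc;
use uuid::Uuid;

// Returns the payload pushed onto the car_import_jobs list and the key the
// worker is expected to update ("car_import_status:<request_id>").
fn queue_payload_example() -> Result<(String, String), serde_json::Error> {
    let job = CarImportJob {
        request_id: Uuid::new_v4(),
        identity: "alice.example.com".to_string(), // hypothetical handle
        since: None,
        created_at: Utc::now(),
        description: Some("CLI import request for alice.example.com".to_string()),
    };

    let payload = serde_json::to_string(&job)?;
    let status_key = queue_keys::job_status_key(&job.request_id);
    Ok((payload, status_key))
}
```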
+3
apps/aqua/src/types/mod.rs
···
··· 1 + pub mod jobs; 2 + 3 + pub use jobs::*;
+1 -1
apps/aqua/src/xrpc/actor.rs
··· 58 let repo = &ctx.db; // assuming ctx.db is Box<dyn ActorProfileRepo + Send + Sync> 59 let actor = &query.actors; 60 61 - if actor.len() == 0 { 62 return Err((StatusCode::BAD_REQUEST, "actor is required".to_string())); 63 } 64
··· 58 let repo = &ctx.db; // assuming ctx.db is Box<dyn ActorProfileRepo + Send + Sync> 59 let actor = &query.actors; 60 61 + if actor.is_empty() { 62 return Err((StatusCode::BAD_REQUEST, "actor is required".to_string())); 63 } 64
+17 -11
apps/aqua/src/xrpc/stats.rs
··· 1 use crate::ctx::Context; 2 use axum::{Extension, http::StatusCode, response::IntoResponse, routing::get}; 3 use serde::{Deserialize, Serialize}; 4 - use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData}; 5 use types::fm::teal::alpha::feed::defs::PlayViewData; 6 7 // mount stats routes 8 pub fn stats_routes() -> axum::Router { 9 axum::Router::new() 10 .route("/fm.teal.alpha.stats.getTopArtists", get(get_top_artists)) 11 .route("/fm.teal.alpha.stats.getTopReleases", get(get_top_releases)) 12 - .route("/fm.teal.alpha.stats.getUserTopArtists", get(get_user_top_artists)) 13 - .route("/fm.teal.alpha.stats.getUserTopReleases", get(get_user_top_releases)) 14 .route("/fm.teal.alpha.stats.getLatest", get(get_latest)) 15 } 16 ··· 29 axum::extract::Query(query): axum::extract::Query<GetTopArtistsQuery>, 30 ) -> Result<impl IntoResponse, (StatusCode, String)> { 31 let repo = &ctx.db; 32 - 33 match repo.get_top_artists(query.limit).await { 34 Ok(artists) => Ok(axum::Json(GetTopArtistsResponse { artists })), 35 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 51 axum::extract::Query(query): axum::extract::Query<GetTopReleasesQuery>, 52 ) -> Result<impl IntoResponse, (StatusCode, String)> { 53 let repo = &ctx.db; 54 - 55 match repo.get_top_releases(query.limit).await { 56 Ok(releases) => Ok(axum::Json(GetTopReleasesResponse { releases })), 57 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 74 axum::extract::Query(query): axum::extract::Query<GetUserTopArtistsQuery>, 75 ) -> Result<impl IntoResponse, (StatusCode, String)> { 76 let repo = &ctx.db; 77 - 78 if query.actor.is_empty() { 79 return Err((StatusCode::BAD_REQUEST, "actor is required".to_string())); 80 } 81 - 82 match repo.get_user_top_artists(&query.actor, query.limit).await { 83 Ok(artists) => Ok(axum::Json(GetUserTopArtistsResponse { artists })), 84 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 101 axum::extract::Query(query): axum::extract::Query<GetUserTopReleasesQuery>, 102 ) -> Result<impl IntoResponse, (StatusCode, String)> { 103 let repo = &ctx.db; 104 - 105 if query.actor.is_empty() { 106 return Err((StatusCode::BAD_REQUEST, "actor is required".to_string())); 107 } 108 - 109 match repo.get_user_top_releases(&query.actor, query.limit).await { 110 Ok(releases) => Ok(axum::Json(GetUserTopReleasesResponse { releases })), 111 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 127 axum::extract::Query(query): axum::extract::Query<GetLatestQuery>, 128 ) -> Result<impl IntoResponse, (StatusCode, String)> { 129 let repo = &ctx.db; 130 - 131 match repo.get_latest(query.limit).await { 132 Ok(plays) => Ok(axum::Json(GetLatestResponse { plays })), 133 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), 134 } 135 - }
··· 1 use crate::ctx::Context; 2 use axum::{Extension, http::StatusCode, response::IntoResponse, routing::get}; 3 use serde::{Deserialize, Serialize}; 4 use types::fm::teal::alpha::feed::defs::PlayViewData; 5 + use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData}; 6 7 // mount stats routes 8 pub fn stats_routes() -> axum::Router { 9 axum::Router::new() 10 .route("/fm.teal.alpha.stats.getTopArtists", get(get_top_artists)) 11 .route("/fm.teal.alpha.stats.getTopReleases", get(get_top_releases)) 12 + .route( 13 + "/fm.teal.alpha.stats.getUserTopArtists", 14 + get(get_user_top_artists), 15 + ) 16 + .route( 17 + "/fm.teal.alpha.stats.getUserTopReleases", 18 + get(get_user_top_releases), 19 + ) 20 .route("/fm.teal.alpha.stats.getLatest", get(get_latest)) 21 } 22 ··· 35 axum::extract::Query(query): axum::extract::Query<GetTopArtistsQuery>, 36 ) -> Result<impl IntoResponse, (StatusCode, String)> { 37 let repo = &ctx.db; 38 + 39 match repo.get_top_artists(query.limit).await { 40 Ok(artists) => Ok(axum::Json(GetTopArtistsResponse { artists })), 41 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 57 axum::extract::Query(query): axum::extract::Query<GetTopReleasesQuery>, 58 ) -> Result<impl IntoResponse, (StatusCode, String)> { 59 let repo = &ctx.db; 60 + 61 match repo.get_top_releases(query.limit).await { 62 Ok(releases) => Ok(axum::Json(GetTopReleasesResponse { releases })), 63 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 80 axum::extract::Query(query): axum::extract::Query<GetUserTopArtistsQuery>, 81 ) -> Result<impl IntoResponse, (StatusCode, String)> { 82 let repo = &ctx.db; 83 + 84 if query.actor.is_empty() { 85 return Err((StatusCode::BAD_REQUEST, "actor is required".to_string())); 86 } 87 + 88 match repo.get_user_top_artists(&query.actor, query.limit).await { 89 Ok(artists) => Ok(axum::Json(GetUserTopArtistsResponse { artists })), 90 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 107 axum::extract::Query(query): axum::extract::Query<GetUserTopReleasesQuery>, 108 ) -> Result<impl IntoResponse, (StatusCode, String)> { 109 let repo = &ctx.db; 110 + 111 if query.actor.is_empty() { 112 return Err((StatusCode::BAD_REQUEST, "actor is required".to_string())); 113 } 114 + 115 match repo.get_user_top_releases(&query.actor, query.limit).await { 116 Ok(releases) => Ok(axum::Json(GetUserTopReleasesResponse { releases })), 117 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), ··· 133 axum::extract::Query(query): axum::extract::Query<GetLatestQuery>, 134 ) -> Result<impl IntoResponse, (StatusCode, String)> { 135 let repo = &ctx.db; 136 + 137 match repo.get_latest(query.limit).await { 138 Ok(plays) => Ok(axum::Json(GetLatestResponse { plays })), 139 Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())), 140 } 141 + }
+82 -5
lexicons/README.md
··· 17 18 ### Initial Setup 19 20 - If you're cloning this repository for the first time, you'll need to initialize the submodules: 21 22 ```bash 23 git submodule update --init --recursive 24 ``` 25 26 ### Updating ATProto Lexicons 27 28 - To update to the latest ATProto lexicons: 29 30 ```bash 31 cd vendor/atproto 32 git pull origin main 33 cd ../.. ··· 35 git commit -m "Update atproto lexicons to latest" 36 ``` 37 38 ### Adding Custom Lexicons 39 40 Custom lexicons should be added to the `fm.teal.alpha/` directory following the ATProto lexicon schema format. These files are tracked directly in our repository and not affected by submodule updates. 41 42 - ## Generated Files 43 - 44 - This directory may contain generated files (`.js`, `.d.ts`, etc.) that are created by lexicon compilation tools. These are ignored by git as specified in the `.gitignore` file.
··· 17
18 ### Initial Setup
19
20 + If you're cloning this repository for the first time, you'll need to initialize the submodules and create the symbolic links:
21
22 ```bash
23 + # Initialize submodules
24 git submodule update --init --recursive
25 + 
26 + # Create symbolic links to atproto lexicons
27 + cd lexicons
28 + ln -s ../vendor/atproto/lexicons/app app
29 + ln -s ../vendor/atproto/lexicons/chat chat
30 + ln -s ../vendor/atproto/lexicons/com com
31 + ln -s ../vendor/atproto/lexicons/tools tools
32 + cd ..
33 + ```
34 + 
35 + Or use the provided setup script:
36 + 
37 + ```bash
38 + ./scripts/setup-lexicons.sh
39 ```
40
41 ### Updating ATProto Lexicons
42
43 + To update to the latest ATProto lexicons, use the provided update script:
44
45 ```bash
46 + ./scripts/update-lexicons.sh
47 + ```
48 + 
49 + This will:
50 + 1. Fetch the latest changes from the atproto repository
51 + 2. Show you what changed
52 + 3. Stage the submodule update for commit
53 + 
54 + Then commit the changes:
55 + ```bash
56 + git commit -m "Update atproto lexicons to latest"
57 + ```
58 + 
59 + **Manual approach:**
60 + ```bash
61 cd vendor/atproto
62 git pull origin main
63 cd ../..
··· 65
66 git commit -m "Update atproto lexicons to latest"
67 ```
68
69 + ### Available Scripts
70 + 
71 + Two convenience scripts are available:
72 + 
73 + **Setup Script** - Handles the initial setup:
74 + 
75 + ```bash
76 + #!/bin/bash
77 + # scripts/setup-lexicons.sh
78 + 
79 + echo "Setting up lexicons..."
80 + 
81 + # Initialize submodules
82 + git submodule update --init --recursive
83 + 
84 + # Create symbolic links if they don't exist
85 + cd lexicons
86 + if [ ! -L app ]; then
87 + ln -s ../vendor/atproto/lexicons/app app
88 + echo "Created symlink: lexicons/app"
89 + fi
90 + if [ ! -L chat ]; then
91 + ln -s ../vendor/atproto/lexicons/chat chat
92 + echo "Created symlink: lexicons/chat"
93 + fi
94 + if [ ! -L com ]; then
95 + ln -s ../vendor/atproto/lexicons/com com
96 + echo "Created symlink: lexicons/com"
97 + fi
98 + if [ ! -L tools ]; then
99 + ln -s ../vendor/atproto/lexicons/tools tools
100 + echo "Created symlink: lexicons/tools"
101 + fi
102 + cd ..
103 + 
104 + echo "Lexicons setup complete!"
105 + ```
106 + 
107 + **Update Script** - Updates the ATProto lexicons:
108 + 
109 + ```bash
110 + # The script fetches the latest changes from the atproto repository,
111 + # shows what changed, and stages the submodule update for commit.
112 + # Review the staged change, then commit it as shown above.
113 + 
114 + ./scripts/update-lexicons.sh
115 + git commit -m "Update atproto lexicons to latest"
116 + ```
117 + 
118 ### Adding Custom Lexicons
119
120 Custom lexicons should be added to the `fm.teal.alpha/` directory following the ATProto lexicon schema format. These files are tracked directly in our repository and not affected by submodule updates.
121
122 + **Note**: The symbolic links (`app`, `chat`, `com`, `tools`) are not tracked in git and will be created during setup. They are ignored in `.gitignore` to avoid conflicts.
+21 -1
lexicons/fm.teal.alpha/actor/defs.json
··· 36 }, 37 "status": { 38 "type": "ref", 39 - "ref": "fm.teal.alpha.actor.status#main" 40 }, 41 "createdAt": { "type": "string", "format": "datetime" } 42 } ··· 57 "avatar": { 58 "type": "string", 59 "description": "IPLD of the avatar" 60 } 61 } 62 }
··· 36 }, 37 "status": { 38 "type": "ref", 39 + "ref": "#statusView" 40 }, 41 "createdAt": { "type": "string", "format": "datetime" } 42 } ··· 57 "avatar": { 58 "type": "string", 59 "description": "IPLD of the avatar" 60 + } 61 + } 62 + }, 63 + "statusView": { 64 + "type": "object", 65 + "description": "A declaration of the status of the actor.", 66 + "properties": { 67 + "time": { 68 + "type": "string", 69 + "format": "datetime", 70 + "description": "The unix timestamp of when the item was recorded" 71 + }, 72 + "expiry": { 73 + "type": "string", 74 + "format": "datetime", 75 + "description": "The unix timestamp of the expiry time of the item. If unavailable, default to 10 minutes past the start time." 76 + }, 77 + "item": { 78 + "type": "ref", 79 + "ref": "fm.teal.alpha.feed.defs#playView" 80 } 81 } 82 }
+12 -1
lexicons/fm.teal.alpha/feed/play.json
··· 19 }, 20 "trackMbId": { 21 "type": "string", 22 - 23 "description": "The Musicbrainz ID of the track" 24 }, 25 "recordingMbId": { ··· 87 "type": "string", 88 "format": "datetime", 89 "description": "The unix timestamp of when the track was played" 90 } 91 } 92 }
··· 19 }, 20 "trackMbId": { 21 "type": "string", 22 "description": "The Musicbrainz ID of the track" 23 }, 24 "recordingMbId": { ··· 86 "type": "string", 87 "format": "datetime", 88 "description": "The unix timestamp of when the track was played" 89 + }, 90 + "trackDiscriminant": { 91 + "type": "string", 92 + "maxLength": 128, 93 + "maxGraphemes": 1280, 94 + "description": "Distinguishing information for track variants (e.g. 'Acoustic Version', 'Live at Wembley', 'Radio Edit', 'Demo'). Used to differentiate between different versions of the same base track while maintaining grouping capabilities." 95 + }, 96 + "releaseDiscriminant": { 97 + "type": "string", 98 + "maxLength": 128, 99 + "maxGraphemes": 1280, 100 + "description": "Distinguishing information for release variants (e.g. 'Deluxe Edition', 'Remastered', '2023 Remaster', 'Special Edition'). Used to differentiate between different versions of the same base release while maintaining grouping capabilities." 101 } 102 } 103 }
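To make the new discriminant fields concrete, here is an invented record showing how a client might fill them in. Field names follow the lexicon above; everything else is illustrative (sketched with `serde_json::json!`, which the repo already depends on):

```rust
// Two plays of the "same" track can now be told apart without breaking grouping:
let live_play = serde_json::json!({
    "$type": "fm.teal.alpha.feed.play",
    "trackName": "Yellow",
    "trackDiscriminant": "Live at Wembley",   // variant of the track
    "releaseName": "Parachutes",
    "releaseDiscriminant": "Deluxe Edition"   // variant of the release
});
```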
+57
lexicons/fm.teal.alpha/feed/social/defs.json
···
··· 1 + {
2 + "lexicon": 1,
3 + "id": "fm.teal.alpha.feed.social.defs",
4 + "description": "This lexicon is in a not officially released state. It is subject to change. | Misc. items related to the social feed.",
5 + "defs": {
6 + "trackView": {
7 + "type": "object",
8 + "properties": {
9 + "trackName": {
10 + "type": "string",
11 + "minLength": 1,
12 + "maxLength": 256,
13 + "maxGraphemes": 2560,
14 + "description": "The name of the track"
15 + },
16 + "trackMbId": {
17 + "type": "string",
18 + "description": "The Musicbrainz ID of the track"
19 + },
20 + "recordingMbId": {
21 + "type": "string",
22 + "description": "The Musicbrainz recording ID of the track"
23 + },
24 + "artistNames": {
25 + "type": "array",
26 + "items": {
27 + "type": "string",
28 + "minLength": 1,
29 + "maxLength": 256,
30 + "maxGraphemes": 2560
31 + },
32 + "description": "Array of artist names in order of original appearance. Prefer using 'artists'."
33 + },
34 + "artistMbIds": {
35 + "type": "array",
36 + "items": { "type": "string" },
37 + "description": "Array of Musicbrainz artist IDs. Prefer using 'artists'."
38 + },
39 + "artists": {
40 + "type": "array",
41 + "items": { "type": "ref", "ref": "fm.teal.alpha.feed.defs#artist" },
42 + "description": "Array of artists in order of original appearance."
43 + },
44 + "releaseName": {
45 + "type": "string",
46 + "maxLength": 256,
47 + "maxGraphemes": 2560,
48 + "description": "The name of the release/album"
49 + },
50 + "releaseMbId": {
51 + "type": "string",
52 + "description": "The Musicbrainz release ID"
53 + }
54 + }
55 + }
56 + }
57 + }
+24
lexicons/fm.teal.alpha/feed/social/like.json
···
··· 1 + { 2 + "lexicon": 1, 3 + "id": "fm.teal.alpha.feed.social.like", 4 + "description": "This lexicon is in a not officially released state. It is subject to change. | The action of 'Liking' a Teal.fm post.", 5 + "defs": { 6 + "main": { 7 + "type": "record", 8 + "description": "Record containing a like for a teal.fm post.", 9 + "key": "tid", 10 + "record": { 11 + "type": "object", 12 + "required": ["subject", "createdAt"], 13 + "properties": { 14 + "subject": { "type": "ref", "ref": "com.atproto.repo.strongRef" }, 15 + "createdAt": { 16 + "type": "string", 17 + "format": "datetime", 18 + "description": "Client-declared timestamp when this post was originally created." 19 + } 20 + } 21 + } 22 + } 23 + } 24 + }
+30
lexicons/fm.teal.alpha/feed/social/playlist.json
···
··· 1 + { 2 + "lexicon": 1, 3 + "id": "fm.teal.alpha.feed.social.playlist", 4 + "description": "This lexicon is in a not officially released state. It is subject to change. | A teal.fm playlist, representing a list of tracks.", 5 + "defs": { 6 + "main": { 7 + "type": "record", 8 + "description": "Record containing the playlist metadata.", 9 + "key": "tid", 10 + "record": { 11 + "type": "object", 12 + "required": ["name", "createdAt"], 13 + "properties": { 14 + "name": { 15 + "type": "string", 16 + "description": "Display name for the playlist, required.", 17 + "minLength": 1, 18 + "maxLength": 50 19 + }, 20 + "description": { "type": "string", "maxLength": 5000 }, 21 + "createdAt": { 22 + "type": "string", 23 + "format": "datetime", 24 + "description": "Client-declared timestamp when this playlist was originally created." 25 + } 26 + } 27 + } 28 + } 29 + } 30 + }
+32
lexicons/fm.teal.alpha/feed/social/playlistItem.json
···
··· 1 + {
2 + "lexicon": 1,
3 + "id": "fm.teal.alpha.feed.social.playlistItem",
4 + "description": "This lexicon is in a not officially released state. It is subject to change. | A teal.fm playlist item.",
5 + "defs": {
6 + "main": {
7 + "type": "record",
8 + "description": "Record containing a playlist item for a teal.fm playlist.",
9 + "key": "tid",
10 + "record": {
11 + "type": "object",
12 + "required": ["subject", "createdAt", "track"],
13 + "properties": {
14 + "subject": { "type": "ref", "ref": "com.atproto.repo.strongRef" },
15 + "createdAt": {
16 + "type": "string",
17 + "format": "datetime",
18 + "description": "Client-declared timestamp when this playlist item was originally created."
19 + },
20 + "track": {
21 + "type": "ref",
22 + "ref": "fm.teal.alpha.feed.social.defs#trackView"
23 + },
24 + "order": {
25 + "type": "integer",
26 + "description": "The order of the track in the playlist"
27 + }
28 + }
29 + }
30 + }
31 + }
32 + }
+104
lexicons/fm.teal.alpha/feed/social/post.json
···
··· 1 + { 2 + "lexicon": 1, 3 + "id": "fm.teal.alpha.feed.social.post", 4 + "description": "This lexicon is in a not officially released state. It is subject to change. | Record containing a teal.fm post. Teal.fm posts include a track that is connected to the post, and could have some text. Replies, by default, have the same track as the parent post.", 5 + "defs": { 6 + "main": { 7 + "type": "record", 8 + "description": "Record containing a teal.fm post.", 9 + "key": "tid", 10 + "record": { 11 + "type": "object", 12 + "required": ["text", "createdAt"], 13 + "properties": { 14 + "text": { 15 + "type": "string", 16 + "maxLength": 3000, 17 + "maxGraphemes": 300, 18 + "description": "The primary post content. May be an empty string, if there are embeds." 19 + }, 20 + 21 + "trackName": { 22 + "type": "string", 23 + "minLength": 1, 24 + "maxLength": 256, 25 + "maxGraphemes": 2560, 26 + "description": "The name of the track" 27 + }, 28 + "trackMbId": { 29 + "type": "string", 30 + "description": "The Musicbrainz ID of the track" 31 + }, 32 + "recordingMbId": { 33 + "type": "string", 34 + "description": "The Musicbrainz recording ID of the track" 35 + }, 36 + "duration": { 37 + "type": "integer", 38 + "description": "The duration of the track in seconds" 39 + }, 40 + "artistNames": { 41 + "type": "array", 42 + "items": { 43 + "type": "string", 44 + "minLength": 1, 45 + "maxLength": 256, 46 + "maxGraphemes": 2560 47 + }, 48 + "description": "The names of the artists" 49 + }, 50 + "artistMbIds": { 51 + "type": "array", 52 + "items": { "type": "string" }, 53 + "description": "The Musicbrainz IDs of the artists" 54 + }, 55 + "releaseName": { 56 + "type": "string", 57 + "maxLength": 256, 58 + "maxGraphemes": 2560, 59 + "description": "The name of the release/album" 60 + }, 61 + "releaseMbId": { 62 + "type": "string", 63 + "description": "The Musicbrainz ID of the release/album" 64 + }, 65 + "isrc": { 66 + "type": "string", 67 + "description": "The ISRC code associated with the recording" 68 + }, 69 + "reply": { "type": "ref", "ref": "#replyRef" }, 70 + "facets": { 71 + "type": "array", 72 + "description": "Rich text facets, which may include mentions, links, and other features.", 73 + "items": { "type": "ref", "ref": "fm.teal.alpha.richtext.facet" } 74 + }, 75 + "langs": { 76 + "type": "array", 77 + "description": "Indicates human language of post primary text content.", 78 + "maxLength": 3, 79 + "items": { "type": "string", "format": "language" } 80 + }, 81 + "tags": { 82 + "type": "array", 83 + "description": "Additional hashtags, in addition to any included in post text and facets.", 84 + "maxLength": 8, 85 + "items": { "type": "string", "maxLength": 640, "maxGraphemes": 64 } 86 + }, 87 + "createdAt": { 88 + "type": "string", 89 + "format": "datetime", 90 + "description": "Client-declared timestamp when this post was originally created." 91 + } 92 + } 93 + } 94 + }, 95 + "replyRef": { 96 + "type": "object", 97 + "required": ["root", "parent"], 98 + "properties": { 99 + "root": { "type": "ref", "ref": "com.atproto.repo.strongRef" }, 100 + "parent": { "type": "ref", "ref": "com.atproto.repo.strongRef" } 101 + } 102 + } 103 + } 104 + }
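For comparison, an invented `fm.teal.alpha.feed.social.post` record using only properties declared above (again sketched with `serde_json::json!`; all values are made up):

```rust
let post = serde_json::json!({
    "$type": "fm.teal.alpha.feed.social.post",
    "text": "This one has been on repeat all week.",
    "trackName": "Weird Fishes/Arpeggi",
    "artistNames": ["Radiohead"],
    "releaseName": "In Rainbows",
    "langs": ["en"],
    "tags": ["nowplaying"],
    "createdAt": "2024-12-20T12:34:56.000Z"
});
```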
+24
lexicons/fm.teal.alpha/feed/social/repost.json
···
··· 1 + { 2 + "lexicon": 1, 3 + "id": "fm.teal.alpha.feed.social.repost", 4 + "description": "This lexicon is in a not officially released state. It is subject to change. | The action of 'Reposting' a Teal.fm post.", 5 + "defs": { 6 + "main": { 7 + "type": "record", 8 + "description": "Record containing a repost for a teal.fm post.", 9 + "key": "tid", 10 + "record": { 11 + "type": "object", 12 + "required": ["subject", "createdAt"], 13 + "properties": { 14 + "subject": { "type": "ref", "ref": "com.atproto.repo.strongRef" }, 15 + "createdAt": { 16 + "type": "string", 17 + "format": "datetime", 18 + "description": "Client-declared timestamp when this post was originally created." 19 + } 20 + } 21 + } 22 + } 23 + } 24 + }
+24
lexicons/fm.teal.alpha/richtext/facet.json
···
··· 1 + { 2 + "lexicon": 1, 3 + "id": "fm.teal.alpha.richtext.facet", 4 + "defs": { 5 + "main": { 6 + "type": "object", 7 + "description": "Annotation of a sub-string within rich text.", 8 + "required": ["index", "features"], 9 + "properties": { 10 + "index": { "type": "ref", "ref": "app.bsky.richtext.facet#byteSlice" }, 11 + "features": { 12 + "type": "array", 13 + "items": { 14 + "type": "union", 15 + "refs": [ 16 + "app.bsky.richtext.facet#mention", 17 + "app.bsky.richtext.facet#link" 18 + ] 19 + } 20 + } 21 + } 22 + } 23 + } 24 + }
+226
migrations/20241220000001_initial_schema.sql
···
··· 1 + -- Initial comprehensive schema for Teal music platform
2 + -- Based on services/cadet/sql/base.sql
3 + 
4 + CREATE TABLE artists (
5 + mbid UUID PRIMARY KEY,
6 + name TEXT NOT NULL,
7 + play_count INTEGER DEFAULT 0
8 + );
9 + 
10 + -- releases are analogous to 'albums'
11 + CREATE TABLE releases (
12 + mbid UUID PRIMARY KEY,
13 + name TEXT NOT NULL,
14 + play_count INTEGER DEFAULT 0
15 + );
16 + 
17 + -- recordings are analogous to 'tracks' BUT tracks can be in multiple releases!
18 + CREATE TABLE recordings (
19 + mbid UUID PRIMARY KEY,
20 + name TEXT NOT NULL,
21 + play_count INTEGER DEFAULT 0
22 + );
23 + 
24 + CREATE TABLE plays (
25 + uri TEXT PRIMARY KEY,
26 + did TEXT NOT NULL,
27 + rkey TEXT NOT NULL,
28 + cid TEXT NOT NULL,
29 + isrc TEXT,
30 + duration INTEGER,
31 + track_name TEXT NOT NULL,
32 + played_time TIMESTAMP WITH TIME ZONE,
33 + processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
34 + release_mbid UUID,
35 + release_name TEXT,
36 + recording_mbid UUID,
37 + submission_client_agent TEXT,
38 + music_service_base_domain TEXT,
39 + origin_url TEXT,
40 + FOREIGN KEY (release_mbid) REFERENCES releases (mbid),
41 + FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid)
42 + );
43 + 
44 + CREATE INDEX idx_plays_release_mbid ON plays (release_mbid);
45 + CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid);
46 + CREATE INDEX idx_plays_played_time ON plays (played_time);
47 + CREATE INDEX idx_plays_did ON plays (did);
48 + 
49 + CREATE TABLE play_to_artists (
50 + play_uri TEXT, -- references plays(uri)
51 + artist_mbid UUID REFERENCES artists (mbid),
52 + artist_name TEXT, -- storing here for ease of use when joining
53 + PRIMARY KEY (play_uri, artist_mbid),
54 + FOREIGN KEY (play_uri) REFERENCES plays (uri)
55 + );
56 + 
57 + CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid);
58 + 
59 + -- Profiles table
60 + CREATE TABLE profiles (
61 + did TEXT PRIMARY KEY,
62 + handle TEXT,
63 + display_name TEXT,
64 + description TEXT,
65 + description_facets JSONB,
66 + avatar TEXT, -- IPLD of the image, bafy... 
67 + banner TEXT, 68 + created_at TIMESTAMP WITH TIME ZONE 69 + ); 70 + 71 + -- User featured items table 72 + CREATE TABLE featured_items ( 73 + did TEXT PRIMARY KEY, 74 + mbid TEXT NOT NULL, 75 + type TEXT NOT NULL 76 + ); 77 + 78 + -- Statii table (status records) 79 + CREATE TABLE statii ( 80 + uri TEXT PRIMARY KEY, 81 + did TEXT NOT NULL, 82 + rkey TEXT NOT NULL, 83 + cid TEXT NOT NULL, 84 + record JSONB NOT NULL, 85 + indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 86 + ); 87 + 88 + CREATE INDEX idx_statii_did_rkey ON statii (did, rkey); 89 + 90 + -- Materialized view for artists' play counts 91 + CREATE MATERIALIZED VIEW mv_artist_play_counts AS 92 + SELECT 93 + a.mbid AS artist_mbid, 94 + a.name AS artist_name, 95 + COUNT(p.uri) AS play_count 96 + FROM 97 + artists a 98 + LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 99 + LEFT JOIN plays p ON p.uri = pta.play_uri 100 + GROUP BY 101 + a.mbid, 102 + a.name; 103 + 104 + CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid); 105 + 106 + -- Materialized view for releases' play counts 107 + CREATE MATERIALIZED VIEW mv_release_play_counts AS 108 + SELECT 109 + r.mbid AS release_mbid, 110 + r.name AS release_name, 111 + COUNT(p.uri) AS play_count 112 + FROM 113 + releases r 114 + LEFT JOIN plays p ON p.release_mbid = r.mbid 115 + GROUP BY 116 + r.mbid, 117 + r.name; 118 + 119 + CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid); 120 + 121 + -- Materialized view for recordings' play counts 122 + CREATE MATERIALIZED VIEW mv_recording_play_counts AS 123 + SELECT 124 + rec.mbid AS recording_mbid, 125 + rec.name AS recording_name, 126 + COUNT(p.uri) AS play_count 127 + FROM 128 + recordings rec 129 + LEFT JOIN plays p ON p.recording_mbid = rec.mbid 130 + GROUP BY 131 + rec.mbid, 132 + rec.name; 133 + 134 + CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid); 135 + 136 + -- Global play count materialized view 137 + CREATE MATERIALIZED VIEW mv_global_play_count AS 138 + SELECT 139 + COUNT(uri) AS total_plays, 140 + COUNT(DISTINCT did) AS unique_listeners 141 + FROM plays; 142 + 143 + CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays); 144 + 145 + -- Top artists in the last 30 days 146 + CREATE MATERIALIZED VIEW mv_top_artists_30days AS 147 + SELECT 148 + a.mbid AS artist_mbid, 149 + a.name AS artist_name, 150 + COUNT(p.uri) AS play_count 151 + FROM artists a 152 + INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 153 + INNER JOIN plays p ON p.uri = pta.play_uri 154 + WHERE p.played_time >= NOW() - INTERVAL '30 days' 155 + GROUP BY a.mbid, a.name 156 + ORDER BY COUNT(p.uri) DESC; 157 + 158 + -- Top releases in the last 30 days 159 + CREATE MATERIALIZED VIEW mv_top_releases_30days AS 160 + SELECT 161 + r.mbid AS release_mbid, 162 + r.name AS release_name, 163 + COUNT(p.uri) AS play_count 164 + FROM releases r 165 + INNER JOIN plays p ON p.release_mbid = r.mbid 166 + WHERE p.played_time >= NOW() - INTERVAL '30 days' 167 + GROUP BY r.mbid, r.name 168 + ORDER BY COUNT(p.uri) DESC; 169 + 170 + -- Top artists for user in the last 30 days 171 + CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS 172 + SELECT 173 + prof.did, 174 + a.mbid AS artist_mbid, 175 + a.name AS artist_name, 176 + COUNT(p.uri) AS play_count 177 + FROM artists a 178 + INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 179 + INNER JOIN plays p ON p.uri = pta.play_uri 180 + INNER JOIN profiles prof ON 
prof.did = p.did 181 + WHERE p.played_time >= NOW() - INTERVAL '30 days' 182 + GROUP BY prof.did, a.mbid, a.name 183 + ORDER BY COUNT(p.uri) DESC; 184 + 185 + -- Top artists for user in the last 7 days 186 + CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS 187 + SELECT 188 + prof.did, 189 + a.mbid AS artist_mbid, 190 + a.name AS artist_name, 191 + COUNT(p.uri) AS play_count 192 + FROM artists a 193 + INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 194 + INNER JOIN plays p ON p.uri = pta.play_uri 195 + INNER JOIN profiles prof ON prof.did = p.did 196 + WHERE p.played_time >= NOW() - INTERVAL '7 days' 197 + GROUP BY prof.did, a.mbid, a.name 198 + ORDER BY COUNT(p.uri) DESC; 199 + 200 + -- Top releases for user in the last 30 days 201 + CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS 202 + SELECT 203 + prof.did, 204 + r.mbid AS release_mbid, 205 + r.name AS release_name, 206 + COUNT(p.uri) AS play_count 207 + FROM releases r 208 + INNER JOIN plays p ON p.release_mbid = r.mbid 209 + INNER JOIN profiles prof ON prof.did = p.did 210 + WHERE p.played_time >= NOW() - INTERVAL '30 days' 211 + GROUP BY prof.did, r.mbid, r.name 212 + ORDER BY COUNT(p.uri) DESC; 213 + 214 + -- Top releases for user in the last 7 days 215 + CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS 216 + SELECT 217 + prof.did, 218 + r.mbid AS release_mbid, 219 + r.name AS release_name, 220 + COUNT(p.uri) AS play_count 221 + FROM releases r 222 + INNER JOIN plays p ON p.release_mbid = r.mbid 223 + INNER JOIN profiles prof ON prof.did = p.did 224 + WHERE p.played_time >= NOW() - INTERVAL '7 days' 225 + GROUP BY prof.did, r.mbid, r.name 226 + ORDER BY COUNT(p.uri) DESC;
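The materialized views above are what the stats endpoints ultimately read from; a small sketch of querying one with sqlx (assumes a `sqlx::PgPool` like the one the aqua repos hold, and remember materialized views need a periodic `REFRESH MATERIALIZED VIEW`):

```rust
use sqlx::Row;

async fn top_artists_30d(pool: &sqlx::PgPool) -> Result<Vec<(String, i64)>, sqlx::Error> {
    // Reads the precomputed 30-day view defined in this migration.
    let rows = sqlx::query("SELECT artist_name, play_count FROM mv_top_artists_30days LIMIT 10")
        .fetch_all(pool)
        .await?;

    Ok(rows
        .iter()
        .map(|r| (r.get::<String, _>("artist_name"), r.get::<i64, _>("play_count")))
        .collect())
}
```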
+59
migrations/20241220000002_car_import_tables.sql
···
··· 1 + -- CAR import functionality tables 2 + -- For handling AT Protocol CAR file imports and processing 3 + 4 + -- Tracks uploaded CAR files that are queued for processing 5 + CREATE TABLE IF NOT EXISTS car_import_requests ( 6 + import_id TEXT PRIMARY KEY, 7 + car_data_base64 TEXT NOT NULL, 8 + status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed 9 + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 10 + processed_at TIMESTAMP WITH TIME ZONE, 11 + error_message TEXT, 12 + file_size_bytes INTEGER, 13 + block_count INTEGER, 14 + extracted_records_count INTEGER DEFAULT 0 15 + ); 16 + 17 + CREATE INDEX idx_car_import_requests_status ON car_import_requests (status); 18 + CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at); 19 + 20 + -- Tracks raw IPLD blocks extracted from CAR files 21 + CREATE TABLE IF NOT EXISTS car_blocks ( 22 + cid TEXT PRIMARY KEY, 23 + import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 24 + block_data BYTEA NOT NULL, 25 + decoded_successfully BOOLEAN DEFAULT FALSE, 26 + collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc. 27 + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 28 + ); 29 + 30 + CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id); 31 + CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type); 32 + 33 + -- Tracks records extracted from CAR imports that were successfully processed 34 + CREATE TABLE IF NOT EXISTS car_extracted_records ( 35 + id SERIAL PRIMARY KEY, 36 + import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 37 + cid TEXT NOT NULL REFERENCES car_blocks(cid), 38 + collection_type TEXT NOT NULL, 39 + record_uri TEXT, -- AT URI if applicable (e.g., for play records) 40 + synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123') 41 + rkey TEXT, 42 + extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 43 + processing_notes TEXT 44 + ); 45 + 46 + CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id); 47 + CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type); 48 + CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri); 49 + 50 + -- Tracks import metadata and commit information 51 + CREATE TABLE IF NOT EXISTS car_import_metadata ( 52 + import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 53 + metadata_key TEXT NOT NULL, 54 + metadata_value JSONB NOT NULL, 55 + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 56 + PRIMARY KEY (import_id, metadata_key) 57 + ); 58 + 59 + CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
+112
migrations/20241220000003_artists_without_mbids.sql
···
··· 1 + -- Migration to support artists without MusicBrainz IDs 2 + -- This allows the system to comply with the Teal lexicon where only trackName is required 3 + 4 + -- Add a field to plays table to store raw artist names for records without MBIDs 5 + ALTER TABLE plays ADD COLUMN artist_names_raw JSONB; 6 + 7 + -- Create a new artists table that doesn't require MBID as primary key 8 + CREATE TABLE artists_extended ( 9 + id SERIAL PRIMARY KEY, 10 + mbid UUID UNIQUE, -- Optional MusicBrainz ID 11 + name TEXT NOT NULL, 12 + name_normalized TEXT GENERATED ALWAYS AS (LOWER(TRIM(name))) STORED, 13 + play_count INTEGER DEFAULT 0, 14 + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 15 + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 16 + ); 17 + 18 + -- Create index for efficient lookups 19 + CREATE INDEX idx_artists_extended_mbid ON artists_extended (mbid) WHERE mbid IS NOT NULL; 20 + CREATE INDEX idx_artists_extended_name_normalized ON artists_extended (name_normalized); 21 + CREATE UNIQUE INDEX idx_artists_extended_name_unique ON artists_extended (name_normalized) WHERE mbid IS NULL; 22 + 23 + -- Create a new junction table that can handle both MBID and non-MBID artists 24 + CREATE TABLE play_to_artists_extended ( 25 + play_uri TEXT NOT NULL REFERENCES plays(uri), 26 + artist_id INTEGER NOT NULL REFERENCES artists_extended(id), 27 + artist_name TEXT NOT NULL, -- Denormalized for performance 28 + PRIMARY KEY (play_uri, artist_id) 29 + ); 30 + 31 + CREATE INDEX idx_play_to_artists_extended_artist ON play_to_artists_extended (artist_id); 32 + 33 + -- Migrate existing data from old tables to new structure 34 + INSERT INTO artists_extended (mbid, name, play_count) 35 + SELECT mbid, name, play_count FROM artists; 36 + 37 + INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) 38 + SELECT 39 + pta.play_uri, 40 + ae.id, 41 + pta.artist_name 42 + FROM play_to_artists pta 43 + JOIN artists_extended ae ON ae.mbid = pta.artist_mbid; 44 + 45 + -- Update materialized views to use new structure 46 + DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts; 47 + CREATE MATERIALIZED VIEW mv_artist_play_counts AS 48 + SELECT 49 + ae.id AS artist_id, 50 + ae.mbid AS artist_mbid, 51 + ae.name AS artist_name, 52 + COUNT(p.uri) AS play_count 53 + FROM 54 + artists_extended ae 55 + LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 56 + LEFT JOIN plays p ON p.uri = ptae.play_uri 57 + GROUP BY 58 + ae.id, ae.mbid, ae.name; 59 + 60 + CREATE UNIQUE INDEX idx_mv_artist_play_counts_new ON mv_artist_play_counts (artist_id); 61 + 62 + -- Update other materialized views that reference artists 63 + DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_30days; 64 + CREATE MATERIALIZED VIEW mv_top_artists_30days AS 65 + SELECT 66 + ae.id AS artist_id, 67 + ae.mbid AS artist_mbid, 68 + ae.name AS artist_name, 69 + COUNT(p.uri) AS play_count 70 + FROM artists_extended ae 71 + INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 72 + INNER JOIN plays p ON p.uri = ptae.play_uri 73 + WHERE p.played_time >= NOW() - INTERVAL '30 days' 74 + GROUP BY ae.id, ae.mbid, ae.name 75 + ORDER BY COUNT(p.uri) DESC; 76 + 77 + DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_30days; 78 + CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS 79 + SELECT 80 + prof.did, 81 + ae.id AS artist_id, 82 + ae.mbid AS artist_mbid, 83 + ae.name AS artist_name, 84 + COUNT(p.uri) AS play_count 85 + FROM artists_extended ae 86 + INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 87 + 
INNER JOIN plays p ON p.uri = ptae.play_uri 88 + INNER JOIN profiles prof ON prof.did = p.did 89 + WHERE p.played_time >= NOW() - INTERVAL '30 days' 90 + GROUP BY prof.did, ae.id, ae.mbid, ae.name 91 + ORDER BY COUNT(p.uri) DESC; 92 + 93 + DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_7days; 94 + CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS 95 + SELECT 96 + prof.did, 97 + ae.id AS artist_id, 98 + ae.mbid AS artist_mbid, 99 + ae.name AS artist_name, 100 + COUNT(p.uri) AS play_count 101 + FROM artists_extended ae 102 + INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 103 + INNER JOIN plays p ON p.uri = ptae.play_uri 104 + INNER JOIN profiles prof ON prof.did = p.did 105 + WHERE p.played_time >= NOW() - INTERVAL '7 days' 106 + GROUP BY prof.did, ae.id, ae.mbid, ae.name 107 + ORDER BY COUNT(p.uri) DESC; 108 + 109 + -- Comment explaining the migration strategy 110 + COMMENT ON TABLE artists_extended IS 'Extended artists table that supports both MusicBrainz and non-MusicBrainz artists. Uses serial ID as primary key with optional MBID.'; 111 + COMMENT ON TABLE play_to_artists_extended IS 'Junction table linking plays to artists using the new artists_extended table structure.'; 112 + COMMENT ON COLUMN plays.artist_names_raw IS 'Raw artist names as JSON array for plays without MusicBrainz data, used as fallback when artist relationships cannot be established.';
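A sketch of the lookup-or-create flow this migration enables for artists that have no MusicBrainz ID. The helper name `ensure_artist` is hypothetical (not part of the diff) and a `sqlx::PgPool` is assumed:

```rust
async fn ensure_artist(pool: &sqlx::PgPool, name: &str) -> Result<i32, sqlx::Error> {
    // name_normalized is the generated LOWER(TRIM(name)) column indexed above.
    if let Some(id) = sqlx::query_scalar::<sqlx::Postgres, i32>(
        "SELECT id FROM artists_extended WHERE name_normalized = LOWER(TRIM($1))",
    )
    .bind(name)
    .fetch_optional(pool)
    .await?
    {
        return Ok(id);
    }

    // No match: create a row with a NULL mbid; play_to_artists_extended can
    // then reference it by its serial id instead of requiring an MBID.
    sqlx::query_scalar::<sqlx::Postgres, i32>(
        "INSERT INTO artists_extended (name) VALUES ($1) RETURNING id",
    )
    .bind(name)
    .fetch_one(pool)
    .await
}
```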
+76
migrations/20241220000004_synthetic_mbids.sql
···
··· 1 + -- Migration to support synthetic MBIDs for artists without MusicBrainz data 2 + -- This ensures all artists have some form of ID while maintaining uniqueness 3 + 4 + -- Enable UUID extension for v5 UUID generation 5 + CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; 6 + 7 + -- Add a column to track MBID type (musicbrainz, synthetic, unknown) 8 + ALTER TABLE artists_extended ADD COLUMN mbid_type TEXT DEFAULT 'unknown' NOT NULL; 9 + 10 + -- Add check constraint for valid MBID types 11 + ALTER TABLE artists_extended ADD CONSTRAINT chk_mbid_type 12 + CHECK (mbid_type IN ('musicbrainz', 'synthetic', 'unknown')); 13 + 14 + -- Update existing records to set proper MBID type 15 + UPDATE artists_extended SET mbid_type = 'musicbrainz' WHERE mbid IS NOT NULL; 16 + 17 + -- Drop the unique constraint on name_normalized for null MBIDs since we'll handle duplicates differently 18 + DROP INDEX IF EXISTS idx_artists_extended_name_unique; 19 + 20 + -- Add index for efficient querying by MBID type 21 + CREATE INDEX idx_artists_extended_mbid_type ON artists_extended (mbid_type); 22 + 23 + -- Create a view to easily work with different artist types 24 + CREATE VIEW artists_with_type AS 25 + SELECT 26 + id, 27 + mbid, 28 + name, 29 + mbid_type, 30 + play_count, 31 + created_at, 32 + updated_at, 33 + -- For synthetic MBIDs, we can show the source name used for generation 34 + CASE 35 + WHEN mbid_type = 'synthetic' THEN 'Generated from: ' || name 36 + WHEN mbid_type = 'musicbrainz' THEN 'MusicBrainz: ' || mbid::text 37 + ELSE 'No MBID available' 38 + END as mbid_info 39 + FROM artists_extended; 40 + 41 + -- Update materialized views to include MBID type information 42 + DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts; 43 + CREATE MATERIALIZED VIEW mv_artist_play_counts AS 44 + SELECT 45 + ae.id AS artist_id, 46 + ae.mbid AS artist_mbid, 47 + ae.name AS artist_name, 48 + ae.mbid_type, 49 + COUNT(p.uri) AS play_count 50 + FROM 51 + artists_extended ae 52 + LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 53 + LEFT JOIN plays p ON p.uri = ptae.play_uri 54 + GROUP BY 55 + ae.id, ae.mbid, ae.name, ae.mbid_type; 56 + 57 + CREATE UNIQUE INDEX idx_mv_artist_play_counts_with_type ON mv_artist_play_counts (artist_id); 58 + 59 + -- Add comments explaining the synthetic MBID system 60 + COMMENT ON COLUMN artists_extended.mbid_type IS 'Type of MBID: musicbrainz (real), synthetic (generated), or unknown (legacy data)'; 61 + COMMENT ON COLUMN artists_extended.mbid IS 'MusicBrainz ID (for musicbrainz type) or synthetic UUID (for synthetic type)'; 62 + COMMENT ON VIEW artists_with_type IS 'View that provides human-readable information about artist MBID sources'; 63 + 64 + -- Add a function to generate synthetic MBIDs 65 + CREATE OR REPLACE FUNCTION generate_synthetic_mbid(artist_name TEXT) RETURNS UUID AS $$ 66 + DECLARE 67 + namespace_uuid UUID := '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; -- DNS namespace 68 + result_uuid UUID; 69 + BEGIN 70 + -- Generate deterministic UUID v5 based on artist name 71 + SELECT uuid_generate_v5(namespace_uuid, artist_name) INTO result_uuid; 72 + RETURN result_uuid; 73 + END; 74 + $$ LANGUAGE plpgsql IMMUTABLE; 75 + 76 + COMMENT ON FUNCTION generate_synthetic_mbid IS 'Generates a deterministic UUID v5 for artist names without MusicBrainz IDs';
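Since `generate_synthetic_mbid` is an `IMMUTABLE` function of the artist name, the same input always yields the same UUID. A minimal sketch of calling it from Rust (assumes sqlx with the uuid feature enabled, as the rest of the schema already relies on UUID columns):

```rust
async fn synthetic_mbid(pool: &sqlx::PgPool, name: &str) -> Result<uuid::Uuid, sqlx::Error> {
    // Deterministic UUID v5 derived from the artist name by the SQL function above.
    sqlx::query_scalar::<sqlx::Postgres, uuid::Uuid>("SELECT generate_synthetic_mbid($1)")
        .bind(name)
        .fetch_one(pool)
        .await
}
// Calling this twice with the same name returns the same UUID, so re-imports
// do not mint duplicate synthetic artists.
```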
+101
migrations/20241220000005_fuzzy_matching.sql
···
··· 1 + -- Migration to add fuzzy text matching capabilities 2 + -- This enables better artist name matching using trigram similarity 3 + 4 + -- Enable pg_trgm extension for trigram similarity matching 5 + CREATE EXTENSION IF NOT EXISTS pg_trgm; 6 + 7 + -- Create indexes for efficient trigram matching on artist names 8 + CREATE INDEX idx_artists_extended_name_trgm ON artists_extended USING gin (name gin_trgm_ops); 9 + CREATE INDEX idx_artists_extended_name_normalized_trgm ON artists_extended USING gin (name_normalized gin_trgm_ops); 10 + 11 + -- Create a function to calculate comprehensive artist similarity 12 + CREATE OR REPLACE FUNCTION calculate_artist_similarity( 13 + input_name TEXT, 14 + existing_name TEXT, 15 + input_album TEXT DEFAULT NULL, 16 + existing_album TEXT DEFAULT NULL 17 + ) RETURNS FLOAT AS $$ 18 + DECLARE 19 + name_similarity FLOAT; 20 + album_similarity FLOAT := 0.0; 21 + final_score FLOAT; 22 + BEGIN 23 + -- Calculate trigram similarity for artist names 24 + name_similarity := similarity(LOWER(TRIM(input_name)), LOWER(TRIM(existing_name))); 25 + 26 + -- Boost for exact matches after normalization 27 + IF LOWER(TRIM(regexp_replace(input_name, '[^a-zA-Z0-9\s]', '', 'g'))) = 28 + LOWER(TRIM(regexp_replace(existing_name, '[^a-zA-Z0-9\s]', '', 'g'))) THEN 29 + name_similarity := GREATEST(name_similarity, 0.95); 30 + END IF; 31 + 32 + -- Factor in album similarity if both are provided 33 + IF input_album IS NOT NULL AND existing_album IS NOT NULL THEN 34 + album_similarity := similarity(LOWER(TRIM(input_album)), LOWER(TRIM(existing_album))); 35 + -- Weight: 80% name, 20% album 36 + final_score := (name_similarity * 0.8) + (album_similarity * 0.2); 37 + ELSE 38 + final_score := name_similarity; 39 + END IF; 40 + 41 + RETURN final_score; 42 + END; 43 + $$ LANGUAGE plpgsql IMMUTABLE; 44 + 45 + -- Create a view for fuzzy artist matching with confidence scores 46 + CREATE VIEW fuzzy_artist_matches AS 47 + SELECT DISTINCT 48 + ae1.id as query_artist_id, 49 + ae1.name as query_artist_name, 50 + ae1.mbid_type as query_mbid_type, 51 + ae2.id as match_artist_id, 52 + ae2.name as match_artist_name, 53 + ae2.mbid as match_mbid, 54 + ae2.mbid_type as match_mbid_type, 55 + similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as name_similarity, 56 + CASE 57 + WHEN ae2.mbid_type = 'musicbrainz' THEN 'upgrade_to_mb' 58 + WHEN ae1.mbid_type = 'musicbrainz' AND ae2.mbid_type = 'synthetic' THEN 'consolidate_to_mb' 59 + ELSE 'merge_synthetic' 60 + END as match_action 61 + FROM artists_extended ae1 62 + CROSS JOIN artists_extended ae2 63 + WHERE ae1.id != ae2.id 64 + AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) > 0.8 65 + AND ( 66 + ae1.mbid_type = 'synthetic' OR ae2.mbid_type = 'musicbrainz' 67 + ); 68 + 69 + -- Add comments 70 + COMMENT ON EXTENSION pg_trgm IS 'Trigram extension for fuzzy text matching'; 71 + COMMENT ON INDEX idx_artists_extended_name_trgm IS 'GIN index for trigram similarity on artist names'; 72 + COMMENT ON FUNCTION calculate_artist_similarity IS 'Calculates similarity score between artists considering name and optional album context'; 73 + COMMENT ON VIEW fuzzy_artist_matches IS 'Shows potential artist matches with confidence scores and recommended actions'; 74 + 75 + -- Create a function to suggest artist consolidations 76 + CREATE OR REPLACE FUNCTION suggest_artist_consolidations(min_similarity FLOAT DEFAULT 0.9) 77 + RETURNS TABLE( 78 + action TEXT, 79 + synthetic_artist TEXT, 80 + target_artist TEXT, 81 + similarity_score FLOAT, 82 + synthetic_plays 
INTEGER, 83 + target_plays INTEGER 84 + ) AS $$ 85 + BEGIN 86 + RETURN QUERY 87 + SELECT 88 + fam.match_action as action, 89 + fam.query_artist_name as synthetic_artist, 90 + fam.match_artist_name as target_artist, 91 + fam.name_similarity as similarity_score, 92 + (SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.query_artist_id) as synthetic_plays, 93 + (SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.match_artist_id) as target_plays 94 + FROM fuzzy_artist_matches fam 95 + WHERE fam.name_similarity >= min_similarity 96 + AND fam.match_action = 'upgrade_to_mb' 97 + ORDER BY fam.name_similarity DESC, synthetic_plays DESC; 98 + END; 99 + $$ LANGUAGE plpgsql; 100 + 101 + COMMENT ON FUNCTION suggest_artist_consolidations IS 'Returns suggestions for consolidating synthetic artists with MusicBrainz artists based on similarity';
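A short sketch of how these helpers might be used when deciding whether an incoming play matches an existing artist; the names below are illustrative, and the 0.9 threshold mirrors the function's default:

  -- Name-only similarity, and name plus album context (weighted 80/20 as defined above)
  SELECT calculate_artist_similarity('Sigur Ros', 'Sigur Rós') AS name_only,
         calculate_artist_similarity('Sigur Ros', 'Sigur Rós', 'Takk', 'Takk...') AS with_album;

  -- High-confidence candidates for merging synthetic artists into MusicBrainz ones
  SELECT * FROM suggest_artist_consolidations(0.9) LIMIT 20;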
+138
migrations/20241220000006_discriminant_fields.sql
···
··· 1 + -- Migration to add discriminant fields for track and release variants 2 + -- This enables proper handling of different versions while maintaining grouping capabilities 3 + 4 + -- Add discriminant fields to plays table 5 + ALTER TABLE plays ADD COLUMN track_discriminant TEXT; 6 + ALTER TABLE plays ADD COLUMN release_discriminant TEXT; 7 + 8 + -- Add discriminant field to releases table 9 + ALTER TABLE releases ADD COLUMN discriminant TEXT; 10 + 11 + -- Add discriminant field to recordings table 12 + ALTER TABLE recordings ADD COLUMN discriminant TEXT; 13 + 14 + -- Create indexes for efficient searching and filtering 15 + CREATE INDEX idx_plays_track_discriminant ON plays (track_discriminant); 16 + CREATE INDEX idx_plays_release_discriminant ON plays (release_discriminant); 17 + CREATE INDEX idx_releases_discriminant ON releases (discriminant); 18 + CREATE INDEX idx_recordings_discriminant ON recordings (discriminant); 19 + 20 + -- Create composite indexes for grouping by base name + discriminant 21 + CREATE INDEX idx_plays_track_name_discriminant ON plays (track_name, track_discriminant); 22 + CREATE INDEX idx_plays_release_name_discriminant ON plays (release_name, release_discriminant); 23 + 24 + -- Update materialized views to include discriminant information 25 + DROP MATERIALIZED VIEW IF EXISTS mv_release_play_counts; 26 + CREATE MATERIALIZED VIEW mv_release_play_counts AS 27 + SELECT 28 + r.mbid AS release_mbid, 29 + r.name AS release_name, 30 + r.discriminant AS release_discriminant, 31 + COUNT(p.uri) AS play_count 32 + FROM 33 + releases r 34 + LEFT JOIN plays p ON p.release_mbid = r.mbid 35 + GROUP BY 36 + r.mbid, r.name, r.discriminant; 37 + 38 + CREATE UNIQUE INDEX idx_mv_release_play_counts_discriminant ON mv_release_play_counts (release_mbid); 39 + 40 + DROP MATERIALIZED VIEW IF EXISTS mv_recording_play_counts; 41 + CREATE MATERIALIZED VIEW mv_recording_play_counts AS 42 + SELECT 43 + rec.mbid AS recording_mbid, 44 + rec.name AS recording_name, 45 + rec.discriminant AS recording_discriminant, 46 + COUNT(p.uri) AS play_count 47 + FROM 48 + recordings rec 49 + LEFT JOIN plays p ON p.recording_mbid = rec.mbid 50 + GROUP BY 51 + rec.mbid, rec.name, rec.discriminant; 52 + 53 + CREATE UNIQUE INDEX idx_mv_recording_play_counts_discriminant ON mv_recording_play_counts (recording_mbid); 54 + 55 + -- Create views for analyzing track/release variants 56 + CREATE VIEW track_variants AS 57 + SELECT 58 + track_name, 59 + track_discriminant, 60 + COUNT(*) AS play_count, 61 + COUNT(DISTINCT did) AS unique_listeners, 62 + COUNT(DISTINCT recording_mbid) AS unique_recordings 63 + FROM plays 64 + WHERE track_name IS NOT NULL 65 + GROUP BY track_name, track_discriminant 66 + ORDER BY track_name, play_count DESC; 67 + 68 + CREATE VIEW release_variants AS 69 + SELECT 70 + release_name, 71 + release_discriminant, 72 + COUNT(*) AS play_count, 73 + COUNT(DISTINCT did) AS unique_listeners, 74 + COUNT(DISTINCT release_mbid) AS unique_releases 75 + FROM plays 76 + WHERE release_name IS NOT NULL 77 + GROUP BY release_name, release_discriminant 78 + ORDER BY release_name, play_count DESC; 79 + 80 + -- Create function to extract potential discriminants from existing names 81 + CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$ 82 + DECLARE 83 + discriminant_patterns TEXT[] := ARRAY[ 84 + '\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\)', 85 + 
'\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\]', 86 + '\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\}' 87 + ]; 88 + pattern TEXT; 89 + match_result TEXT; 90 + BEGIN 91 + -- Try each pattern to find discriminant information 92 + FOREACH pattern IN ARRAY discriminant_patterns 93 + LOOP 94 + SELECT substring(name_text FROM pattern) INTO match_result; 95 + IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 96 + RETURN trim(match_result); 97 + END IF; 98 + END LOOP; 99 + 100 + RETURN NULL; 101 + END; 102 + $$ LANGUAGE plpgsql IMMUTABLE; 103 + 104 + -- Create function to get base name without discriminant 105 + CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$ 106 + DECLARE 107 + cleanup_patterns TEXT[] := ARRAY[ 108 + '\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\)\s*', 109 + '\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\]\s*', 110 + '\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\}\s*' 111 + ]; 112 + pattern TEXT; 113 + result_text TEXT := name_text; 114 + BEGIN 115 + -- Remove discriminant patterns to get base name 116 + FOREACH pattern IN ARRAY cleanup_patterns 117 + LOOP 118 + result_text := regexp_replace(result_text, pattern, ' ', 'gi'); 119 + END LOOP; 120 + 121 + -- Clean up extra whitespace 122 + result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g'); 123 + 124 + RETURN result_text; 125 + END; 126 + $$ LANGUAGE plpgsql IMMUTABLE; 127 + 128 + -- Add comments explaining the discriminant system 129 + COMMENT ON COLUMN plays.track_discriminant IS 'Distinguishing information for track variants (e.g., "Acoustic Version", "Live at Wembley", "Radio Edit")'; 130 + COMMENT ON COLUMN plays.release_discriminant IS 'Distinguishing information for release variants (e.g., "Deluxe Edition", "Remastered", "2023 Remaster")'; 131 + COMMENT ON COLUMN releases.discriminant IS 'Distinguishing information for release variants to enable proper grouping'; 132 + COMMENT ON COLUMN recordings.discriminant IS 'Distinguishing information for recording variants to enable proper grouping'; 133 + 134 + COMMENT ON VIEW track_variants IS 'Shows all variants of tracks with their play counts and unique listeners'; 135 + COMMENT ON VIEW release_variants IS 'Shows all variants of releases with their play counts and unique listeners'; 136 + 137 + COMMENT ON FUNCTION extract_discriminant IS 'Extracts discriminant information from track/release names for migration purposes'; 138 + COMMENT ON FUNCTION get_base_name IS 'Returns the base name without discriminant information for grouping purposes';
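A sketch of how the new helpers and variant views can be queried. Note that the keyword patterns in this migration are case-sensitive (a later migration below makes them case-insensitive), so the illustrative input uses lowercase markers:

  SELECT extract_discriminant('Wonderwall (acoustic version)') AS discriminant,
         get_base_name('Wonderwall (acoustic version)') AS base_name;

  -- Compare variants of the same track/release across all plays
  SELECT * FROM track_variants WHERE track_name ILIKE 'Wonderwall%' LIMIT 10;
  SELECT * FROM release_variants LIMIT 10;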
+276
migrations/20241220000007_enhanced_discriminant_extraction.sql
···
··· 1 + -- Enhanced discriminant extraction with comprehensive edition/version patterns 2 + -- This migration improves the auto-population of discriminants for better metadata handling 3 + 4 + -- Drop existing functions to replace them with enhanced versions 5 + DROP FUNCTION IF EXISTS extract_discriminant(TEXT); 6 + DROP FUNCTION IF EXISTS get_base_name(TEXT); 7 + 8 + -- Enhanced function to extract discriminants with comprehensive patterns 9 + CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$ 10 + DECLARE 11 + -- Comprehensive patterns for discriminant extraction 12 + discriminant_patterns TEXT[] := ARRAY[ 13 + -- Parentheses patterns 14 + '\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)', 15 + '\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)', 16 + '\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)', 17 + '\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)', 18 + '\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)', 19 + 20 + -- Brackets patterns 21 + '\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]', 22 + '\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]', 23 + '\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]', 24 + '\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]', 25 + '\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]', 26 + 27 + -- Braces patterns 28 + '\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}', 29 + '\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}', 30 + '\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}', 31 + '\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}', 32 + '\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}', 33 + 34 + -- Dash/hyphen patterns (common for editions) 35 + 
'[-โ€“โ€”]\s*([^-โ€“โ€”]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$', 36 + '[-โ€“โ€”]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 37 + 38 + -- Colon patterns (common for subtitles and versions) 39 + ':\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$', 40 + ':\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 41 + ]; 42 + 43 + pattern TEXT; 44 + match_result TEXT; 45 + BEGIN 46 + -- Return early if input is null or empty 47 + IF name_text IS NULL OR trim(name_text) = '' THEN 48 + RETURN NULL; 49 + END IF; 50 + 51 + -- Try each pattern to find discriminant information 52 + FOREACH pattern IN ARRAY discriminant_patterns 53 + LOOP 54 + SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result; 55 + IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 56 + -- Clean up the match result 57 + match_result := trim(match_result); 58 + -- Remove leading/trailing punctuation 59 + match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 60 + -- Ensure it's not just whitespace or empty after cleanup 61 + IF length(trim(match_result)) > 0 THEN 62 + RETURN match_result; 63 + END IF; 64 + END IF; 65 + END LOOP; 66 + 67 + RETURN NULL; 68 + END; 69 + $$ LANGUAGE plpgsql IMMUTABLE; 70 + 71 + -- Enhanced function to get base name without discriminant 72 + CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$ 73 + DECLARE 74 + -- Comprehensive cleanup patterns matching the extraction patterns 75 + cleanup_patterns TEXT[] := ARRAY[ 76 + -- Remove parentheses content 77 + '\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*', 78 + '\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*', 79 + '\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*', 80 + '\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*', 81 + '\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*', 82 + 83 + -- Remove brackets content 84 + 
'\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*', 85 + '\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*', 86 + '\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*', 87 + '\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*', 88 + '\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*', 89 + 90 + -- Remove braces content 91 + '\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*', 92 + '\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*', 93 + '\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*', 94 + '\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*', 95 + '\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*', 96 + 97 + -- Remove dash/hyphen patterns 98 + '\s*[-โ€“โ€”]\s*[^-โ€“โ€”]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$', 99 + '\s*[-โ€“โ€”]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 100 + 101 + -- Remove colon patterns 102 + '\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$', 103 + '\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 104 + ]; 105 + 106 + pattern TEXT; 107 + result_text TEXT := name_text; 108 + BEGIN 109 + -- Return early if input is null or empty 110 + IF name_text IS NULL OR trim(name_text) = '' THEN 111 + RETURN name_text; 112 + END IF; 113 + 114 + -- Remove discriminant patterns to get base name 115 + FOREACH pattern IN ARRAY cleanup_patterns 116 + LOOP 117 + result_text := regexp_replace(result_text, pattern, ' ', 'gi'); 118 + END LOOP; 119 + 120 + -- Clean up extra whitespace and normalize 121 + result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g'); 122 + 123 + -- Remove trailing punctuation that might be left after removal 124 + result_text := regexp_replace(result_text, '[,;:\-โ€“โ€”]\s*$', '', 'g'); 
125 + result_text := trim(result_text); 126 + 127 + -- Ensure we don't return an empty string 128 + IF length(result_text) = 0 THEN 129 + RETURN name_text; 130 + END IF; 131 + 132 + RETURN result_text; 133 + END; 134 + $$ LANGUAGE plpgsql IMMUTABLE; 135 + 136 + -- Create function to extract discriminant specifically for editions and versions 137 + CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$ 138 + DECLARE 139 + -- Focused patterns for edition/version extraction 140 + edition_patterns TEXT[] := ARRAY[ 141 + -- Edition patterns 142 + '\(([^)]*edition[^)]*)\)', 143 + '\[([^]]*edition[^]]*)\]', 144 + '\{([^}]*edition[^}]*)\}', 145 + '[-โ€“โ€”]\s*([^-โ€“โ€”]*edition[^-โ€“โ€”]*)$', 146 + ':\s*([^:]*edition[^:]*)$', 147 + 148 + -- Version patterns 149 + '\(([^)]*version[^)]*)\)', 150 + '\[([^]]*version[^]]*)\]', 151 + '\{([^}]*version[^}]*)\}', 152 + '[-โ€“โ€”]\s*([^-โ€“โ€”]*version[^-โ€“โ€”]*)$', 153 + ':\s*([^:]*version[^:]*)$', 154 + 155 + -- Remaster patterns 156 + '\(([^)]*remaster[^)]*)\)', 157 + '\[([^]]*remaster[^]]*)\]', 158 + '\{([^}]*remaster[^}]*)\}', 159 + '[-โ€“โ€”]\s*([^-โ€“โ€”]*remaster[^-โ€“โ€”]*)$', 160 + ':\s*([^:]*remaster[^:]*)$', 161 + 162 + -- Year-based patterns 163 + '\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)', 164 + '\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]', 165 + '\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}' 166 + ]; 167 + 168 + pattern TEXT; 169 + match_result TEXT; 170 + BEGIN 171 + -- Return early if input is null or empty 172 + IF name_text IS NULL OR trim(name_text) = '' THEN 173 + RETURN NULL; 174 + END IF; 175 + 176 + -- Try edition-specific patterns first 177 + FOREACH pattern IN ARRAY edition_patterns 178 + LOOP 179 + SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result; 180 + IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 181 + match_result := trim(match_result); 182 + match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 183 + IF length(trim(match_result)) > 0 THEN 184 + RETURN match_result; 185 + END IF; 186 + END IF; 187 + END LOOP; 188 + 189 + RETURN NULL; 190 + END; 191 + $$ LANGUAGE plpgsql IMMUTABLE; 192 + 193 + -- Update recordings table to populate discriminants from existing names 194 + UPDATE recordings 195 + SET discriminant = extract_discriminant(name) 196 + WHERE discriminant IS NULL 197 + AND extract_discriminant(name) IS NOT NULL; 198 + 199 + -- Update releases table to populate discriminants from existing names 200 + UPDATE releases 201 + SET discriminant = extract_discriminant(name) 202 + WHERE discriminant IS NULL 203 + AND extract_discriminant(name) IS NOT NULL; 204 + 205 + -- Update plays table to populate discriminants from existing names where not already set 206 + UPDATE plays 207 + SET track_discriminant = extract_discriminant(track_name) 208 + WHERE track_discriminant IS NULL 209 + AND extract_discriminant(track_name) IS NOT NULL; 210 + 211 + UPDATE plays 212 + SET release_discriminant = extract_discriminant(release_name) 213 + WHERE release_discriminant IS NULL 214 + AND release_name IS NOT NULL 215 + AND extract_discriminant(release_name) IS NOT NULL; 216 + 217 + -- Create indexes for efficient discriminant queries 218 + CREATE INDEX IF NOT EXISTS idx_recordings_name_discriminant ON recordings (name, discriminant); 219 + CREATE INDEX IF NOT EXISTS idx_releases_name_discriminant ON releases (name, discriminant); 220 + 221 
+ -- Add comments for the new function 222 + COMMENT ON FUNCTION extract_discriminant IS 'Enhanced discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons'; 223 + COMMENT ON FUNCTION get_base_name IS 'Enhanced base name extraction removing comprehensive discriminant patterns to enable proper grouping'; 224 + COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized function for extracting edition and version discriminants with focused patterns'; 225 + 226 + -- Create a view to show discriminant extraction results for analysis 227 + CREATE OR REPLACE VIEW discriminant_analysis AS 228 + SELECT 229 + 'recordings' as table_name, 230 + name as original_name, 231 + discriminant, 232 + get_base_name(name) as base_name, 233 + extract_discriminant(name) as extracted_discriminant, 234 + extract_edition_discriminant(name) as edition_discriminant 235 + FROM recordings 236 + WHERE name IS NOT NULL 237 + UNION ALL 238 + SELECT 239 + 'releases' as table_name, 240 + name as original_name, 241 + discriminant, 242 + get_base_name(name) as base_name, 243 + extract_discriminant(name) as extracted_discriminant, 244 + extract_edition_discriminant(name) as edition_discriminant 245 + FROM releases 246 + WHERE name IS NOT NULL; 247 + 248 + COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing discriminant extraction results for quality assessment and debugging'; 249 + 250 + -- Refresh materialized views to include discriminant information 251 + REFRESH MATERIALIZED VIEW mv_release_play_counts; 252 + REFRESH MATERIALIZED VIEW mv_recording_play_counts; 253 + 254 + -- Create summary statistics for discriminant usage 255 + CREATE OR REPLACE VIEW discriminant_stats AS 256 + SELECT 257 + 'recordings' as entity_type, 258 + COUNT(*) as total_count, 259 + COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant, 260 + COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant, 261 + ROUND( 262 + COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2 263 + ) as discriminant_percentage 264 + FROM recordings 265 + UNION ALL 266 + SELECT 267 + 'releases' as entity_type, 268 + COUNT(*) as total_count, 269 + COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant, 270 + COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant, 271 + ROUND( 272 + COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2 273 + ) as discriminant_percentage 274 + FROM releases; 275 + 276 + COMMENT ON VIEW discriminant_stats IS 'Statistics showing discriminant usage and extraction potential across entity types';
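The analysis views defined at the end of this migration are intended for checking how well extraction performs against real data. A minimal sketch:

  -- Coverage summary: how many recordings/releases already carry a discriminant,
  -- and how many more could still be extracted from their names
  SELECT * FROM discriminant_stats;

  -- Spot-check individual extractions
  SELECT table_name, original_name, base_name, extracted_discriminant, edition_discriminant
  FROM discriminant_analysis
  WHERE extracted_discriminant IS NOT NULL
  LIMIT 25;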
+252
migrations/20241220000008_fix_discriminant_case_sensitivity.sql
···
··· 1 + -- Fix case sensitivity in discriminant extraction patterns 2 + -- This migration updates the discriminant extraction functions to properly handle case-insensitive matching 3 + 4 + -- Drop dependent views first, then functions, then recreate everything 5 + DROP VIEW IF EXISTS discriminant_analysis CASCADE; 6 + DROP VIEW IF EXISTS discriminant_stats CASCADE; 7 + 8 + -- Drop existing functions to replace with case-insensitive versions 9 + DROP FUNCTION IF EXISTS extract_discriminant(TEXT) CASCADE; 10 + DROP FUNCTION IF EXISTS get_base_name(TEXT) CASCADE; 11 + DROP FUNCTION IF EXISTS extract_edition_discriminant(TEXT) CASCADE; 12 + 13 + -- Enhanced function to extract discriminants with case-insensitive matching 14 + CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$ 15 + DECLARE 16 + -- Comprehensive patterns for discriminant extraction with case-insensitive flags 17 + discriminant_patterns TEXT[] := ARRAY[ 18 + -- Parentheses patterns 19 + '(?i)\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)', 20 + '(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)', 21 + '(?i)\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)', 22 + '(?i)\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)', 23 + '(?i)\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)', 24 + 25 + -- Brackets patterns 26 + '(?i)\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]', 27 + '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]', 28 + '(?i)\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]', 29 + '(?i)\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]', 30 + '(?i)\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]', 31 + 32 + -- Braces patterns 33 + '(?i)\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}', 34 + '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}', 35 + '(?i)\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}', 36 + 
'(?i)\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}', 37 + '(?i)\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}', 38 + 39 + -- Dash/hyphen patterns (common for editions) 40 + '(?i)[-โ€“โ€”]\s*([^-โ€“โ€”]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$', 41 + '(?i)[-โ€“โ€”]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 42 + 43 + -- Colon patterns (common for subtitles and versions) 44 + '(?i):\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$', 45 + '(?i):\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 46 + ]; 47 + 48 + pattern TEXT; 49 + match_result TEXT; 50 + BEGIN 51 + -- Return early if input is null or empty 52 + IF name_text IS NULL OR trim(name_text) = '' THEN 53 + RETURN NULL; 54 + END IF; 55 + 56 + -- Try each pattern to find discriminant information 57 + FOREACH pattern IN ARRAY discriminant_patterns 58 + LOOP 59 + SELECT substring(name_text FROM pattern) INTO match_result; 60 + IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 61 + -- Clean up the match result 62 + match_result := trim(match_result); 63 + -- Remove leading/trailing punctuation 64 + match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 65 + -- Ensure it's not just whitespace or empty after cleanup 66 + IF length(trim(match_result)) > 0 THEN 67 + RETURN match_result; 68 + END IF; 69 + END IF; 70 + END LOOP; 71 + 72 + RETURN NULL; 73 + END; 74 + $$ LANGUAGE plpgsql IMMUTABLE; 75 + 76 + -- Enhanced function to get base name without discriminant with case-insensitive matching 77 + CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$ 78 + DECLARE 79 + -- Comprehensive cleanup patterns matching the extraction patterns 80 + cleanup_patterns TEXT[] := ARRAY[ 81 + -- Remove parentheses content 82 + '(?i)\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*', 83 + '(?i)\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*', 84 + '(?i)\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*', 85 + '(?i)\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*', 86 + '(?i)\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*', 87 + 88 + -- Remove brackets content 89 + 
'(?i)\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*', 90 + '(?i)\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*', 91 + '(?i)\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*', 92 + '(?i)\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*', 93 + '(?i)\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*', 94 + 95 + -- Remove braces content 96 + '(?i)\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*', 97 + '(?i)\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*', 98 + '(?i)\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*', 99 + '(?i)\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*', 100 + '(?i)\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*', 101 + 102 + -- Remove dash/hyphen patterns 103 + '(?i)\s*[-โ€“โ€”]\s*[^-โ€“โ€”]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$', 104 + '(?i)\s*[-โ€“โ€”]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 105 + 106 + -- Remove colon patterns 107 + '(?i)\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$', 108 + '(?i)\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 109 + ]; 110 + 111 + pattern TEXT; 112 + result_text TEXT := name_text; 113 + BEGIN 114 + -- Return early if input is null or empty 115 + IF name_text IS NULL OR trim(name_text) = '' THEN 116 + RETURN name_text; 117 + END IF; 118 + 119 + -- Remove discriminant patterns to get base name 120 + FOREACH pattern IN ARRAY cleanup_patterns 121 + LOOP 122 + result_text := regexp_replace(result_text, pattern, ' ', 'g'); 123 + END LOOP; 124 + 125 + -- Clean up extra whitespace and normalize 126 + result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g'); 127 + 128 + -- Remove trailing punctuation that might be left after removal 129 + result_text := 
regexp_replace(result_text, '[,;:\-โ€“โ€”]\s*$', '', 'g'); 130 + result_text := trim(result_text); 131 + 132 + -- Ensure we don't return an empty string 133 + IF length(result_text) = 0 THEN 134 + RETURN name_text; 135 + END IF; 136 + 137 + RETURN result_text; 138 + END; 139 + $$ LANGUAGE plpgsql IMMUTABLE; 140 + 141 + -- Enhanced function to extract discriminant specifically for editions and versions with case-insensitive matching 142 + CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$ 143 + DECLARE 144 + -- Focused patterns for edition/version extraction with case-insensitive flags 145 + edition_patterns TEXT[] := ARRAY[ 146 + -- Edition patterns 147 + '(?i)\(([^)]*edition[^)]*)\)', 148 + '(?i)\[([^]]*edition[^]]*)\]', 149 + '(?i)\{([^}]*edition[^}]*)\}', 150 + '(?i)[-โ€“โ€”]\s*([^-โ€“โ€”]*edition[^-โ€“โ€”]*)$', 151 + '(?i):\s*([^:]*edition[^:]*)$', 152 + 153 + -- Version patterns 154 + '(?i)\(([^)]*version[^)]*)\)', 155 + '(?i)\[([^]]*version[^]]*)\]', 156 + '(?i)\{([^}]*version[^}]*)\}', 157 + '(?i)[-โ€“โ€”]\s*([^-โ€“โ€”]*version[^-โ€“โ€”]*)$', 158 + '(?i):\s*([^:]*version[^:]*)$', 159 + 160 + -- Remaster patterns 161 + '(?i)\(([^)]*remaster[^)]*)\)', 162 + '(?i)\[([^]]*remaster[^]]*)\]', 163 + '(?i)\{([^}]*remaster[^}]*)\}', 164 + '(?i)[-โ€“โ€”]\s*([^-โ€“โ€”]*remaster[^-โ€“โ€”]*)$', 165 + '(?i):\s*([^:]*remaster[^:]*)$', 166 + 167 + -- Year-based patterns 168 + '(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)', 169 + '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]', 170 + '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}' 171 + ]; 172 + 173 + pattern TEXT; 174 + match_result TEXT; 175 + BEGIN 176 + -- Return early if input is null or empty 177 + IF name_text IS NULL OR trim(name_text) = '' THEN 178 + RETURN NULL; 179 + END IF; 180 + 181 + -- Try edition-specific patterns first 182 + FOREACH pattern IN ARRAY edition_patterns 183 + LOOP 184 + SELECT substring(name_text FROM pattern) INTO match_result; 185 + IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 186 + match_result := trim(match_result); 187 + match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 188 + IF length(trim(match_result)) > 0 THEN 189 + RETURN match_result; 190 + END IF; 191 + END IF; 192 + END LOOP; 193 + 194 + RETURN NULL; 195 + END; 196 + $$ LANGUAGE plpgsql IMMUTABLE; 197 + 198 + -- Update existing records with newly extracted discriminants (case-insensitive) 199 + UPDATE recordings 200 + SET discriminant = extract_discriminant(name) 201 + WHERE discriminant IS NULL 202 + AND extract_discriminant(name) IS NOT NULL; 203 + 204 + UPDATE releases 205 + SET discriminant = extract_discriminant(name) 206 + WHERE discriminant IS NULL 207 + AND extract_discriminant(name) IS NOT NULL; 208 + 209 + UPDATE plays 210 + SET track_discriminant = extract_discriminant(track_name) 211 + WHERE track_discriminant IS NULL 212 + AND extract_discriminant(track_name) IS NOT NULL; 213 + 214 + UPDATE plays 215 + SET release_discriminant = extract_discriminant(release_name) 216 + WHERE release_discriminant IS NULL 217 + AND release_name IS NOT NULL 218 + AND extract_discriminant(release_name) IS NOT NULL; 219 + 220 + -- Update comments for the enhanced functions 221 + COMMENT ON FUNCTION extract_discriminant IS 'Enhanced case-insensitive discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, 
and colons'; 222 + COMMENT ON FUNCTION get_base_name IS 'Enhanced case-insensitive base name extraction removing comprehensive discriminant patterns to enable proper grouping'; 223 + COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized case-insensitive function for extracting edition and version discriminants with focused patterns'; 224 + 225 + -- Refresh materialized views to reflect the case-insensitive improvements 226 + REFRESH MATERIALIZED VIEW mv_release_play_counts; 227 + REFRESH MATERIALIZED VIEW mv_recording_play_counts; 228 + 229 + -- Update discriminant analysis view to include case-insensitive results 230 + DROP VIEW IF EXISTS discriminant_analysis; 231 + CREATE OR REPLACE VIEW discriminant_analysis AS 232 + SELECT 233 + 'recordings' as table_name, 234 + name as original_name, 235 + discriminant, 236 + get_base_name(name) as base_name, 237 + extract_discriminant(name) as extracted_discriminant, 238 + extract_edition_discriminant(name) as edition_discriminant 239 + FROM recordings 240 + WHERE name IS NOT NULL 241 + UNION ALL 242 + SELECT 243 + 'releases' as table_name, 244 + name as original_name, 245 + discriminant, 246 + get_base_name(name) as base_name, 247 + extract_discriminant(name) as extracted_discriminant, 248 + extract_edition_discriminant(name) as edition_discriminant 249 + FROM releases 250 + WHERE name IS NOT NULL; 251 + 252 + COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing case-insensitive discriminant extraction results for quality assessment and debugging';
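With the (?i) flags in place, mixed-case edition markers should now be recognized as well. Illustrative calls only; the exact return values depend on the patterns above:

  SELECT extract_discriminant('Nevermind (2011 Remaster)') AS remaster,
         extract_edition_discriminant('OK Computer [DELUXE EDITION]') AS edition,
         get_base_name('OK Computer [DELUXE EDITION]') AS base_name;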
+12 -3
package.json
··· 7 "dev": "turbo dev", 8 "build": "pnpm turbo run build --filter='./packages/*' --filter='./apps/*'", 9 "build:rust": "turbo run build:rust", 10 - "typecheck": "pnpm -r exec tsc --noEmit", 11 "test": "turbo run test test:rust", 12 - "rust:fmt": "cd services && cargo fmt", 13 - "rust:clippy": "cd services && cargo clippy", 14 "fix": "biome lint --apply . && biome format --write . && biome check . --apply", 15 "nuke": "rimraf node_modules */*/node_modules", 16 "lex:gen-server": "turbo lex:gen-server", 17 "format": "prettier --write .", ··· 19 "lex:watch": "cd tools/lexicon-cli && node dist/index.js watch", 20 "lex:validate": "cd tools/lexicon-cli && node dist/index.js validate", 21 "lex:diff": "cd tools/lexicon-cli && node dist/index.js diff", 22 "db:migrate": "cd services && sqlx migrate run", 23 "db:migrate:revert": "cd services && sqlx migrate revert", 24 "db:create": "cd services && sqlx database create",
··· 7 "dev": "turbo dev", 8 "build": "pnpm turbo run build --filter='./packages/*' --filter='./apps/*'", 9 "build:rust": "turbo run build:rust", 10 + "typecheck": "pnpm -r --filter='!./vendor/*' exec tsc --noEmit", 11 "test": "turbo run test test:rust", 12 + "rust:fmt": "pnpm rust:fmt:services && pnpm rust:fmt:apps", 13 + "rust:clippy": "pnpm rust:clippy:services && pnpm rust:clippy:apps", 14 + "rust:fmt:services": "cd services && cargo fmt", 15 + "rust:clippy:services": "cd services && cargo clippy -- -D warnings", 16 + "rust:fmt:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Formatting $dir\" && cd \"$dir\" && cargo fmt && cd ../..; fi; done", 17 + "rust:clippy:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Linting $dir\" && cd \"$dir\" && cargo clippy -- -D warnings && cd ../..; fi; done", 18 "fix": "biome lint --apply . && biome format --write . && biome check . --apply", 19 + "hooks:install": "./scripts/install-git-hooks.sh", 20 + "hooks:install-precommit": "pre-commit install", 21 + "postinstall": "pnpm lex:gen-server", 22 "nuke": "rimraf node_modules */*/node_modules", 23 "lex:gen-server": "turbo lex:gen-server", 24 "format": "prettier --write .", ··· 26 "lex:watch": "cd tools/lexicon-cli && node dist/index.js watch", 27 "lex:validate": "cd tools/lexicon-cli && node dist/index.js validate", 28 "lex:diff": "cd tools/lexicon-cli && node dist/index.js diff", 29 + "lex:build-amethyst": "pnpm lex:gen-server && pnpm turbo build --filter=@teal/amethyst", 30 + "lex:dev": "pnpm lex:gen-server && pnpm turbo dev --filter=@teal/amethyst", 31 "db:migrate": "cd services && sqlx migrate run", 32 "db:migrate:revert": "cd services && sqlx migrate revert", 33 "db:create": "cd services && sqlx database create",
+25
packages/lexicons/lex-gen.sh
···
··· 1 + #!/bin/bash 2 + set -e 3 + 4 + # Navigate to the lexicons directory and find all .json files 5 + cd ../../lexicons 6 + json_files=$(find . -name "*.json" -type f) 7 + 8 + # Go back to the lexicons package directory 9 + cd ../packages/lexicons 10 + 11 + # Check if we found any lexicon files 12 + if [ -z "$json_files" ]; then 13 + echo "No lexicon files found in ../../lexicons/" 14 + exit 1 15 + fi 16 + 17 + # Convert the file list to absolute paths 18 + lexicon_paths="" 19 + for file in $json_files; do 20 + lexicon_paths="$lexicon_paths ../../lexicons/$file" 21 + done 22 + 23 + # Generate lexicons 24 + echo "Generating lexicons from: $lexicon_paths" 25 + lex gen-server ./src $lexicon_paths --yes
+14
packages/lexicons/package.json
···
··· 1 + { 2 + "name": "@teal/lexicons", 3 + "type": "module", 4 + "main": "./index.ts", 5 + "dependencies": { 6 + "@atproto/lex-cli": "^0.5.4", 7 + "@atproto/lexicon": "^0.4.2", 8 + "@atproto/xrpc-server": "^0.7.4", 9 + "@teal/tsconfig": "workspace:*" 10 + }, 11 + "scripts": { 12 + "lex:gen-server": "bash ./lex-gen.sh" 13 + } 14 + }
-4
pnpm-lock.yaml
··· 254 255 services/cadet: {} 256 257 - services/rocketman: {} 258 - 259 services/satellite: {} 260 - 261 - services/types: {} 262 263 tools/lexicon-cli: 264 dependencies:
··· 254 255 services/cadet: {} 256 257 services/satellite: {} 258 259 tools/lexicon-cli: 260 dependencies:
+100
scripts/install-git-hooks.sh
···
··· 1 + #!/bin/bash 2 + 3 + # Install git hooks for the Teal project 4 + # This script sets up pre-commit hooks for code formatting and linting 5 + 6 + set -e 7 + 8 + # Colors for output 9 + RED='\033[0;31m' 10 + GREEN='\033[0;32m' 11 + YELLOW='\033[1;33m' 12 + BLUE='\033[0;34m' 13 + NC='\033[0m' # No Color 14 + 15 + print_status() { 16 + echo -e "${BLUE}[INFO]${NC} $1" 17 + } 18 + 19 + print_success() { 20 + echo -e "${GREEN}[SUCCESS]${NC} $1" 21 + } 22 + 23 + print_error() { 24 + echo -e "${RED}[ERROR]${NC} $1" 25 + } 26 + 27 + print_warning() { 28 + echo -e "${YELLOW}[WARNING]${NC} $1" 29 + } 30 + 31 + # Check if we're in a git repository 32 + if [ ! -d ".git" ]; then 33 + print_error "This script must be run from the root of a git repository" 34 + exit 1 35 + fi 36 + 37 + print_status "Installing git hooks for Teal project..." 38 + 39 + # Create hooks directory if it doesn't exist 40 + mkdir -p .git/hooks 41 + 42 + # Install pre-commit hook 43 + if [ -f "scripts/pre-commit-hook.sh" ]; then 44 + print_status "Installing pre-commit hook..." 45 + cp scripts/pre-commit-hook.sh .git/hooks/pre-commit 46 + chmod +x .git/hooks/pre-commit 47 + print_success "Pre-commit hook installed" 48 + else 49 + print_error "Pre-commit hook script not found at scripts/pre-commit-hook.sh" 50 + exit 1 51 + fi 52 + 53 + # Optional: Install other hooks 54 + # You can add more hooks here if needed 55 + 56 + print_status "Testing hook installation..." 57 + 58 + # Test if the hook is executable 59 + if [ -x ".git/hooks/pre-commit" ]; then 60 + print_success "Pre-commit hook is executable" 61 + else 62 + print_error "Pre-commit hook is not executable" 63 + exit 1 64 + fi 65 + 66 + # Check if required tools are available 67 + print_status "Checking required tools..." 68 + 69 + MISSING_TOOLS="" 70 + 71 + if ! command -v pnpm >/dev/null 2>&1; then 72 + MISSING_TOOLS="$MISSING_TOOLS pnpm" 73 + fi 74 + 75 + if ! command -v node >/dev/null 2>&1; then 76 + MISSING_TOOLS="$MISSING_TOOLS node" 77 + fi 78 + 79 + if ! command -v cargo >/dev/null 2>&1; then 80 + MISSING_TOOLS="$MISSING_TOOLS cargo" 81 + fi 82 + 83 + if [ -n "$MISSING_TOOLS" ]; then 84 + print_warning "Some tools are missing:$MISSING_TOOLS" 85 + print_warning "The git hooks may not work properly without these tools" 86 + else 87 + print_success "All required tools are available" 88 + fi 89 + 90 + print_success "Git hooks installation complete! ๐ŸŽ‰" 91 + print_status "The following hooks have been installed:" 92 + echo " - pre-commit: Runs formatting and linting checks before commits" 93 + 94 + print_status "To test the pre-commit hook, try making a commit with staged files" 95 + print_status "To temporarily skip hooks, use: git commit --no-verify" 96 + 97 + # Optional: Show hook status 98 + echo "" 99 + print_status "Installed hooks:" 100 + ls -la .git/hooks/ | grep -v sample | grep -v "^d" | sed 's/^/ /'
+213
scripts/pre-commit-hook.sh
···
··· 1 + #!/bin/bash 2 + 3 + # Pre-commit hook for Teal project 4 + # This script runs code formatting and linting checks before allowing commits 5 + 6 + set -e 7 + 8 + echo "๐Ÿ” Running pre-commit checks..." 9 + 10 + # Colors for output 11 + RED='\033[0;31m' 12 + GREEN='\033[0;32m' 13 + YELLOW='\033[1;33m' 14 + BLUE='\033[0;34m' 15 + NC='\033[0m' # No Color 16 + 17 + # Function to print colored output 18 + print_status() { 19 + echo -e "${BLUE}[INFO]${NC} $1" 20 + } 21 + 22 + print_success() { 23 + echo -e "${GREEN}[SUCCESS]${NC} $1" 24 + } 25 + 26 + print_warning() { 27 + echo -e "${YELLOW}[WARNING]${NC} $1" 28 + } 29 + 30 + print_error() { 31 + echo -e "${RED}[ERROR]${NC} $1" 32 + } 33 + 34 + # Get list of staged files 35 + STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM) 36 + 37 + if [ -z "$STAGED_FILES" ]; then 38 + print_warning "No staged files found" 39 + exit 0 40 + fi 41 + 42 + # Check if we have TypeScript/JavaScript files 43 + TS_JS_FILES=$(echo "$STAGED_FILES" | grep -E '\.(ts|tsx|js|jsx)$' || true) 44 + # Check if we have Rust files 45 + RUST_FILES=$(echo "$STAGED_FILES" | grep -E '\.rs$' || true) 46 + # Check if we have lexicon files 47 + LEXICON_FILES=$(echo "$STAGED_FILES" | grep -E 'lexicons/.*\.json$' || true) 48 + 49 + print_status "Staged files to check:" 50 + echo "$STAGED_FILES" | sed 's/^/ - /' 51 + 52 + # 1. TypeScript/JavaScript checks 53 + if [ -n "$TS_JS_FILES" ]; then 54 + print_status "Running TypeScript/JavaScript checks..." 55 + 56 + # Check if biome is available and run it 57 + if command -v pnpm >/dev/null 2>&1; then 58 + print_status "Running Biome formatting and linting..." 59 + if ! pnpm biome check . --apply --no-errors-on-unmatched 2>/dev/null; then 60 + print_error "Biome check failed. Please fix the issues and try again." 61 + exit 1 62 + fi 63 + 64 + print_status "Running Prettier formatting..." 65 + if ! pnpm prettier --write $TS_JS_FILES 2>/dev/null; then 66 + print_error "Prettier formatting failed. Please fix the issues and try again." 67 + exit 1 68 + fi 69 + 70 + # TypeScript checking temporarily disabled due to vendor compilation issues 71 + # Re-enable once vendor code is fixed 72 + else 73 + print_warning "pnpm not found, skipping JS/TS checks" 74 + fi 75 + fi 76 + 77 + # 2. Rust checks 78 + if [ -n "$RUST_FILES" ]; then 79 + print_status "Running Rust checks..." 80 + 81 + if command -v cargo >/dev/null 2>&1; then 82 + RUST_ERRORS=0 83 + 84 + # Check services workspace 85 + if [ -f "services/Cargo.toml" ]; then 86 + print_status "Running cargo fmt on services workspace..." 87 + if ! (cd services && cargo fmt --check) 2>/dev/null; then 88 + print_status "Auto-formatting Rust code in services..." 89 + (cd services && cargo fmt) 2>/dev/null || true 90 + fi 91 + 92 + print_status "Running cargo clippy on services workspace..." 93 + if (cd services && cargo check); then 94 + if ! (cd services && cargo clippy -- -D warnings); then 95 + print_warning "Cargo clippy found issues in services workspace. Please fix the warnings." 96 + print_warning "Run 'pnpm rust:clippy:services' to see detailed errors." 97 + # Don't fail the commit for clippy warnings, just warn 98 + fi 99 + else 100 + print_warning "Services workspace has compilation errors. Skipping clippy." 101 + print_warning "Run 'pnpm rust:clippy:services' to see detailed errors." 
102 + fi 103 + fi 104 + 105 + # Check individual Rust projects outside services 106 + CHECKED_DIRS="" 107 + for rust_file in $RUST_FILES; do 108 + rust_dir=$(dirname "$rust_file") 109 + # Find the nearest Cargo.toml going up the directory tree 110 + check_dir="$rust_dir" 111 + while [ "$check_dir" != "." ] && [ "$check_dir" != "/" ]; do 112 + if [ -f "$check_dir/Cargo.toml" ] && [ "$check_dir" != "services" ]; then 113 + # Skip if we already checked this directory 114 + if echo "$CHECKED_DIRS" | grep -q "$check_dir"; then 115 + break 116 + fi 117 + CHECKED_DIRS="$CHECKED_DIRS $check_dir" 118 + 119 + # Found a Cargo.toml outside services workspace 120 + print_status "Running cargo fmt on $check_dir..." 121 + if ! (cd "$check_dir" && cargo fmt --check) 2>/dev/null; then 122 + print_status "Auto-formatting Rust code in $check_dir..." 123 + (cd "$check_dir" && cargo fmt) 2>/dev/null || true 124 + fi 125 + 126 + print_status "Running cargo clippy on $check_dir..." 127 + if (cd "$check_dir" && cargo check); then 128 + if ! (cd "$check_dir" && cargo clippy -- -D warnings); then 129 + print_error "Cargo clippy found issues in $check_dir. Please fix the warnings and try again." 130 + RUST_ERRORS=1 131 + fi 132 + else 133 + print_warning "Project $check_dir has compilation errors. Skipping clippy." 134 + print_warning "Run 'cd $check_dir && cargo check' to see detailed errors." 135 + fi 136 + break 137 + fi 138 + check_dir=$(dirname "$check_dir") 139 + done 140 + done 141 + 142 + if [ $RUST_ERRORS -eq 1 ]; then 143 + exit 1 144 + fi 145 + else 146 + print_warning "Cargo not found, skipping Rust checks" 147 + fi 148 + fi 149 + 150 + # 3. Lexicon checks 151 + if [ -n "$LEXICON_FILES" ]; then 152 + print_status "Lexicon files changed, validating and regenerating..." 153 + 154 + if command -v pnpm >/dev/null 2>&1; then 155 + print_status "Validating lexicons..." 156 + if ! pnpm lex:validate 2>/dev/null; then 157 + print_error "Lexicon validation failed. Please fix the lexicon files and try again." 158 + exit 1 159 + fi 160 + 161 + print_status "Regenerating lexicons..." 162 + if ! pnpm lex:gen-server 2>/dev/null; then 163 + print_error "Lexicon generation failed. Please check the lexicon files and try again." 164 + exit 1 165 + fi 166 + 167 + # Note: Generated lexicon files are ignored by .gitignore and not added to staging 168 + print_status "Generated lexicon files are ignored by .gitignore (as intended)" 169 + else 170 + print_warning "pnpm not found, skipping lexicon checks" 171 + fi 172 + fi 173 + 174 + # 4. Re-add files that might have been formatted 175 + FORMATTED_FILES="" 176 + for file in $STAGED_FILES; do 177 + if [ -f "$file" ]; then 178 + # Check if file was modified by formatters 179 + if [ -n "$(git diff "$file")" ]; then 180 + FORMATTED_FILES="$FORMATTED_FILES $file" 181 + git add "$file" 182 + fi 183 + fi 184 + done 185 + 186 + if [ -n "$FORMATTED_FILES" ]; then 187 + print_success "Auto-formatted files have been re-staged:" 188 + echo "$FORMATTED_FILES" | tr ' ' '\n' | sed 's/^/ - /' 189 + fi 190 + 191 + # 5. Final validation - ensure no syntax errors in staged files 192 + print_status "Running final validation..." 193 + 194 + # Check for common issues 195 + for file in $TS_JS_FILES; do 196 + if [ -f "$file" ]; then 197 + # Check for console.log statements (optional - remove if you want to allow them) 198 + if grep -n "console\.log" "$file" >/dev/null 2>&1; then 199 + print_warning "Found console.log statements in $file! yooo!!!" 
200 + # Uncomment the next two lines if you want to block commits with console.log
201 + # print_error "Please remove console.log statements before committing"
202 + # exit 1
203 + fi
204 +
205 + # Check for TODO/FIXME comments in committed code (optional)
206 + if grep -n -i "TODO\|FIXME" "$file" >/dev/null 2>&1; then
207 + print_warning "Found TODO/FIXME comments in $file"
208 + fi
209 + fi
210 + done
211 +
212 + print_success "All pre-commit checks passed! 🎉"
213 + exit 0
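For local development the hook still has to be wired into Git. A minimal sketch, assuming the script above is committed as `scripts/pre-commit` (adjust the path to wherever it actually lands in the tree):

```bash
# Option 1: point Git at a tracked hooks directory (Git 2.9+)
git config core.hooksPath scripts

# Option 2: copy the hook into the local .git/hooks directory
cp scripts/pre-commit .git/hooks/pre-commit
chmod +x .git/hooks/pre-commit
```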
+66
scripts/setup-lexicons.sh
···
··· 1 + #!/bin/bash 2 + # scripts/setup-lexicons.sh 3 + # Setup script for ATProto lexicons submodule and symbolic links 4 + 5 + set -e 6 + 7 + echo "Setting up lexicons..." 8 + 9 + # Check if we're in the right directory 10 + if [ ! -f "package.json" ] || [ ! -d "lexicons" ]; then 11 + echo "Error: This script must be run from the project root directory" 12 + exit 1 13 + fi 14 + 15 + # Initialize submodules 16 + echo "Initializing submodules..." 17 + git submodule update --init --recursive 18 + 19 + # Check if vendor/atproto exists 20 + if [ ! -d "vendor/atproto" ]; then 21 + echo "Error: vendor/atproto submodule not found" 22 + exit 1 23 + fi 24 + 25 + # Create symbolic links if they don't exist 26 + echo "Creating symbolic links..." 27 + cd lexicons 28 + 29 + if [ ! -L app ]; then 30 + ln -s ../vendor/atproto/lexicons/app app 31 + echo "Created symlink: lexicons/app" 32 + else 33 + echo "Symlink already exists: lexicons/app" 34 + fi 35 + 36 + if [ ! -L chat ]; then 37 + ln -s ../vendor/atproto/lexicons/chat chat 38 + echo "Created symlink: lexicons/chat" 39 + else 40 + echo "Symlink already exists: lexicons/chat" 41 + fi 42 + 43 + if [ ! -L com ]; then 44 + ln -s ../vendor/atproto/lexicons/com com 45 + echo "Created symlink: lexicons/com" 46 + else 47 + echo "Symlink already exists: lexicons/com" 48 + fi 49 + 50 + if [ ! -L tools ]; then 51 + ln -s ../vendor/atproto/lexicons/tools tools 52 + echo "Created symlink: lexicons/tools" 53 + else 54 + echo "Symlink already exists: lexicons/tools" 55 + fi 56 + 57 + cd .. 58 + 59 + echo "Lexicons setup complete!" 60 + echo "" 61 + echo "You should now have access to:" 62 + echo " - lexicons/app -> ATProto app lexicons" 63 + echo " - lexicons/chat -> ATProto chat lexicons" 64 + echo " - lexicons/com -> ATProto protocol lexicons" 65 + echo " - lexicons/tools -> ATProto tools lexicons" 66 + echo " - lexicons/fm.teal.alpha -> Custom Teal lexicons"
+69
scripts/setup-sqlx-offline.sh
···
···
1 + #!/bin/bash
2 +
3 + # Script to copy .sqlx files to all Rust projects that use SQLx
4 + # This is needed for offline SQLx builds (SQLX_OFFLINE=true)
5 +
6 + set -e
7 +
8 + # Get the script directory (should be in teal/scripts/)
9 + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10 + PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
11 +
12 + # Source .sqlx directory
13 + SQLX_SOURCE="$PROJECT_ROOT/.sqlx"
14 +
15 + # List of projects that use SQLx (relative to project root)
16 + SQLX_PROJECTS=(
17 + "apps/aqua"
18 + "services/cadet"
19 + "services/satellite"
20 + )
21 +
22 + echo "🔧 Setting up SQLx offline files..."
23 +
24 + # Check if source .sqlx directory exists
25 + if [ ! -d "$SQLX_SOURCE" ]; then
26 + echo "❌ Source .sqlx directory not found at: $SQLX_SOURCE"
27 + echo "   Make sure you've run 'cargo sqlx prepare' from the services directory first."
28 + exit 1
29 + fi
30 +
31 + # Copy .sqlx files to each project that needs them
32 + for project in "${SQLX_PROJECTS[@]}"; do
33 + project_path="$PROJECT_ROOT/$project"
34 + target_sqlx="$project_path/.sqlx"
35 +
36 + if [ ! -d "$project_path" ]; then
37 + echo "⚠️  Project directory not found: $project_path (skipping)"
38 + continue
39 + fi
40 +
41 + # Check if project actually uses SQLx
42 + if [ ! -f "$project_path/Cargo.toml" ]; then
43 + echo "⚠️  No Cargo.toml found in $project (skipping)"
44 + continue
45 + fi
46 +
47 + if ! grep -q "sqlx" "$project_path/Cargo.toml"; then
48 + echo "⚠️  Project $project doesn't appear to use SQLx (skipping)"
49 + continue
50 + fi
51 +
52 + echo "📦 Copying .sqlx files to $project..."
53 +
54 + # Remove existing .sqlx directory if it exists
55 + if [ -d "$target_sqlx" ]; then
56 + rm -rf "$target_sqlx"
57 + fi
58 +
59 + # Copy the .sqlx directory
60 + cp -r "$SQLX_SOURCE" "$target_sqlx"
61 +
62 + echo "   ✅ Copied $(ls -1 "$target_sqlx" | wc -l) query files"
63 + done
64 +
65 + echo "✅ SQLx offline setup complete!"
66 + echo ""
67 + echo "Note: If you add new SQL queries or modify existing ones, you'll need to:"
68 + echo "1. Run 'cargo sqlx prepare' from the services directory"
69 + echo "2. Run this script again to update all project copies"
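Putting the script's closing note into commands, the refresh cycle after adding or changing a SQL query looks roughly like this (a sketch assuming sqlx-cli is installed and DATABASE_URL points at a reachable database):

```bash
# Regenerate the offline query cache from the services workspace
cd services
cargo sqlx prepare
cd ..

# Fan the refreshed cache out to every project that needs it
./scripts/setup-sqlx-offline.sh
```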
+62
scripts/update-lexicons.sh
···
··· 1 + #!/bin/bash 2 + # scripts/update-lexicons.sh 3 + # Update script for ATProto lexicons from upstream 4 + 5 + set -e 6 + 7 + echo "Updating ATProto lexicons..." 8 + 9 + # Check if we're in the right directory 10 + if [ ! -f "package.json" ] || [ ! -d "vendor/atproto" ]; then 11 + echo "Error: This script must be run from the project root directory" 12 + echo "Make sure vendor/atproto submodule exists" 13 + exit 1 14 + fi 15 + 16 + # Save current directory 17 + PROJECT_ROOT=$(pwd) 18 + 19 + # Update the submodule 20 + echo "Fetching latest changes from atproto repository..." 21 + cd vendor/atproto 22 + 23 + # Fetch latest changes 24 + git fetch origin 25 + 26 + # Get current commit 27 + CURRENT_COMMIT=$(git rev-parse HEAD) 28 + CURRENT_SHORT=$(git rev-parse --short HEAD) 29 + 30 + # Get latest commit on main 31 + LATEST_COMMIT=$(git rev-parse origin/main) 32 + LATEST_SHORT=$(git rev-parse --short origin/main) 33 + 34 + if [ "$CURRENT_COMMIT" = "$LATEST_COMMIT" ]; then 35 + echo "Already up to date (${CURRENT_SHORT})" 36 + cd "$PROJECT_ROOT" 37 + exit 0 38 + fi 39 + 40 + echo "Updating from ${CURRENT_SHORT} to ${LATEST_SHORT}..." 41 + 42 + # Pull latest changes 43 + git pull origin main 44 + 45 + # Go back to project root 46 + cd "$PROJECT_ROOT" 47 + 48 + # Stage the submodule update 49 + git add vendor/atproto 50 + 51 + # Show what changed 52 + echo "" 53 + echo "Submodule updated successfully!" 54 + echo "Changes:" 55 + git diff --cached --submodule=log vendor/atproto 56 + 57 + echo "" 58 + echo "To complete the update, commit the changes:" 59 + echo " git commit -m \"Update atproto lexicons to ${LATEST_SHORT}\"" 60 + echo "" 61 + echo "Or to see what lexicon files changed:" 62 + echo " cd vendor/atproto && git log --oneline ${CURRENT_SHORT}..${LATEST_SHORT} -- lexicons/"
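A typical follow-up once the submodule has moved, sketched on the assumption that the same `pnpm lex:validate` and `pnpm lex:gen-server` tasks used by the pre-commit hook should be re-run against the new definitions:

```bash
./scripts/update-lexicons.sh

# Re-validate and regenerate code from the updated lexicons
pnpm lex:validate
pnpm lex:gen-server

# Commit the submodule bump suggested by the script
git commit -m "Update atproto lexicons to <short-sha>"
```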
-6
services/.sqlx/.sqlxrc
··· 1 - [database] 2 - url = "postgres://localhost/teal" 3 - migrations = "./migrations" 4 - 5 - [compile_time_verification] 6 - offline = false
···
+96 -366
services/Cargo.lock
··· 60 ] 61 62 [[package]] 63 - name = "anstream" 64 - version = "0.6.19" 65 - source = "registry+https://github.com/rust-lang/crates.io-index" 66 - checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" 67 - dependencies = [ 68 - "anstyle", 69 - "anstyle-parse", 70 - "anstyle-query", 71 - "anstyle-wincon", 72 - "colorchoice", 73 - "is_terminal_polyfill", 74 - "utf8parse", 75 - ] 76 - 77 - [[package]] 78 - name = "anstyle" 79 - version = "1.0.11" 80 - source = "registry+https://github.com/rust-lang/crates.io-index" 81 - checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" 82 - 83 - [[package]] 84 - name = "anstyle-parse" 85 - version = "0.2.7" 86 - source = "registry+https://github.com/rust-lang/crates.io-index" 87 - checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" 88 - dependencies = [ 89 - "utf8parse", 90 - ] 91 - 92 - [[package]] 93 - name = "anstyle-query" 94 - version = "1.1.3" 95 - source = "registry+https://github.com/rust-lang/crates.io-index" 96 - checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" 97 - dependencies = [ 98 - "windows-sys 0.59.0", 99 - ] 100 - 101 - [[package]] 102 - name = "anstyle-wincon" 103 - version = "3.0.9" 104 - source = "registry+https://github.com/rust-lang/crates.io-index" 105 - checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" 106 - dependencies = [ 107 - "anstyle", 108 - "once_cell_polyfill", 109 - "windows-sys 0.59.0", 110 - ] 111 - 112 - [[package]] 113 name = "anyhow" 114 version = "1.0.98" 115 source = "registry+https://github.com/rust-lang/crates.io-index" 116 checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" 117 118 [[package]] 119 - name = "aqua" 120 - version = "0.1.0" 121 - dependencies = [ 122 - "anyhow", 123 - "async-trait", 124 - "atrium-api", 125 - "axum", 126 - "base64", 127 - "chrono", 128 - "clap", 129 - "dotenvy", 130 - "iroh-car", 131 - "redis", 132 - "reqwest", 133 - "serde", 134 - "serde_json", 135 - "sqlx", 136 - "sys-info", 137 - "time", 138 - "tokio", 139 - "tower-http", 140 - "tracing", 141 - "tracing-subscriber", 142 - "types", 143 - "url", 144 - "uuid", 145 - "vergen", 146 - "vergen-gitcl", 147 - ] 148 - 149 - [[package]] 150 name = "arc-swap" 151 version = "1.7.1" 152 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 187 ] 188 189 [[package]] 190 name = "atoi" 191 version = "2.0.0" 192 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 287 checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" 288 dependencies = [ 289 "axum-core", 290 - "axum-macros", 291 "bytes", 292 "form_urlencoded", 293 "futures-util", ··· 300 "matchit", 301 "memchr", 302 "mime", 303 - "multer", 304 "percent-encoding", 305 "pin-project-lite", 306 "rustversion", ··· 337 ] 338 339 [[package]] 340 - name = "axum-macros" 341 - version = "0.5.0" 342 - source = "registry+https://github.com/rust-lang/crates.io-index" 343 - checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" 344 - dependencies = [ 345 - "proc-macro2", 346 - "quote", 347 - "syn 2.0.104", 348 - ] 349 - 350 - [[package]] 351 name = "backtrace" 352 version = "0.3.75" 353 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 504 version = "1.10.1" 505 source = "registry+https://github.com/rust-lang/crates.io-index" 506 checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 507 508 [[package]] 509 name = "cadet" ··· 511 
dependencies = [ 512 "anyhow", 513 "async-trait", 514 "atrium-api", 515 "base64", 516 "chrono", 517 "cid 0.11.1", 518 "dotenvy", 519 "flume", 520 - "iroh-car", 521 "libipld", 522 "metrics 0.23.1", 523 "metrics-exporter-prometheus", ··· 528 "reqwest", 529 "rocketman", 530 "serde", 531 "serde_json", 532 "sqlx", 533 "time", ··· 541 ] 542 543 [[package]] 544 - name = "camino" 545 - version = "1.1.10" 546 - source = "registry+https://github.com/rust-lang/crates.io-index" 547 - checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab" 548 - dependencies = [ 549 - "serde", 550 - ] 551 - 552 - [[package]] 553 - name = "cargo-platform" 554 - version = "0.1.9" 555 - source = "registry+https://github.com/rust-lang/crates.io-index" 556 - checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea" 557 - dependencies = [ 558 - "serde", 559 - ] 560 - 561 - [[package]] 562 - name = "cargo_metadata" 563 - version = "0.19.2" 564 - source = "registry+https://github.com/rust-lang/crates.io-index" 565 - checksum = "dd5eb614ed4c27c5d706420e4320fbe3216ab31fa1c33cd8246ac36dae4479ba" 566 - dependencies = [ 567 - "camino", 568 - "cargo-platform", 569 - "semver", 570 - "serde", 571 - "serde_json", 572 - "thiserror 2.0.12", 573 - ] 574 - 575 - [[package]] 576 name = "cbor4ii" 577 version = "0.2.14" 578 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 661 ] 662 663 [[package]] 664 - name = "clap" 665 - version = "4.5.41" 666 - source = "registry+https://github.com/rust-lang/crates.io-index" 667 - checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" 668 - dependencies = [ 669 - "clap_builder", 670 - "clap_derive", 671 - ] 672 - 673 - [[package]] 674 - name = "clap_builder" 675 - version = "4.5.41" 676 - source = "registry+https://github.com/rust-lang/crates.io-index" 677 - checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" 678 - dependencies = [ 679 - "anstream", 680 - "anstyle", 681 - "clap_lex", 682 - "strsim", 683 - ] 684 - 685 - [[package]] 686 - name = "clap_derive" 687 - version = "4.5.41" 688 - source = "registry+https://github.com/rust-lang/crates.io-index" 689 - checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" 690 - dependencies = [ 691 - "heck", 692 - "proc-macro2", 693 - "quote", 694 - "syn 2.0.104", 695 - ] 696 - 697 - [[package]] 698 - name = "clap_lex" 699 - version = "0.7.5" 700 - source = "registry+https://github.com/rust-lang/crates.io-index" 701 - checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" 702 - 703 - [[package]] 704 name = "cmake" 705 version = "0.1.54" 706 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 708 dependencies = [ 709 "cc", 710 ] 711 - 712 - [[package]] 713 - name = "colorchoice" 714 - version = "1.0.4" 715 - source = "registry+https://github.com/rust-lang/crates.io-index" 716 - checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 717 718 [[package]] 719 name = "combine" ··· 1296 "libc", 1297 "log", 1298 "rustversion", 1299 - "windows 0.61.3", 1300 ] 1301 1302 [[package]] ··· 1568 "js-sys", 1569 "log", 1570 "wasm-bindgen", 1571 - "windows-core 0.61.2", 1572 ] 1573 1574 [[package]] ··· 1757 ] 1758 1759 [[package]] 1760 - name = "is_terminal_polyfill" 1761 - version = "1.70.1" 1762 source = "registry+https://github.com/rust-lang/crates.io-index" 1763 - checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 1764 1765 [[package]] 1766 name = "itertools" 
··· 2150 ] 2151 2152 [[package]] 2153 - name = "multer" 2154 - version = "3.1.0" 2155 - source = "registry+https://github.com/rust-lang/crates.io-index" 2156 - checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b" 2157 - dependencies = [ 2158 - "bytes", 2159 - "encoding_rs", 2160 - "futures-util", 2161 - "http", 2162 - "httparse", 2163 - "memchr", 2164 - "mime", 2165 - "spin", 2166 - "version_check", 2167 - ] 2168 - 2169 - [[package]] 2170 name = "multibase" 2171 version = "0.9.1" 2172 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2300 ] 2301 2302 [[package]] 2303 - name = "ntapi" 2304 - version = "0.4.1" 2305 - source = "registry+https://github.com/rust-lang/crates.io-index" 2306 - checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" 2307 - dependencies = [ 2308 - "winapi", 2309 - ] 2310 - 2311 - [[package]] 2312 name = "nu-ansi-term" 2313 version = "0.46.0" 2314 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2383 ] 2384 2385 [[package]] 2386 - name = "num_threads" 2387 - version = "0.1.7" 2388 - source = "registry+https://github.com/rust-lang/crates.io-index" 2389 - checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" 2390 - dependencies = [ 2391 - "libc", 2392 - ] 2393 - 2394 - [[package]] 2395 - name = "objc2-core-foundation" 2396 - version = "0.3.1" 2397 - source = "registry+https://github.com/rust-lang/crates.io-index" 2398 - checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" 2399 - dependencies = [ 2400 - "bitflags 2.9.1", 2401 - ] 2402 - 2403 - [[package]] 2404 name = "object" 2405 version = "0.36.7" 2406 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2414 version = "1.21.3" 2415 source = "registry+https://github.com/rust-lang/crates.io-index" 2416 checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 2417 - 2418 - [[package]] 2419 - name = "once_cell_polyfill" 2420 - version = "1.70.1" 2421 - source = "registry+https://github.com/rust-lang/crates.io-index" 2422 - checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 2423 2424 [[package]] 2425 name = "openssl" ··· 3019 dependencies = [ 3020 "aws-lc-rs", 3021 "once_cell", 3022 "rustls-pki-types", 3023 "rustls-webpki", 3024 "subtle", ··· 3150 version = "1.0.26" 3151 source = "registry+https://github.com/rust-lang/crates.io-index" 3152 checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" 3153 - dependencies = [ 3154 - "serde", 3155 - ] 3156 3157 [[package]] 3158 name = "serde" ··· 3209 ] 3210 3211 [[package]] 3212 name = "serde_json" 3213 version = "1.0.141" 3214 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3296 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 3297 3298 [[package]] 3299 name = "signature" 3300 version = "2.2.0" 3301 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3402 "memchr", 3403 "once_cell", 3404 "percent-encoding", 3405 "serde", 3406 "serde_json", 3407 "sha2", ··· 3413 "tracing", 3414 "url", 3415 "uuid", 3416 ] 3417 3418 [[package]] ··· 3662 ] 3663 3664 [[package]] 3665 - name = "sys-info" 3666 - version = "0.9.1" 3667 - source = "registry+https://github.com/rust-lang/crates.io-index" 3668 - checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c" 3669 - dependencies = [ 3670 - "cc", 3671 - "libc", 3672 - ] 3673 - 3674 - [[package]] 3675 - name = "sysinfo" 3676 - version = "0.34.2" 
3677 - source = "registry+https://github.com/rust-lang/crates.io-index" 3678 - checksum = "a4b93974b3d3aeaa036504b8eefd4c039dced109171c1ae973f1dc63b2c7e4b2" 3679 - dependencies = [ 3680 - "libc", 3681 - "memchr", 3682 - "ntapi", 3683 - "objc2-core-foundation", 3684 - "windows 0.57.0", 3685 - ] 3686 - 3687 - [[package]] 3688 name = "system-configuration" 3689 version = "0.6.1" 3690 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3781 dependencies = [ 3782 "deranged", 3783 "itoa", 3784 - "libc", 3785 "num-conv", 3786 - "num_threads", 3787 "powerfmt", 3788 "serde", 3789 "time-core", ··· 3842 "io-uring", 3843 "libc", 3844 "mio", 3845 "pin-project-lite", 3846 "slab", 3847 "socket2 0.5.10", 3848 "tokio-macros", ··· 4133 "serde_ipld_dagcbor", 4134 "serde_json", 4135 "thiserror 2.0.12", 4136 - "uuid", 4137 ] 4138 4139 [[package]] ··· 4211 checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" 4212 4213 [[package]] 4214 - name = "utf8parse" 4215 - version = "0.2.2" 4216 - source = "registry+https://github.com/rust-lang/crates.io-index" 4217 - checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 4218 - 4219 - [[package]] 4220 name = "uuid" 4221 version = "1.17.0" 4222 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4241 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 4242 4243 [[package]] 4244 - name = "vergen" 4245 - version = "9.0.6" 4246 - source = "registry+https://github.com/rust-lang/crates.io-index" 4247 - checksum = "6b2bf58be11fc9414104c6d3a2e464163db5ef74b12296bda593cac37b6e4777" 4248 - dependencies = [ 4249 - "anyhow", 4250 - "cargo_metadata", 4251 - "derive_builder", 4252 - "regex", 4253 - "rustc_version", 4254 - "rustversion", 4255 - "sysinfo", 4256 - "time", 4257 - "vergen-lib", 4258 - ] 4259 - 4260 - [[package]] 4261 - name = "vergen-gitcl" 4262 - version = "1.0.8" 4263 - source = "registry+https://github.com/rust-lang/crates.io-index" 4264 - checksum = "b9dfc1de6eb2e08a4ddf152f1b179529638bedc0ea95e6d667c014506377aefe" 4265 - dependencies = [ 4266 - "anyhow", 4267 - "derive_builder", 4268 - "rustversion", 4269 - "time", 4270 - "vergen", 4271 - "vergen-lib", 4272 - ] 4273 - 4274 - [[package]] 4275 - name = "vergen-lib" 4276 - version = "0.1.6" 4277 - source = "registry+https://github.com/rust-lang/crates.io-index" 4278 - checksum = "9b07e6010c0f3e59fcb164e0163834597da68d1f864e2b8ca49f74de01e9c166" 4279 - dependencies = [ 4280 - "anyhow", 4281 - "derive_builder", 4282 - "rustversion", 4283 - ] 4284 - 4285 - [[package]] 4286 name = "version_check" 4287 version = "0.9.5" 4288 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4410 ] 4411 4412 [[package]] 4413 name = "which" 4414 version = "4.4.2" 4415 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4455 4456 [[package]] 4457 name = "windows" 4458 - version = "0.57.0" 4459 - source = "registry+https://github.com/rust-lang/crates.io-index" 4460 - checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" 4461 - dependencies = [ 4462 - "windows-core 0.57.0", 4463 - "windows-targets 0.52.6", 4464 - ] 4465 - 4466 - [[package]] 4467 - name = "windows" 4468 version = "0.61.3" 4469 source = "registry+https://github.com/rust-lang/crates.io-index" 4470 checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" 4471 dependencies = [ 4472 "windows-collections", 4473 - "windows-core 0.61.2", 4474 "windows-future", 4475 "windows-link", 4476 
"windows-numerics", ··· 4482 source = "registry+https://github.com/rust-lang/crates.io-index" 4483 checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" 4484 dependencies = [ 4485 - "windows-core 0.61.2", 4486 - ] 4487 - 4488 - [[package]] 4489 - name = "windows-core" 4490 - version = "0.57.0" 4491 - source = "registry+https://github.com/rust-lang/crates.io-index" 4492 - checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" 4493 - dependencies = [ 4494 - "windows-implement 0.57.0", 4495 - "windows-interface 0.57.0", 4496 - "windows-result 0.1.2", 4497 - "windows-targets 0.52.6", 4498 ] 4499 4500 [[package]] ··· 4503 source = "registry+https://github.com/rust-lang/crates.io-index" 4504 checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" 4505 dependencies = [ 4506 - "windows-implement 0.60.0", 4507 - "windows-interface 0.59.1", 4508 "windows-link", 4509 - "windows-result 0.3.4", 4510 "windows-strings", 4511 ] 4512 ··· 4516 source = "registry+https://github.com/rust-lang/crates.io-index" 4517 checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" 4518 dependencies = [ 4519 - "windows-core 0.61.2", 4520 "windows-link", 4521 "windows-threading", 4522 ] 4523 4524 [[package]] 4525 name = "windows-implement" 4526 - version = "0.57.0" 4527 - source = "registry+https://github.com/rust-lang/crates.io-index" 4528 - checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" 4529 - dependencies = [ 4530 - "proc-macro2", 4531 - "quote", 4532 - "syn 2.0.104", 4533 - ] 4534 - 4535 - [[package]] 4536 - name = "windows-implement" 4537 version = "0.60.0" 4538 source = "registry+https://github.com/rust-lang/crates.io-index" 4539 checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" ··· 4545 4546 [[package]] 4547 name = "windows-interface" 4548 - version = "0.57.0" 4549 - source = "registry+https://github.com/rust-lang/crates.io-index" 4550 - checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" 4551 - dependencies = [ 4552 - "proc-macro2", 4553 - "quote", 4554 - "syn 2.0.104", 4555 - ] 4556 - 4557 - [[package]] 4558 - name = "windows-interface" 4559 version = "0.59.1" 4560 source = "registry+https://github.com/rust-lang/crates.io-index" 4561 checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" ··· 4577 source = "registry+https://github.com/rust-lang/crates.io-index" 4578 checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" 4579 dependencies = [ 4580 - "windows-core 0.61.2", 4581 "windows-link", 4582 ] 4583 ··· 4588 checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" 4589 dependencies = [ 4590 "windows-link", 4591 - "windows-result 0.3.4", 4592 "windows-strings", 4593 - ] 4594 - 4595 - [[package]] 4596 - name = "windows-result" 4597 - version = "0.1.2" 4598 - source = "registry+https://github.com/rust-lang/crates.io-index" 4599 - checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" 4600 - dependencies = [ 4601 - "windows-targets 0.52.6", 4602 ] 4603 4604 [[package]]
··· 60 ] 61 62 [[package]] 63 name = "anyhow" 64 version = "1.0.98" 65 source = "registry+https://github.com/rust-lang/crates.io-index" 66 checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" 67 68 [[package]] 69 name = "arc-swap" 70 version = "1.7.1" 71 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 106 ] 107 108 [[package]] 109 + name = "atmst" 110 + version = "0.0.1" 111 + source = "registry+https://github.com/rust-lang/crates.io-index" 112 + checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d" 113 + dependencies = [ 114 + "async-trait", 115 + "bytes", 116 + "cid 0.11.1", 117 + "dashmap", 118 + "futures", 119 + "ipld-core", 120 + "iroh-car 0.5.1", 121 + "log", 122 + "multihash 0.19.3", 123 + "serde", 124 + "serde_ipld_dagcbor", 125 + "serde_ipld_dagjson", 126 + "sha2", 127 + "thiserror 1.0.69", 128 + "tokio", 129 + ] 130 + 131 + [[package]] 132 name = "atoi" 133 version = "2.0.0" 134 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 229 checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" 230 dependencies = [ 231 "axum-core", 232 "bytes", 233 "form_urlencoded", 234 "futures-util", ··· 241 "matchit", 242 "memchr", 243 "mime", 244 "percent-encoding", 245 "pin-project-lite", 246 "rustversion", ··· 277 ] 278 279 [[package]] 280 name = "backtrace" 281 version = "0.3.75" 282 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 433 version = "1.10.1" 434 source = "registry+https://github.com/rust-lang/crates.io-index" 435 checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 436 + dependencies = [ 437 + "serde", 438 + ] 439 440 [[package]] 441 name = "cadet" ··· 443 dependencies = [ 444 "anyhow", 445 "async-trait", 446 + "atmst", 447 "atrium-api", 448 "base64", 449 "chrono", 450 "cid 0.11.1", 451 "dotenvy", 452 "flume", 453 + "futures", 454 + "iroh-car 0.4.0", 455 "libipld", 456 "metrics 0.23.1", 457 "metrics-exporter-prometheus", ··· 462 "reqwest", 463 "rocketman", 464 "serde", 465 + "serde_ipld_dagcbor", 466 "serde_json", 467 "sqlx", 468 "time", ··· 476 ] 477 478 [[package]] 479 name = "cbor4ii" 480 version = "0.2.14" 481 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 564 ] 565 566 [[package]] 567 name = "cmake" 568 version = "0.1.54" 569 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 571 dependencies = [ 572 "cc", 573 ] 574 575 [[package]] 576 name = "combine" ··· 1153 "libc", 1154 "log", 1155 "rustversion", 1156 + "windows", 1157 ] 1158 1159 [[package]] ··· 1425 "js-sys", 1426 "log", 1427 "wasm-bindgen", 1428 + "windows-core", 1429 ] 1430 1431 [[package]] ··· 1614 ] 1615 1616 [[package]] 1617 + name = "iroh-car" 1618 + version = "0.5.1" 1619 source = "registry+https://github.com/rust-lang/crates.io-index" 1620 + checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 1621 + dependencies = [ 1622 + "anyhow", 1623 + "cid 0.11.1", 1624 + "futures", 1625 + "serde", 1626 + "serde_ipld_dagcbor", 1627 + "thiserror 1.0.69", 1628 + "tokio", 1629 + "unsigned-varint 0.7.2", 1630 + ] 1631 1632 [[package]] 1633 name = "itertools" ··· 2017 ] 2018 2019 [[package]] 2020 name = "multibase" 2021 version = "0.9.1" 2022 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2150 ] 2151 2152 [[package]] 2153 name = "nu-ansi-term" 2154 version = "0.46.0" 2155 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2224 ] 2225 2226 [[package]] 2227 name = "object" 
2228 version = "0.36.7" 2229 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2237 version = "1.21.3" 2238 source = "registry+https://github.com/rust-lang/crates.io-index" 2239 checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 2240 2241 [[package]] 2242 name = "openssl" ··· 2836 dependencies = [ 2837 "aws-lc-rs", 2838 "once_cell", 2839 + "ring", 2840 "rustls-pki-types", 2841 "rustls-webpki", 2842 "subtle", ··· 2968 version = "1.0.26" 2969 source = "registry+https://github.com/rust-lang/crates.io-index" 2970 checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" 2971 2972 [[package]] 2973 name = "serde" ··· 3024 ] 3025 3026 [[package]] 3027 + name = "serde_ipld_dagjson" 3028 + version = "0.2.0" 3029 + source = "registry+https://github.com/rust-lang/crates.io-index" 3030 + checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36" 3031 + dependencies = [ 3032 + "ipld-core", 3033 + "serde", 3034 + "serde_json", 3035 + ] 3036 + 3037 + [[package]] 3038 name = "serde_json" 3039 version = "1.0.141" 3040 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3122 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 3123 3124 [[package]] 3125 + name = "signal-hook-registry" 3126 + version = "1.4.5" 3127 + source = "registry+https://github.com/rust-lang/crates.io-index" 3128 + checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" 3129 + dependencies = [ 3130 + "libc", 3131 + ] 3132 + 3133 + [[package]] 3134 name = "signature" 3135 version = "2.2.0" 3136 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3237 "memchr", 3238 "once_cell", 3239 "percent-encoding", 3240 + "rustls", 3241 "serde", 3242 "serde_json", 3243 "sha2", ··· 3249 "tracing", 3250 "url", 3251 "uuid", 3252 + "webpki-roots 0.26.11", 3253 ] 3254 3255 [[package]] ··· 3499 ] 3500 3501 [[package]] 3502 name = "system-configuration" 3503 version = "0.6.1" 3504 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3595 dependencies = [ 3596 "deranged", 3597 "itoa", 3598 "num-conv", 3599 "powerfmt", 3600 "serde", 3601 "time-core", ··· 3654 "io-uring", 3655 "libc", 3656 "mio", 3657 + "parking_lot", 3658 "pin-project-lite", 3659 + "signal-hook-registry", 3660 "slab", 3661 "socket2 0.5.10", 3662 "tokio-macros", ··· 3947 "serde_ipld_dagcbor", 3948 "serde_json", 3949 "thiserror 2.0.12", 3950 ] 3951 3952 [[package]] ··· 4024 checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" 4025 4026 [[package]] 4027 name = "uuid" 4028 version = "1.17.0" 4029 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4048 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 4049 4050 [[package]] 4051 name = "version_check" 4052 version = "0.9.5" 4053 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4175 ] 4176 4177 [[package]] 4178 + name = "webpki-roots" 4179 + version = "0.26.11" 4180 + source = "registry+https://github.com/rust-lang/crates.io-index" 4181 + checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" 4182 + dependencies = [ 4183 + "webpki-roots 1.0.2", 4184 + ] 4185 + 4186 + [[package]] 4187 + name = "webpki-roots" 4188 + version = "1.0.2" 4189 + source = "registry+https://github.com/rust-lang/crates.io-index" 4190 + checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2" 4191 + dependencies = [ 4192 + "rustls-pki-types", 4193 + ] 4194 + 4195 + 
[[package]] 4196 name = "which" 4197 version = "4.4.2" 4198 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4238 4239 [[package]] 4240 name = "windows" 4241 version = "0.61.3" 4242 source = "registry+https://github.com/rust-lang/crates.io-index" 4243 checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" 4244 dependencies = [ 4245 "windows-collections", 4246 + "windows-core", 4247 "windows-future", 4248 "windows-link", 4249 "windows-numerics", ··· 4255 source = "registry+https://github.com/rust-lang/crates.io-index" 4256 checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" 4257 dependencies = [ 4258 + "windows-core", 4259 ] 4260 4261 [[package]] ··· 4264 source = "registry+https://github.com/rust-lang/crates.io-index" 4265 checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" 4266 dependencies = [ 4267 + "windows-implement", 4268 + "windows-interface", 4269 "windows-link", 4270 + "windows-result", 4271 "windows-strings", 4272 ] 4273 ··· 4277 source = "registry+https://github.com/rust-lang/crates.io-index" 4278 checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" 4279 dependencies = [ 4280 + "windows-core", 4281 "windows-link", 4282 "windows-threading", 4283 ] 4284 4285 [[package]] 4286 name = "windows-implement" 4287 version = "0.60.0" 4288 source = "registry+https://github.com/rust-lang/crates.io-index" 4289 checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" ··· 4295 4296 [[package]] 4297 name = "windows-interface" 4298 version = "0.59.1" 4299 source = "registry+https://github.com/rust-lang/crates.io-index" 4300 checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" ··· 4316 source = "registry+https://github.com/rust-lang/crates.io-index" 4317 checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" 4318 dependencies = [ 4319 + "windows-core", 4320 "windows-link", 4321 ] 4322 ··· 4327 checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" 4328 dependencies = [ 4329 "windows-link", 4330 + "windows-result", 4331 "windows-strings", 4332 ] 4333 4334 [[package]]
+5 -4
services/Cargo.toml
··· 1 [workspace] 2 - members = ["aqua", "cadet", "rocketman", "satellite", "types"] 3 resolver = "2" 4 5 [workspace.dependencies] ··· 12 "postgres", 13 "uuid", 14 "chrono", 15 ] } 16 serde = { version = "1.0", features = ["derive"] } 17 anyhow = "1.0" ··· 19 tracing = "0.1" 20 tracing-subscriber = "0.3" 21 metrics = "0.23" 22 - reqwest = { version = "0.12", features = ["json"] } 23 url = "2.5" 24 rand = "0.8" 25 flume = "0.11" 26 async-trait = "0.1" 27 time = "0.3" 28 dotenvy = "0.15" 29 - tokio-tungstenite = "0.24" 30 atrium-api = "0.25" 31 chrono = { version = "0.4", features = ["serde"] } 32 uuid = { version = "1.0", features = ["v4", "serde"] } 33 types = { path = "types" } 34 - rocketman = { path = "rocketman" } 35 36 # CAR and IPLD dependencies 37 iroh-car = "0.4"
··· 1 [workspace] 2 + members = ["cadet", "satellite", "types"] 3 resolver = "2" 4 5 [workspace.dependencies] ··· 12 "postgres", 13 "uuid", 14 "chrono", 15 + "tls-rustls", 16 ] } 17 serde = { version = "1.0", features = ["derive"] } 18 anyhow = "1.0" ··· 20 tracing = "0.1" 21 tracing-subscriber = "0.3" 22 metrics = "0.23" 23 + reqwest.workspace = true 24 url = "2.5" 25 rand = "0.8" 26 flume = "0.11" 27 async-trait = "0.1" 28 time = "0.3" 29 dotenvy = "0.15" 30 + tokio-tungstenite.workspace = true 31 atrium-api = "0.25" 32 chrono = { version = "0.4", features = ["serde"] } 33 uuid = { version = "1.0", features = ["v4", "serde"] } 34 types = { path = "types" } 35 + rocketman = "0.2.5" 36 37 # CAR and IPLD dependencies 38 iroh-car = "0.4"
+20
services/Cross.toml
···
··· 1 + [build.env] 2 + passthrough = [ 3 + "CARGO_HOME", 4 + "CARGO_TARGET_DIR", 5 + "SQLX_OFFLINE", 6 + "PKG_CONFIG_ALLOW_CROSS", 7 + ] 8 + 9 + [target.aarch64-unknown-linux-gnu] 10 + image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main" 11 + 12 + [target.aarch64-unknown-linux-gnu.env] 13 + passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"] 14 + # Allow cross-compilation of native dependencies 15 + PKG_CONFIG_ALLOW_CROSS = "1" 16 + # Use static linking to reduce runtime dependencies 17 + RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s" 18 + # Disable problematic features that might require OpenSSL 19 + CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc" 20 + CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
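This file only takes effect when the workspace is built through the cross tool. A minimal invocation, assuming cross is installed (`cargo install cross`) and the command is run from the services directory where Cross.toml lives:

```bash
cd services

# SQLX_OFFLINE is passed through into the build container per the config above
SQLX_OFFLINE=true cross build --release --target aarch64-unknown-linux-gnu
```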
+4
services/cadet/Cargo.toml
··· 3 version = "0.1.0" 4 edition = "2021" 5 6 [dependencies] 7 atrium-api.workspace = true 8 tokio.workspace = true ··· 32 libipld.workspace = true 33 cid.workspace = true 34 base64.workspace = true 35 36 # Redis for job queues 37 redis.workspace = true
··· 3 version = "0.1.0" 4 edition = "2021" 5 6 + 7 [dependencies] 8 atrium-api.workspace = true 9 tokio.workspace = true ··· 33 libipld.workspace = true 34 cid.workspace = true 35 base64.workspace = true 36 + atmst = "0.0.1" 37 + serde_ipld_dagcbor = "0.6" 38 + futures = "0.3" 39 40 # Redis for job queues 41 redis.workspace = true
+61 -1
services/cadet/Dockerfile
··· 1 FROM --platform=${BUILDPLATFORM} rust:latest AS buildah 2 3 # Create appuser ··· 15 16 WORKDIR /buildah 17 18 COPY ./ . 19 20 - RUN . ./target.sh && touch src/main.rs && echo "Building for $TARGET_ARCH" && cargo build --release --target $RUST_TARGET && cp target/$RUST_TARGET/release/cadet target/cadet 21 22 FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc 23
··· 1 + # Docker build args for cross-platform builds (must be at the top) 2 + ARG TARGETPLATFORM 3 + ARG BUILDPLATFORM 4 + ARG TARGETARCH 5 + ARG TARGETOS 6 + 7 FROM --platform=${BUILDPLATFORM} rust:latest AS buildah 8 9 # Create appuser ··· 21 22 WORKDIR /buildah 23 24 + # Re-declare ARGs after FROM (Docker requirement) 25 + ARG TARGETPLATFORM 26 + ARG BUILDPLATFORM 27 + ARG TARGETARCH 28 + ARG TARGETOS 29 + 30 + # Debug platform detection before copying files 31 + RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM" 32 + 33 COPY ./ . 34 35 + # Setup lexicons and install dependencies 36 + RUN ./scripts/setup-lexicons.sh 37 + 38 + # Install Node.js and pnpm for lexicon generation 39 + RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/* 40 + RUN npm install -g pnpm 41 + 42 + # Install dependencies and generate lexicons 43 + RUN pnpm install 44 + RUN cd tools/lexicon-cli && pnpm build 45 + RUN pnpm lex:gen 46 + 47 + # Install cross-compilation toolchains 48 + RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu 49 + 50 + # Enable ARM64 architecture and install cross-compilation tools 51 + RUN dpkg --add-architecture arm64 && \ 52 + apt-get update && \ 53 + apt-get install -y \ 54 + gcc-aarch64-linux-gnu \ 55 + libssl-dev:arm64 \ 56 + libssl-dev \ 57 + pkg-config \ 58 + && rm -rf /var/lib/apt/lists/* 59 + 60 + # Set up cross-compilation environment 61 + ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc 62 + ENV PKG_CONFIG_ALLOW_CROSS=1 63 + ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig 64 + ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr 65 + ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu 66 + ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl 67 + 68 + # Force SQLx to use offline mode with workspace cache 69 + ENV SQLX_OFFLINE=true 70 + 71 + # copy sqlx in 72 + COPY ./.sqlx ./services/cadet/.sqlx 73 + 74 + # Debug platform detection and run build 75 + RUN echo "DEBUG Before target.sh: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH" && \ 76 + . ./target.sh && \ 77 + touch services/cadet/src/main.rs && \ 78 + echo "Building for $TARGET_ARCH" && \ 79 + cargo build --release --target $RUST_TARGET --package cadet && \ 80 + cp target/$RUST_TARGET/release/cadet target/cadet 81 82 FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc 83
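Because the build stage copies scripts/, tools/, and .sqlx/ from the repository root, the image is meant to be built with the repo root as the build context. A hedged example using docker buildx (the image tag is illustrative):

```bash
# Build the cadet image for arm64 from the repository root
docker buildx build \
  --platform linux/arm64 \
  -f services/cadet/Dockerfile \
  -t teal-cadet:dev \
  .
```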
+240
services/cadet/src/ingestors/car/README.md
···
··· 1 + # CAR Import System with `atmst` 2 + 3 + This directory contains the implementation of Teal's CAR (Content Addressable aRchive) import functionality, now powered by the `atmst` library for proper AT Protocol-style Merkle Search Tree handling. 4 + 5 + ## Overview 6 + 7 + The CAR import system allows Teal to ingest historical music listening data from AT Protocol repositories. Previously, this was done with manual IPLD parsing, but we've now migrated to use the specialized `atmst` library for more accurate and robust CAR file processing. 8 + 9 + ## Key Components 10 + 11 + ### `CarImportIngestor` 12 + 13 + The main entry point for CAR file processing. This ingestor: 14 + 15 + 1. **Accepts CAR data** via the `LexiconIngestor` interface (base64 or URL) 16 + 2. **Uses `atmst::CarImporter`** to parse CAR files with proper MST handling 17 + 3. **Converts to MST structure** for tree traversal and record extraction 18 + 4. **Delegates to existing ingestors** for Teal record types (play, profile, status) 19 + 20 + ### Migration from `iroh-car` to `atmst` 21 + 22 + **Previous Implementation:** 23 + - Used `iroh-car` for basic CAR parsing 24 + - Manual IPLD block decoding with `libipld` 25 + - Complex two-pass processing to extract rkey mappings from commit operations 26 + - Error-prone MST parsing that could miss records 27 + 28 + **New Implementation:** 29 + - Uses `atmst::CarImporter` for specialized AT Protocol CAR handling 30 + - Built-in MST structure understanding 31 + - Proper tree traversal with guaranteed rkey extraction 32 + - More reliable and maintainable code 33 + 34 + ## Usage 35 + 36 + ### As a LexiconIngestor 37 + 38 + The CAR importer integrates seamlessly with Teal's existing ingestion pipeline: 39 + 40 + ```rust 41 + // CAR data in a record 42 + { 43 + "$type": "com.teal.car.import", 44 + "carData": "base64-encoded-car-file-here" 45 + } 46 + 47 + // Or as a URL reference 48 + { 49 + "$type": "com.teal.car.import", 50 + "carData": { 51 + "url": "https://example.com/repo.car" 52 + } 53 + } 54 + ``` 55 + 56 + ### Direct Import 57 + 58 + ```rust 59 + let ingestor = CarImportIngestor::new(db_pool); 60 + 61 + // Import from bytes 62 + let import_id = ingestor.import_car_bytes(&car_data, "did:plc:example").await?; 63 + 64 + // Import from PDS 65 + let import_id = ingestor.fetch_and_process_identity_car("user.bsky.social").await?; 66 + ``` 67 + 68 + ## Supported Record Types 69 + 70 + The CAR importer automatically detects and processes these Teal record types: 71 + 72 + - **`fm.teal.alpha.feed.play`** - Music play records 73 + - **`fm.teal.alpha.profile`** - User profile data 74 + - **`fm.teal.alpha.status`** - User status updates 75 + 76 + Records are processed using the same logic as real-time Jetstream ingestion, ensuring data consistency. 77 + 78 + ## Architecture 79 + 80 + ### MST Processing Flow 81 + 82 + 1. **CAR Import**: `atmst::CarImporter` loads and validates the CAR file 83 + 2. **MST Conversion**: CAR data is converted to an `atmst::Mst` structure 84 + 3. **Tree Traversal**: MST is traversed depth-first to find all records 85 + 4. **Record Extraction**: Each MST entry is examined for Teal record types 86 + 5. 
**Delegation**: Valid records are passed to existing Teal ingestors 87 + 88 + ### Key Benefits 89 + 90 + - **Proper rkey handling**: MST structure ensures correct record key extraction 91 + - **AT Protocol compliance**: Uses specialized library designed for AT Protocol 92 + - **Maintainable code**: Eliminates complex manual MST parsing 93 + - **Better error handling**: More robust than previous implementation 94 + 95 + ## Current Status 96 + 97 + ### โœ… Completed 98 + - Basic `atmst` integration 99 + - MST structure setup and conversion 100 + - Record type detection and routing 101 + - Integration with existing Teal ingestors 102 + - Error handling and logging 103 + 104 + ### ๐Ÿšง In Progress 105 + - **Block data access**: Full implementation of record data extraction from MST 106 + - **MST traversal**: Complete iteration through MST entries 107 + - **Testing**: Comprehensive test suite with real CAR files 108 + 109 + ### ๐Ÿ“‹ TODO 110 + - Complete `get_record_from_mst()` implementation 111 + - Add MST entry iteration logic 112 + - Performance optimization for large CAR files 113 + - Comprehensive integration tests 114 + 115 + ## Implementation Notes 116 + 117 + ### Block Data Access 118 + 119 + The current implementation has a placeholder for accessing actual record data from the MST: 120 + 121 + ```rust 122 + fn get_record_from_mst(&self, cid: &atmst::Cid, mst: &Mst) -> Option<Value> { 123 + // TODO: Implement proper block data access using atmst API 124 + // This requires understanding how to extract IPLD data for a given CID 125 + // from the MST's internal block storage 126 + None 127 + } 128 + ``` 129 + 130 + This is the key missing piece that needs to be completed based on `atmst` library documentation. 131 + 132 + ### MST Traversal 133 + 134 + Similarly, the MST traversal logic needs completion: 135 + 136 + ```rust 137 + // TODO: Implement proper MST iteration 138 + // for (cid, node) in mst.iter() { 139 + // // Process MST entries 140 + // } 141 + ``` 142 + 143 + ### Error Handling 144 + 145 + The system is designed to be resilient: 146 + - Invalid records are logged and skipped 147 + - Network errors during PDS fetching are properly reported 148 + - Database errors are propagated with context 149 + 150 + ## Testing 151 + 152 + ### Test Structure 153 + 154 + ```bash 155 + # Unit tests (no database required) 156 + cargo test test_parse_teal_key 157 + cargo test test_is_teal_record_key 158 + 159 + # Integration tests (requires database) 160 + cargo test test_atmst_car_import --ignored 161 + 162 + # CLI testing 163 + cd tools/teal-cli 164 + cargo run -- car analyze path/to/file.car 165 + ``` 166 + 167 + ### Test Data 168 + 169 + Test CAR files should be placed in `services/cadet/` for integration testing: 170 + - `test.car` - Basic test file with Teal records 171 + - `large.car` - Performance testing file 172 + - `empty.car` - Edge case testing 173 + 174 + ## Dependencies 175 + 176 + ### Key Dependencies 177 + - **`atmst`**: AT Protocol MST library (v0.0.1) 178 + - **`serde_json`**: JSON serialization for record processing 179 + - **`anyhow`**: Error handling 180 + - **`uuid`**: Import ID generation 181 + - **`reqwest`**: HTTP client for PDS fetching 182 + 183 + ### Workspace Dependencies 184 + The implementation uses existing Teal workspace dependencies for database access, logging, and record processing. 185 + 186 + ## Configuration 187 + 188 + No additional configuration is required. The CAR importer uses the same database connection and logging setup as other Teal ingestors. 
189 + 190 + ## Monitoring 191 + 192 + The CAR importer provides detailed logging: 193 + 194 + - **Info**: Successful imports, record counts, processing progress 195 + - **Warn**: Skipped records, missing data, network issues 196 + - **Error**: Database failures, invalid CAR files, processing errors 197 + 198 + Metrics are integrated with Teal's existing observability stack. 199 + 200 + ## Performance 201 + 202 + ### Optimization Strategies 203 + 204 + 1. **Streaming processing**: Records are processed as they're discovered 205 + 2. **Batch database operations**: Multiple records can be inserted in batches 206 + 3. **Memory management**: Large CAR files are processed without loading entirely into memory 207 + 4. **Parallel processing**: Future enhancement for concurrent record processing 208 + 209 + ### Benchmarks 210 + 211 + Performance testing should be conducted with: 212 + - Small CAR files (< 1MB, ~100 records) 213 + - Medium CAR files (1-50MB, ~10K records) 214 + - Large CAR files (> 50MB, ~100K+ records) 215 + 216 + ## Future Enhancements 217 + 218 + ### Planned Features 219 + - **Incremental imports**: Support for delta/since-based CAR fetching 220 + - **Batch processing**: Queue-based processing for multiple CAR files 221 + - **Validation**: Pre-import validation of CAR file integrity 222 + - **Metrics**: Detailed import statistics and performance monitoring 223 + 224 + ### Integration Opportunities 225 + - **Admin API**: Trigger imports via HTTP API 226 + - **Scheduled imports**: Cron-based periodic imports from known users 227 + - **Real-time sync**: Hybrid approach combining Jetstream + CAR imports 228 + 229 + --- 230 + 231 + ## Contributing 232 + 233 + When working on the CAR import system: 234 + 235 + 1. **Test thoroughly**: Use both unit and integration tests 236 + 2. **Document changes**: Update this README for significant modifications 237 + 3. **Monitor performance**: Large CAR files can impact system performance 238 + 4. **Handle errors gracefully**: Network and parsing errors are expected 239 + 240 + For questions about `atmst` integration or MST processing, refer to the library documentation or consider reaching out to the `atmst` maintainers.
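To exercise the importer outside the ingestion pipeline, a CAR file can also be fetched by hand with the same XRPC endpoints the ingestor uses (com.atproto.identity.resolveHandle, plc.directory, com.atproto.sync.getRepo). A sketch assuming curl and jq are available and the target account uses did:plc:

```bash
HANDLE="user.bsky.social"

# Resolve the handle to a DID
DID=$(curl -s "https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle=${HANDLE}" | jq -r '.did')

# Look up the PDS host from the DID document
PDS=$(curl -s "https://plc.directory/${DID}" \
  | jq -r '.service[] | select(.id == "#atproto_pds") | .serviceEndpoint' \
  | sed 's#https://##')

# Download the full repository as a CAR file
curl -s "https://${PDS}/xrpc/com.atproto.sync.getRepo?did=${DID}" -o repo.car

# Inspect it with the CLI mentioned in the Testing section
(cd tools/teal-cli && cargo run -- car analyze ../../repo.car)
```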
+677 -421
services/cadet/src/ingestors/car/car_import.rs
··· 1 use anyhow::{anyhow, Result}; 2 use async_trait::async_trait; 3 - use base64::{engine::general_purpose, Engine as _}; 4 - use chrono; 5 - use cid::Cid; 6 - use iroh_car::{CarHeader, CarReader}; 7 - use libipld::cbor::DagCborCodec; 8 - use libipld::{Block, Cid as LibipldCid, Ipld}; 9 - use reqwest; 10 use rocketman::{ingestion::LexiconIngestor, types::event::Event}; 11 use serde_json::Value; 12 use sqlx::PgPool; 13 - use std::io::Cursor; 14 use tracing::{info, warn}; 15 - use url; 16 17 pub struct CarImportIngestor { 18 sql: PgPool, 19 } 20 21 impl CarImportIngestor { 22 pub fn new(sql: PgPool) -> Self { 23 Self { sql } 24 } 25 26 - /// Process a CAR file from bytes 27 - async fn process_car_data(&self, car_data: &[u8], import_id: &str) -> Result<()> { 28 - info!("Starting CAR file processing for import {}", import_id); 29 30 - let cursor = Cursor::new(car_data); 31 - let mut reader = CarReader::new(cursor).await?; 32 33 - // Read the header 34 - let header = reader.header(); 35 - info!("CAR header: {} root CIDs", header.roots().len()); 36 37 - // Track import metadata 38 - // self.store_import_metadata(import_id, header).await?; 39 40 - // Process blocks 41 - let mut block_count = 0; 42 - while let Some((cid, block_data)) = reader.next_block().await? { 43 - // Convert iroh-car CID to our CID type for processing 44 - let our_cid: Cid = cid.to_string().parse()?; 45 - self.process_car_block(&our_cid, &block_data, import_id) 46 - .await?; 47 - block_count += 1; 48 49 - if block_count % 100 == 0 { 50 - info!("Processed {} blocks for import {}", block_count, import_id); 51 } 52 } 53 54 info!( 55 - "Completed CAR file processing: {} blocks for import {}", 56 - block_count, import_id 57 ); 58 - // self.mark_import_complete(import_id, block_count).await?; 59 60 Ok(()) 61 } 62 63 - /// Process an individual IPLD block from the CAR file 64 - async fn process_car_block(&self, cid: &Cid, block_data: &[u8], import_id: &str) -> Result<()> { 65 - // Store the raw block first 66 - // self.store_raw_block(cid, block_data, import_id).await?; 67 68 - // Try to decode as IPLD and extract meaningful data 69 - match self.decode_and_extract_data(cid, block_data).await { 70 - Ok(Some(extracted_data)) => { 71 - self.process_extracted_data(&extracted_data, cid, import_id) 72 - .await?; 73 - } 74 - Ok(None) => { 75 - // Block doesn't contain extractable data, just stored raw 76 - } 77 - Err(e) => { 78 - warn!("Failed to decode block {}: {}", cid, e); 79 - // Continue processing other blocks 80 } 81 } 82 83 - Ok(()) 84 } 85 86 - /// Decode IPLD block and extract AT Protocol data if present 87 - async fn decode_and_extract_data( 88 &self, 89 - cid: &Cid, 90 - block_data: &[u8], 91 - ) -> Result<Option<ExtractedData>> { 92 - // Create IPLD block (convert CID types) 93 - let libipld_cid: LibipldCid = cid.to_string().parse()?; 94 - let block: Block<libipld::DefaultParams> = Block::new(libipld_cid, block_data.to_vec())?; 95 - 96 - // Decode to IPLD (try to decode as DAG-CBOR, which is common in AT Protocol) 97 - let ipld: Ipld = match block.decode::<DagCborCodec, Ipld>() { 98 - Ok(ipld) => ipld, 99 - Err(_) => { 100 - // If DAG-CBOR fails, try as raw data 101 - return Ok(None); 102 - } 103 - }; 104 - 105 - // Check if this looks like AT Protocol data 106 - if let Ipld::Map(map) = &ipld { 107 - // Look for AT Protocol patterns 108 - if let Some(collection) = map.get("$type").and_then(|v| { 109 - if let Ipld::String(s) = v { 110 - Some(s.as_str()) 111 - } else { 112 - None 113 } 114 - }) { 115 - return 
Ok(Some(ExtractedData { 116 - collection: collection.to_string(), 117 - data: ipld, 118 - cid: cid.clone(), 119 - })); 120 } 121 - 122 - // Check for commit structures 123 - if map.contains_key("ops") && map.contains_key("prev") { 124 - return Ok(Some(ExtractedData { 125 - collection: "commit".to_string(), 126 - data: ipld, 127 - cid: cid.clone(), 128 - })); 129 } 130 } 131 - 132 - Ok(None) 133 } 134 135 - /// Process extracted AT Protocol data 136 - async fn process_extracted_data( 137 &self, 138 - data: &ExtractedData, 139 - cid: &Cid, 140 - import_id: &str, 141 ) -> Result<()> { 142 - match data.collection.as_str() { 143 "fm.teal.alpha.feed.play" => { 144 - self.process_play_record(&data.data, cid, import_id).await?; 145 } 146 "fm.teal.alpha.actor.profile" => { 147 - self.process_profile_record(&data.data, cid, import_id) 148 - .await?; 149 } 150 "fm.teal.alpha.actor.status" => { 151 - self.process_status_record(&data.data, cid, import_id) 152 - .await?; 153 - } 154 - "commit" => { 155 - self.process_commit_record(&data.data, cid, import_id) 156 - .await?; 157 } 158 _ => { 159 - info!("Unhandled collection type: {}", data.collection); 160 } 161 } 162 163 - Ok(()) 164 } 165 166 - /// Process a Teal play record from IPLD data 167 - async fn process_play_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> { 168 - // Convert IPLD to JSON value for processing by existing ingestors 169 - let json_value = ipld_to_json(ipld)?; 170 171 - // Delegate to existing play ingestor logic 172 - if let Ok(play_record) = 173 - serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(json_value) 174 { 175 - info!("Importing play record from CAR: {}", play_record.track_name); 176 177 - // Use existing play ingestor for consistency 178 - let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone()); 179 180 - // Create a synthetic AT URI for the imported record 181 - let synthetic_did = format!("car-import:{}", import_id); 182 - let rkey = cid.to_string(); 183 - let uri = super::super::teal::assemble_at_uri( 184 - &synthetic_did, 185 - "fm.teal.alpha.feed.play", 186 - &rkey, 187 - ); 188 189 - // Store using existing logic 190 - play_ingestor 191 - .insert_play(&play_record, &uri, &cid.to_string(), &synthetic_did, &rkey) 192 - .await?; 193 194 - // Track the extracted record 195 - // self.store_extracted_record(import_id, cid, "fm.teal.alpha.feed.play", Some(&uri)).await?; 196 } 197 198 - Ok(()) 199 } 200 201 - /// Process a Teal profile record from IPLD data 202 - async fn process_profile_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> { 203 - let json_value = ipld_to_json(ipld)?; 204 205 - if let Ok(profile_record) = 206 - serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>(json_value) 207 - { 208 - info!( 209 - "Importing profile record from CAR: {:?}", 210 - profile_record.display_name 211 - ); 212 213 - // For now, just log until we have public methods on profile ingestor 214 - info!( 215 - "Would store profile record from CAR import {} with CID {}", 216 - import_id, cid 217 - ); 218 219 - // Track the extracted record 220 - // self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.profile", None).await?; 221 - } 222 223 - Ok(()) 224 } 225 226 - /// Process a Teal status record from IPLD data 227 - async fn process_status_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> { 228 - let json_value = ipld_to_json(ipld)?; 229 230 - if let Ok(_status_record) = 231 - 
serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>(json_value) 232 - { 233 - info!("Importing status record from CAR"); 234 235 - // For now, just log until we have public methods on status ingestor 236 - info!( 237 - "Would store status record from CAR import {} with CID {}", 238 - import_id, cid 239 - ); 240 241 - // Track the extracted record 242 - // self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.status", None).await?; 243 } 244 245 - Ok(()) 246 } 247 248 - /// Process a commit record from IPLD data 249 - async fn process_commit_record( 250 - &self, 251 - _ipld: &Ipld, 252 - _cid: &Cid, 253 - _import_id: &str, 254 - ) -> Result<()> { 255 - info!("Processing commit record from CAR import"); 256 257 - // Store commit metadata for tracking 258 - // self.store_commit_metadata(ipld, cid, import_id).await?; 259 260 - Ok(()) 261 } 262 263 - /// Store CAR import metadata 264 - async fn store_import_metadata(&self, _import_id: &str, _header: &CarHeader) -> Result<()> { 265 - // TODO: Implement when database tables are ready 266 - Ok(()) 267 } 268 269 - /// Mark import as complete 270 - async fn mark_import_complete(&self, _import_id: &str, _block_count: i32) -> Result<()> { 271 - // TODO: Implement when database tables are ready 272 Ok(()) 273 } 274 275 - /// Store raw IPLD block 276 - async fn store_raw_block( 277 - &self, 278 - _cid: &Cid, 279 - _block_data: &[u8], 280 - _import_id: &str, 281 - ) -> Result<()> { 282 - // TODO: Implement when database tables are ready 283 - Ok(()) 284 } 285 286 - /// Store commit metadata 287 - async fn store_commit_metadata(&self, _ipld: &Ipld, _cid: &Cid, import_id: &str) -> Result<()> { 288 - info!("Would store commit metadata from CAR import {}", import_id); 289 - Ok(()) 290 } 291 292 - /// Store extracted record tracking 293 - async fn store_extracted_record( 294 - &self, 295 - _import_id: &str, 296 - _cid: &Cid, 297 - _collection: &str, 298 - _record_uri: Option<&str>, 299 - ) -> Result<()> { 300 - // TODO: Implement when database tables are ready 301 - Ok(()) 302 } 303 304 - /// Fetch and process CAR file for a given identity (handle or DID) 305 - pub async fn fetch_and_process_identity_car(&self, identity: &str) -> Result<String> { 306 - info!( 307 - "Starting CAR fetch and processing for identity: {}", 308 - identity 309 - ); 310 311 - // Resolve identity to DID and PDS 312 - let (user_did, pds_host) = self.resolve_user_to_pds(identity).await?; 313 - info!( 314 - "Resolved {} to DID {} on PDS {}", 315 - identity, user_did, pds_host 316 - ); 317 318 - // Fetch CAR file from PDS 319 - let car_data = self.fetch_car_from_pds(&pds_host, &user_did, None).await?; 320 - info!( 321 - "Successfully fetched CAR file for {} ({} bytes)", 322 - user_did, 323 - car_data.len() 324 - ); 325 326 - // Generate import ID 327 - let import_id = format!( 328 - "pds-{}-{}", 329 - user_did.replace(":", "-"), 330 - chrono::Utc::now().timestamp() 331 - ); 332 333 - // Process through existing pipeline 334 - self.process_car_data(&car_data, &import_id).await?; 335 336 - info!("โœ… CAR import completed successfully for {}", identity); 337 - Ok(import_id) 338 - } 339 340 - /// Resolve a user identifier (DID or handle) to their DID and PDS host 341 - async fn resolve_user_to_pds(&self, user_identifier: &str) -> Result<(String, String)> { 342 - if user_identifier.starts_with("did:") { 343 - // User provided a DID directly, resolve to PDS 344 - let pds_host = self.resolve_did_to_pds(user_identifier).await?; 345 - 
Ok((user_identifier.to_string(), pds_host)) 346 - } else { 347 - // User provided a handle, resolve to DID then PDS 348 - let user_did = self.resolve_handle_to_did(user_identifier).await?; 349 - let pds_host = self.resolve_did_to_pds(&user_did).await?; 350 - Ok((user_did, pds_host)) 351 - } 352 } 353 354 - /// Resolve a handle to a DID using com.atproto.identity.resolveHandle 355 - async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> { 356 - let url = format!( 357 - "https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}", 358 - handle 359 ); 360 361 - let response = reqwest::get(&url).await?; 362 - if !response.status().is_success() { 363 - return Err(anyhow!( 364 - "Failed to resolve handle {}: {}", 365 - handle, 366 - response.status() 367 - )); 368 - } 369 370 - let json: serde_json::Value = response.json().await?; 371 - let did = json["did"] 372 - .as_str() 373 - .ok_or_else(|| anyhow!("No DID found in response for handle {}", handle))?; 374 375 - Ok(did.to_string()) 376 - } 377 378 - /// Resolve a DID to their PDS host using DID document 379 - async fn resolve_did_to_pds(&self, did: &str) -> Result<String> { 380 - // For DID:plc, use the PLC directory 381 - if did.starts_with("did:plc:") { 382 - let url = format!("https://plc.directory/{}", did); 383 384 - let response = reqwest::get(&url).await?; 385 - if !response.status().is_success() { 386 - return Err(anyhow!( 387 - "Failed to resolve DID {}: {}", 388 - did, 389 - response.status() 390 - )); 391 - } 392 393 - let doc: serde_json::Value = response.json().await?; 394 395 - // Find the PDS service endpoint 396 - if let Some(services) = doc["service"].as_array() { 397 - for service in services { 398 - if service["id"].as_str() == Some("#atproto_pds") { 399 - if let Some(endpoint) = service["serviceEndpoint"].as_str() { 400 - // Extract hostname from URL 401 - let parsed_url = url::Url::parse(endpoint)?; 402 - let host = parsed_url 403 - .host_str() 404 - .ok_or_else(|| anyhow!("Invalid PDS endpoint URL: {}", endpoint))?; 405 - return Ok(host.to_string()); 406 - } 407 - } 408 - } 409 - } 410 411 - Err(anyhow!("No PDS service found in DID document for {}", did)) 412 } else { 413 - Err(anyhow!("Unsupported DID method: {}", did)) 414 } 415 } 416 417 - /// Fetch CAR file from PDS using com.atproto.sync.getRepo 418 - async fn fetch_car_from_pds( 419 - &self, 420 - pds_host: &str, 421 - did: &str, 422 - since: Option<&str>, 423 - ) -> Result<Vec<u8>> { 424 - let mut url = format!( 425 - "https://{}/xrpc/com.atproto.sync.getRepo?did={}", 426 - pds_host, did 427 - ); 428 - 429 - if let Some(since_rev) = since { 430 - url.push_str(&format!("&since={}", since_rev)); 431 } 432 433 - info!("Fetching CAR file from: {}", url); 434 435 - let response = reqwest::get(&url).await?; 436 - if !response.status().is_success() { 437 - return Err(anyhow!( 438 - "Failed to fetch CAR from PDS {}: {}", 439 - pds_host, 440 - response.status() 441 - )); 442 - } 443 444 - // Verify content type 445 - let content_type = response 446 - .headers() 447 - .get("content-type") 448 - .and_then(|h| h.to_str().ok()) 449 - .unwrap_or(""); 450 451 - if !content_type.contains("application/vnd.ipld.car") { 452 - return Err(anyhow!("Unexpected content type: {}", content_type)); 453 } 454 455 - let car_data = response.bytes().await?; 456 - Ok(car_data.to_vec()) 457 } 458 - } 459 460 - #[async_trait] 461 - impl LexiconIngestor for CarImportIngestor { 462 - async fn ingest(&self, message: Event<Value>) -> Result<()> { 463 - // For CAR imports, we 
expect the message to contain CAR file data 464 - // This could be a file path, URL, or base64 encoded data 465 466 - if let Some(commit) = &message.commit { 467 - if let Some(record) = &commit.record { 468 - // Check if this is a CAR import request 469 - if let Some(car_data_field) = record.get("carData") { 470 - let import_id = format!("{}:{}", message.did, commit.rkey); 471 472 - match car_data_field { 473 - Value::String(base64_data) => { 474 - // Decode base64 CAR data 475 - if let Ok(car_bytes) = general_purpose::STANDARD.decode(base64_data) { 476 - self.process_car_data(&car_bytes, &import_id).await?; 477 - } else { 478 - return Err(anyhow!("Invalid base64 CAR data")); 479 - } 480 - } 481 - Value::Object(obj) => { 482 - // Handle different CAR data formats (URL, file path, etc.) 483 - if let Some(Value::String(url)) = obj.get("url") { 484 - // Download and process CAR from URL 485 - let car_bytes = self.download_car_file(url).await?; 486 - self.process_car_data(&car_bytes, &import_id).await?; 487 - } 488 - } 489 - _ => { 490 - return Err(anyhow!("Unsupported CAR data format")); 491 - } 492 - } 493 - } else { 494 - return Err(anyhow!("No CAR data found in record")); 495 } 496 } 497 } 498 499 Ok(()) 500 } 501 - } 502 503 - impl CarImportIngestor { 504 - /// Download CAR file from URL 505 - async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> { 506 - let response = reqwest::get(url).await?; 507 - let bytes = response.bytes().await?; 508 - Ok(bytes.to_vec()) 509 - } 510 - } 511 512 - /// Helper struct for extracted AT Protocol data 513 - #[derive(Debug)] 514 - struct ExtractedData { 515 - collection: String, 516 - data: Ipld, 517 - cid: Cid, 518 - } 519 520 - /// Convert IPLD to JSON Value for compatibility with existing ingestors 521 - fn ipld_to_json(ipld: &Ipld) -> Result<Value> { 522 - match ipld { 523 - Ipld::Null => Ok(Value::Null), 524 - Ipld::Bool(b) => Ok(Value::Bool(*b)), 525 - Ipld::Integer(i) => { 526 - // Convert i128 to i64 for JSON compatibility 527 - if let Ok(i64_val) = i64::try_from(*i) { 528 - Ok(Value::Number(i64_val.into())) 529 - } else { 530 - // Fall back to string representation for very large integers 531 - Ok(Value::String(i.to_string())) 532 - } 533 - } 534 - Ipld::Float(f) => { 535 - if let Some(num) = serde_json::Number::from_f64(*f) { 536 - Ok(Value::Number(num)) 537 - } else { 538 - Err(anyhow!("Invalid float value")) 539 } 540 - } 541 - Ipld::String(s) => Ok(Value::String(s.clone())), 542 - Ipld::Bytes(b) => { 543 - // Convert bytes to base64 string 544 - Ok(Value::String(general_purpose::STANDARD.encode(b))) 545 - } 546 - Ipld::List(list) => { 547 - let json_array: Result<Vec<Value>> = list.iter().map(ipld_to_json).collect(); 548 - Ok(Value::Array(json_array?)) 549 - } 550 - Ipld::Map(map) => { 551 - let mut json_map = serde_json::Map::new(); 552 - for (key, value) in map { 553 - json_map.insert(key.clone(), ipld_to_json(value)?); 554 } 555 - Ok(Value::Object(json_map)) 556 } 557 - Ipld::Link(cid) => { 558 - // Convert CID to string representation 559 - Ok(Value::String(cid.to_string())) 560 - } 561 } 562 }
··· 1 + //! CAR (Content Addressable aRchive) Import Ingestor using atmst 2 + //! 3 + //! This module handles importing Teal records from CAR files using the atmst library, 4 + //! which provides proper AT Protocol-style Merkle Search Tree handling. The CAR import process: 5 + //! 6 + //! 1. Receives CAR data via the LexiconIngestor interface (base64 encoded or URL) 7 + //! 2. Uses atmst::CarImporter to parse the CAR file and extract MST structure 8 + //! 3. Converts the CarImporter to an MST for proper tree traversal 9 + //! 4. Iterates through MST nodes to find Teal record types (play, profile, status) 10 + //! 5. Delegates to existing Teal ingestors using the actual DID and proper rkey 11 + //! 12 + //! ## Usage Example 13 + //! 14 + //! ```rust,ignore 15 + //! // CAR data can be provided in a record like: 16 + //! { 17 + //! "carData": "base64-encoded-car-file-here" 18 + //! } 19 + //! 20 + //! // Or as a URL reference: 21 + //! { 22 + //! "carData": { 23 + //! "url": "https://example.com/my-archive.car" 24 + //! } 25 + //! } 26 + //! ``` 27 + //! 28 + //! The ingestor will automatically detect record types and store them using the 29 + //! same logic as real-time Jetstream ingestion, ensuring data consistency. 30 + //! All imported records will be attributed to the DID that initiated the import 31 + //! and use the original rkey from the AT Protocol MST structure. 32 + 33 + use crate::ingestors::car::jobs::{queue_keys, CarImportJob}; 34 + use crate::redis_client::RedisClient; 35 use anyhow::{anyhow, Result}; 36 use async_trait::async_trait; 37 + use atmst::{mst::Mst, Bytes, CarImporter}; 38 + use base64::Engine; 39 + use futures::StreamExt; 40 + use redis::AsyncCommands; 41 use rocketman::{ingestion::LexiconIngestor, types::event::Event}; 42 use serde_json::Value; 43 use sqlx::PgPool; 44 use tracing::{info, warn}; 45 46 + /// Helper struct for extracted records 47 + #[derive(Debug)] 48 + pub struct ExtractedRecord { 49 + pub collection: String, 50 + pub rkey: String, 51 + pub data: serde_json::Value, 52 + } 53 + 54 + /// CAR Import Ingestor handles importing Teal records from CAR files using atmst 55 pub struct CarImportIngestor { 56 sql: PgPool, 57 } 58 59 impl CarImportIngestor { 60 + /// Create a new CAR import ingestor with database connection 61 pub fn new(sql: PgPool) -> Self { 62 Self { sql } 63 } 64 65 + /// Helper to get a Redis connection for job queueing 66 + pub async fn get_redis_connection(&self) -> Result<redis::aio::MultiplexedConnection> { 67 + let redis_url = 68 + std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string()); 69 + let client = RedisClient::new(&redis_url)?; 70 + client 71 + .get_connection() 72 + .await 73 + .map_err(|e| anyhow!("Redis connection error: {}", e)) 74 + } 75 + 76 + /// Process CAR file data using atmst library and extract Teal records 77 + async fn process_car_data(&self, car_data: &[u8], import_id: &str, did: &str) -> Result<()> { 78 + info!( 79 + "Starting CAR file processing with atmst for import {} (DID: {})", 80 + import_id, did 81 + ); 82 + 83 + // Convert to Bytes for atmst 84 + let car_bytes: Bytes = Bytes::from(car_data.to_vec()); 85 + 86 + // Create CarImporter and import the CAR data 87 + let mut car_importer = CarImporter::new(); 88 + car_importer 89 + .import_from_bytes(car_bytes.clone()) 90 + .await 91 + .map_err(|e| anyhow!("Failed to import CAR with atmst: {}", e))?; 92 93 + info!( 94 + "CAR imported successfully. 
Root CIDs: {:?}, Total blocks: {}", 95 + car_importer.roots(), 96 + car_importer.len() 97 + ); 98 + 99 + // Convert CarImporter to MST for proper tree traversal 100 + let mst = Mst::from_car_importer(car_importer) 101 + .await 102 + .map_err(|e| anyhow!("Failed to convert CAR to MST: {}", e))?; 103 104 + info!("MST conversion successful, starting record extraction"); 105 + 106 + // Create a new CarImporter for data access since the previous one was consumed 107 + let mut data_importer = CarImporter::new(); 108 + data_importer 109 + .import_from_bytes(car_bytes) 110 + .await 111 + .map_err(|e| anyhow!("Failed to re-import CAR for data access: {}", e))?; 112 113 + // Extract all records from the MST 114 + let records = self 115 + .extract_records_from_mst(&mst, &data_importer, did) 116 + .await?; 117 118 + info!("Extracted {} records from MST", records.len()); 119 120 + // Process each record through the appropriate ingestor 121 + let mut processed_count = 0; 122 + for record in records { 123 + match self.process_extracted_record(&record, import_id, did).await { 124 + Ok(()) => { 125 + processed_count += 1; 126 + if processed_count % 10 == 0 { 127 + info!("Processed {} records so far", processed_count); 128 + } 129 + } 130 + Err(e) => { 131 + warn!("Failed to process record {}: {}", record.rkey, e); 132 + // Continue processing other records 133 + } 134 } 135 } 136 137 info!( 138 + "Completed CAR file processing: {} records processed for import {}", 139 + processed_count, import_id 140 ); 141 142 Ok(()) 143 } 144 145 + /// Extract all Teal records from the MST 146 + async fn extract_records_from_mst( 147 + &self, 148 + mst: &Mst, 149 + car_importer: &CarImporter, 150 + _did: &str, 151 + ) -> Result<Vec<ExtractedRecord>> { 152 + let mut records = Vec::new(); 153 + 154 + // Use the MST iterator to traverse all entries 155 + let mut stream = mst.iter().into_stream(); 156 157 + while let Some(result) = stream.next().await { 158 + match result { 159 + Ok((key, record_cid)) => { 160 + // Check if this is a Teal record based on the key pattern 161 + if self.is_teal_record_key(&key) { 162 + info!("๐ŸŽต Found Teal record: {} -> {}", key, record_cid); 163 + if let Some((collection, rkey)) = self.parse_teal_key(&key) { 164 + info!(" Collection: {}, rkey: {}", collection, rkey); 165 + // Get the actual record data using the CID 166 + match self.get_record_data(&record_cid, car_importer).await { 167 + Ok(Some(data)) => { 168 + info!(" โœ… Successfully got record data for {}", record_cid); 169 + records.push(ExtractedRecord { 170 + collection, 171 + rkey, 172 + data, 173 + }); 174 + } 175 + Ok(None) => { 176 + warn!(" โŒ No data found for record CID: {}", record_cid); 177 + } 178 + Err(e) => { 179 + warn!( 180 + " โŒ Failed to get record data for {}: {}", 181 + record_cid, e 182 + ); 183 + } 184 + } 185 + } else { 186 + warn!(" โŒ Failed to parse Teal key: {}", key); 187 + } 188 + } 189 + } 190 + Err(e) => { 191 + warn!("Error iterating MST: {}", e); 192 + // Continue with other entries 193 + } 194 } 195 } 196 197 + Ok(records) 198 } 199 200 + /// Get record data from the CAR importer using a CID 201 + async fn get_record_data( 202 &self, 203 + cid: &atmst::Cid, 204 + car_importer: &CarImporter, 205 + ) -> Result<Option<Value>> { 206 + // Try to decode the block as CBOR IPLD directly with atmst::Cid 207 + info!("๐Ÿ” Attempting to decode CBOR for CID: {}", cid); 208 + match car_importer.decode_cbor(cid) { 209 + Ok(ipld) => { 210 + info!(" โœ… Successfully decoded CBOR for CID: {}", cid); 211 + // 
Convert IPLD to JSON for processing by existing ingestors 212 + match self.ipld_to_json(&ipld) { 213 + Ok(json) => { 214 + info!(" โœ… Successfully converted IPLD to JSON for CID: {}", cid); 215 + Ok(Some(json)) 216 + } 217 + Err(e) => { 218 + warn!( 219 + " โŒ Failed to convert IPLD to JSON for CID {}: {}", 220 + cid, e 221 + ); 222 + Ok(None) 223 + } 224 } 225 } 226 + Err(e) => { 227 + warn!(" โŒ Failed to decode CBOR for CID {}: {}", cid, e); 228 + Ok(None) 229 } 230 } 231 } 232 233 + /// Process a single extracted record through the appropriate ingestor 234 + async fn process_extracted_record( 235 &self, 236 + record: &ExtractedRecord, 237 + _import_id: &str, 238 + did: &str, 239 ) -> Result<()> { 240 + info!( 241 + "Processing {} record with rkey: {}", 242 + record.collection, record.rkey 243 + ); 244 + 245 + info!( 246 + "๐Ÿ”„ Processing {} record: {}", 247 + record.collection, record.rkey 248 + ); 249 + match record.collection.as_str() { 250 "fm.teal.alpha.feed.play" => { 251 + info!(" ๐Ÿ“€ Processing play record..."); 252 + let result = self 253 + .process_play_record(&record.data, did, &record.rkey) 254 + .await; 255 + if result.is_ok() { 256 + info!(" โœ… Successfully processed play record"); 257 + } else { 258 + warn!(" โŒ Failed to process play record: {:?}", result); 259 + } 260 + result 261 } 262 "fm.teal.alpha.actor.profile" => { 263 + info!(" ๐Ÿ‘ค Processing profile record..."); 264 + let result = self 265 + .process_profile_record(&record.data, did, &record.rkey) 266 + .await; 267 + if result.is_ok() { 268 + info!(" โœ… Successfully processed profile record"); 269 + } else { 270 + warn!(" โŒ Failed to process profile record: {:?}", result); 271 + } 272 + result 273 } 274 "fm.teal.alpha.actor.status" => { 275 + info!(" ๐Ÿ“ข Processing status record..."); 276 + let result = self 277 + .process_status_record(&record.data, did, &record.rkey) 278 + .await; 279 + if result.is_ok() { 280 + info!(" โœ… Successfully processed status record"); 281 + } else { 282 + warn!(" โŒ Failed to process status record: {:?}", result); 283 + } 284 + result 285 } 286 _ => { 287 + warn!("โ“ Unknown Teal collection: {}", record.collection); 288 + Ok(()) 289 } 290 } 291 + } 292 293 + /// Check if a key represents a Teal record 294 + fn is_teal_record_key(&self, key: &str) -> bool { 295 + key.starts_with("fm.teal.alpha.") && key.contains("/") 296 } 297 298 + /// Parse a Teal MST key to extract collection and rkey 299 + fn parse_teal_key(&self, key: &str) -> Option<(String, String)> { 300 + if let Some(slash_pos) = key.rfind('/') { 301 + let collection = key[..slash_pos].to_string(); 302 + let rkey = key[slash_pos + 1..].to_string(); 303 + Some((collection, rkey)) 304 + } else { 305 + None 306 + } 307 + } 308 309 + /// Process a play record using the existing PlayIngestor 310 + async fn process_play_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> { 311 + match serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(data.clone()) 312 { 313 + Ok(play_record) => { 314 + let play_ingestor = 315 + super::super::teal::feed_play::PlayIngestor::new(self.sql.clone()); 316 + let uri = super::super::teal::assemble_at_uri(did, "fm.teal.alpha.feed.play", rkey); 317 318 + play_ingestor 319 + .insert_play( 320 + &play_record, 321 + &uri, 322 + &format!("car-import-{}", uuid::Uuid::new_v4()), 323 + did, 324 + rkey, 325 + ) 326 + .await?; 327 328 + info!( 329 + "Successfully stored play record: {} by {:?}", 330 + play_record.track_name, play_record.artist_names 331 + ); 332 + 
Ok(()) 333 + } 334 + Err(e) => { 335 + warn!("Failed to deserialize play record data: {}", e); 336 + Err(anyhow!("Invalid play record format: {}", e)) 337 + } 338 + } 339 + } 340 341 + /// Process a profile record using the existing ActorProfileIngestor 342 + async fn process_profile_record(&self, data: &Value, did: &str, _rkey: &str) -> Result<()> { 343 + match serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>( 344 + data.clone(), 345 + ) { 346 + Ok(profile_record) => { 347 + let profile_ingestor = 348 + super::super::teal::actor_profile::ActorProfileIngestor::new(self.sql.clone()); 349 + let did_typed = atrium_api::types::string::Did::new(did.to_string()) 350 + .map_err(|e| anyhow!("Failed to create Did: {}", e))?; 351 352 + profile_ingestor 353 + .insert_profile(did_typed, &profile_record) 354 + .await?; 355 + 356 + info!( 357 + "Successfully stored profile record: {:?}", 358 + profile_record.display_name 359 + ); 360 + Ok(()) 361 + } 362 + Err(e) => { 363 + warn!("Failed to deserialize profile record data: {}", e); 364 + Err(anyhow!("Invalid profile record format: {}", e)) 365 + } 366 } 367 + } 368 369 + /// Process a status record using the existing ActorStatusIngestor 370 + async fn process_status_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> { 371 + match serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>( 372 + data.clone(), 373 + ) { 374 + Ok(status_record) => { 375 + let status_ingestor = 376 + super::super::teal::actor_status::ActorStatusIngestor::new(self.sql.clone()); 377 + let did_typed = atrium_api::types::string::Did::new(did.to_string()) 378 + .map_err(|e| anyhow!("Failed to create Did: {}", e))?; 379 + 380 + status_ingestor 381 + .insert_status( 382 + did_typed, 383 + rkey, 384 + &format!("car-import-{}", uuid::Uuid::new_v4()), 385 + &status_record, 386 + ) 387 + .await?; 388 + 389 + info!("Successfully stored status record from CAR import"); 390 + Ok(()) 391 + } 392 + Err(e) => { 393 + warn!("Failed to deserialize status record data: {}", e); 394 + Err(anyhow!("Invalid status record format: {}", e)) 395 + } 396 + } 397 } 398 399 + /// Fetch and process a CAR file from a PDS for a given identity 400 + pub async fn fetch_and_process_identity_car(&self, handle_or_did: &str) -> Result<String> { 401 + info!("Fetching CAR file for identity: {}", handle_or_did); 402 + 403 + // Resolve to DID if needed 404 + let did = if handle_or_did.starts_with("did:") { 405 + handle_or_did.to_string() 406 + } else { 407 + self.resolve_handle_to_did(handle_or_did).await? 
408 + }; 409 + 410 + // Resolve DID to PDS 411 + let pds_url = self.resolve_did_to_pds(&did).await?; 412 + info!("Resolved {} to PDS: {}", did, pds_url); 413 414 + // Fetch CAR file 415 + let car_data = self.fetch_car_from_pds(&pds_url, &did).await?; 416 417 + // Generate import ID 418 + let import_id = uuid::Uuid::new_v4().to_string(); 419 420 + // Process the CAR data 421 + self.process_car_data(&car_data, &import_id, &did).await?; 422 423 + Ok(import_id) 424 } 425 426 + /// Resolve handle to DID 427 + async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> { 428 + let url = format!( 429 + "https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}", 430 + handle 431 + ); 432 + let response: Value = reqwest::get(&url).await?.json().await?; 433 434 + response["did"] 435 + .as_str() 436 + .map(|s| s.to_string()) 437 + .ok_or_else(|| anyhow!("Failed to resolve handle to DID")) 438 + } 439 440 + /// Resolve DID to PDS URL 441 + async fn resolve_did_to_pds(&self, did: &str) -> Result<String> { 442 + let url = format!("https://plc.directory/{}", did); 443 + let response: Value = reqwest::get(&url).await?.json().await?; 444 445 + if let Some(services) = response["service"].as_array() { 446 + for service in services { 447 + if service["id"] == "#atproto_pds" { 448 + if let Some(endpoint) = service["serviceEndpoint"].as_str() { 449 + return Ok(endpoint.to_string()); 450 + } 451 + } 452 + } 453 } 454 455 + Err(anyhow!("Could not resolve PDS for DID: {}", did)) 456 } 457 458 + /// Fetch CAR file from PDS 459 + async fn fetch_car_from_pds(&self, pds_url: &str, did: &str) -> Result<Vec<u8>> { 460 + let url = format!("{}/xrpc/com.atproto.sync.getRepo?did={}", pds_url, did); 461 + let response = reqwest::get(&url).await?; 462 463 + if !response.status().is_success() { 464 + return Err(anyhow!( 465 + "Failed to fetch CAR file: HTTP {}", 466 + response.status() 467 + )); 468 + } 469 + 470 + let car_data = response.bytes().await?.to_vec(); 471 + info!("Fetched CAR file: {} bytes", car_data.len()); 472 473 + Ok(car_data) 474 } 475 476 + /// Helper: Convert IPLD to JSON 477 + #[allow(clippy::only_used_in_recursion)] 478 + fn ipld_to_json(&self, ipld: &atmst::Ipld) -> Result<Value> { 479 + use atmst::Ipld; 480 + 481 + match ipld { 482 + Ipld::Null => Ok(Value::Null), 483 + Ipld::Bool(b) => Ok(Value::Bool(*b)), 484 + Ipld::Integer(i) => { 485 + if let Ok(i64_val) = i64::try_from(*i) { 486 + Ok(Value::Number(i64_val.into())) 487 + } else { 488 + Ok(Value::String(i.to_string())) 489 + } 490 + } 491 + Ipld::Float(f) => { 492 + if let Some(num) = serde_json::Number::from_f64(*f) { 493 + Ok(Value::Number(num)) 494 + } else { 495 + Err(anyhow!("Invalid float value")) 496 + } 497 + } 498 + Ipld::String(s) => Ok(Value::String(s.clone())), 499 + Ipld::Bytes(b) => Ok(Value::String( 500 + base64::engine::general_purpose::STANDARD.encode(b), 501 + )), 502 + Ipld::List(list) => { 503 + let json_array: Result<Vec<Value>> = 504 + list.iter().map(|v| self.ipld_to_json(v)).collect(); 505 + Ok(Value::Array(json_array?)) 506 + } 507 + Ipld::Map(map) => { 508 + let mut json_map = serde_json::Map::new(); 509 + for (key, value) in map { 510 + json_map.insert(key.clone(), self.ipld_to_json(value)?); 511 + } 512 + Ok(Value::Object(json_map)) 513 + } 514 + Ipld::Link(cid) => Ok(Value::String(cid.to_string())), 515 + } 516 } 517 + } 518 519 + #[async_trait] 520 + impl LexiconIngestor for CarImportIngestor { 521 + async fn ingest(&self, message: Event<Value>) -> Result<()> { 522 + let commit = message 523 + 
.commit 524 + .as_ref() 525 + .ok_or_else(|| anyhow!("CarImportIngestor requires a commit event"))?; 526 + 527 + let record = commit 528 + .record 529 + .as_ref() 530 + .ok_or_else(|| anyhow!("CarImportIngestor requires a record in the commit"))?; 531 + 532 + // Enqueue CAR import job into Redis 533 + let job = CarImportJob { 534 + request_id: uuid::Uuid::new_v4(), 535 + identity: record 536 + .get("identity") 537 + .and_then(|v| v.as_str()) 538 + .ok_or_else(|| anyhow!("Missing identity in record"))? 539 + .to_string(), 540 + since: None, 541 + created_at: chrono::Utc::now(), 542 + description: None, 543 + }; 544 + let job_payload = serde_json::to_string(&job)?; 545 + let mut conn = self.get_redis_connection().await?; 546 + // Specify the expected return type to avoid FromRedisValue fallback issues in edition 2024 547 + let _: () = conn.lpush(queue_keys::CAR_IMPORT_JOBS, job_payload).await?; 548 + tracing::info!("Enqueued CAR import job: {}", job.request_id); 549 + 550 Ok(()) 551 } 552 + } 553 554 + #[allow(dead_code)] 555 + impl CarImportIngestor { 556 + /// Download CAR file from URL 557 + async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> { 558 + let response = reqwest::get(url).await?; 559 + Ok(response.bytes().await?.to_vec()) 560 } 561 562 + /// Import CAR data from bytes (public interface) 563 + pub async fn import_car_bytes(&self, car_data: &[u8], did: &str) -> Result<String> { 564 + let import_id = uuid::Uuid::new_v4().to_string(); 565 + self.process_car_data(car_data, &import_id, did).await?; 566 + Ok(import_id) 567 } 568 569 + /// Consolidate synthetic artists with MusicBrainz artists 570 + pub async fn consolidate_synthetic_artists(&self, min_confidence: f64) -> Result<usize> { 571 + let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone()); 572 + play_ingestor 573 + .consolidate_synthetic_artists(min_confidence) 574 + .await 575 } 576 577 + /// Consolidate duplicate releases 578 + pub async fn consolidate_duplicate_releases(&self, min_confidence: f64) -> Result<usize> { 579 + let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone()); 580 + play_ingestor 581 + .consolidate_duplicate_releases(min_confidence) 582 + .await 583 + } 584 585 + /// Consolidate duplicate recordings 586 + pub async fn consolidate_duplicate_recordings(&self, min_confidence: f64) -> Result<usize> { 587 + let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone()); 588 + play_ingestor 589 + .consolidate_duplicate_recordings(min_confidence) 590 + .await 591 + } 592 593 + /// Preview consolidation candidates before running consolidation 594 + pub async fn preview_consolidation_candidates(&self, min_confidence: f64) -> Result<()> { 595 + let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone()); 596 + play_ingestor 597 + .preview_consolidation_candidates(min_confidence) 598 + .await 599 + } 600 601 + /// Run full batch consolidation for all entity types 602 + pub async fn run_full_consolidation(&self) -> Result<()> { 603 + let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone()); 604 + play_ingestor.run_full_consolidation().await 605 + } 606 + } 607 608 + // Removed unused helper struct for extracted records. 
609 610 + #[cfg(test)] 611 + mod tests { 612 + use super::*; 613 + use atmst::{CarBuilder, Ipld}; 614 + use std::collections::BTreeMap; 615 616 + fn create_mock_teal_play_record() -> Ipld { 617 + let mut record = BTreeMap::new(); 618 + record.insert( 619 + "$type".to_string(), 620 + Ipld::String("fm.teal.alpha.feed.play".to_string()), 621 + ); 622 + record.insert( 623 + "track_name".to_string(), 624 + Ipld::String("Test Song".to_string()), 625 + ); 626 + record.insert( 627 + "artist_names".to_string(), 628 + Ipld::List(vec![Ipld::String("Test Artist".to_string())]), 629 + ); 630 + record.insert("duration".to_string(), Ipld::Integer(180000)); 631 + record.insert( 632 + "created_at".to_string(), 633 + Ipld::String("2024-01-01T00:00:00Z".to_string()), 634 + ); 635 + Ipld::Map(record) 636 } 637 638 + fn create_mock_teal_profile_record() -> Ipld { 639 + let mut record = BTreeMap::new(); 640 + record.insert( 641 + "$type".to_string(), 642 + Ipld::String("fm.teal.alpha.actor.profile".to_string()), 643 ); 644 + record.insert( 645 + "display_name".to_string(), 646 + Ipld::String("Test User".to_string()), 647 + ); 648 + record.insert( 649 + "description".to_string(), 650 + Ipld::String("Music lover".to_string()), 651 + ); 652 + Ipld::Map(record) 653 + } 654 655 + async fn create_test_car_with_teal_records() -> Result<Bytes> { 656 + let mut builder = CarBuilder::new(); 657 658 + // Create test Teal records 659 + let play_record = create_mock_teal_play_record(); 660 + let profile_record = create_mock_teal_profile_record(); 661 662 + // Add records to CAR 663 + let play_cid = builder.add_cbor(&play_record)?; 664 + let profile_cid = builder.add_cbor(&profile_record)?; 665 666 + // Add roots (in a real MST, these would be MST nodes, but for testing this is sufficient) 667 + builder.add_root(play_cid); 668 + builder.add_root(profile_cid); 669 670 + let importer = builder.build(); 671 + importer 672 + .export_to_bytes() 673 + .await 674 + .map_err(|e| anyhow!("Failed to export CAR: {}", e)) 675 + } 676 677 + #[test] 678 + fn test_parse_teal_key() { 679 + // This test doesn't need a database connection or async 680 + let key = "fm.teal.alpha.feed.play/3k2akjdlkjsf"; 681 682 + // Test the parsing logic directly 683 + if let Some(slash_pos) = key.rfind('/') { 684 + let collection = key[..slash_pos].to_string(); 685 + let rkey = key[slash_pos + 1..].to_string(); 686 687 + assert_eq!(collection, "fm.teal.alpha.feed.play"); 688 + assert_eq!(rkey, "3k2akjdlkjsf"); 689 } else { 690 + panic!("Should have found slash in key"); 691 } 692 } 693 694 + #[test] 695 + fn test_is_teal_record_key() { 696 + // Test the logic directly without needing an ingestor instance 697 + fn is_teal_record_key(key: &str) -> bool { 698 + key.starts_with("fm.teal.alpha.") && key.contains("/") 699 } 700 701 + assert!(is_teal_record_key("fm.teal.alpha.feed.play/abc123")); 702 + assert!(is_teal_record_key("fm.teal.alpha.profile/def456")); 703 + assert!(!is_teal_record_key("app.bsky.feed.post/xyz789")); 704 + assert!(!is_teal_record_key("fm.teal.alpha.feed.play")); // No rkey 705 + } 706 707 + #[test] 708 + fn test_ipld_to_json_conversion() { 709 + // Test IPLD to JSON conversion logic directly 710 + use atmst::Ipld; 711 + use std::collections::BTreeMap; 712 713 + let mut record = BTreeMap::new(); 714 + record.insert( 715 + "$type".to_string(), 716 + Ipld::String("fm.teal.alpha.feed.play".to_string()), 717 + ); 718 + record.insert( 719 + "track_name".to_string(), 720 + Ipld::String("Test Song".to_string()), 721 + ); 722 + 
record.insert("duration".to_string(), Ipld::Integer(180000)); 723 + let play_record = Ipld::Map(record); 724 725 + // Test the conversion logic inline 726 + fn ipld_to_json(ipld: &Ipld) -> Result<Value> { 727 + match ipld { 728 + Ipld::Null => Ok(Value::Null), 729 + Ipld::Bool(b) => Ok(Value::Bool(*b)), 730 + Ipld::Integer(i) => { 731 + if let Ok(i64_val) = i64::try_from(*i) { 732 + Ok(Value::Number(i64_val.into())) 733 + } else { 734 + Ok(Value::String(i.to_string())) 735 + } 736 + } 737 + Ipld::String(s) => Ok(Value::String(s.clone())), 738 + Ipld::Map(map) => { 739 + let mut json_map = serde_json::Map::new(); 740 + for (key, value) in map { 741 + json_map.insert(key.clone(), ipld_to_json(value)?); 742 + } 743 + Ok(Value::Object(json_map)) 744 + } 745 + _ => Ok(Value::Null), // Simplified for test 746 + } 747 } 748 749 + let json_result = ipld_to_json(&play_record); 750 + assert!(json_result.is_ok()); 751 + let json = json_result.unwrap(); 752 + assert_eq!(json["$type"], "fm.teal.alpha.feed.play"); 753 + assert_eq!(json["track_name"], "Test Song"); 754 + assert_eq!(json["duration"], 180000); 755 } 756 757 + #[tokio::test] 758 + async fn test_car_creation_and_basic_parsing() -> Result<()> { 759 + // Test that we can create a CAR file with Teal records and parse it 760 + let car_bytes = create_test_car_with_teal_records().await?; 761 762 + // Verify we can import the CAR with atmst 763 + let mut importer = CarImporter::new(); 764 + importer.import_from_bytes(car_bytes).await?; 765 + 766 + assert!(!importer.is_empty()); 767 + assert!(importer.len() >= 2); // Should have at least our 2 test records 768 769 + // Test that we can decode the records 770 + for cid in importer.cids() { 771 + if let Ok(Ipld::Map(map)) = importer.decode_cbor(&cid) { 772 + if let Some(Ipld::String(record_type)) = map.get("$type") { 773 + assert!(record_type.starts_with("fm.teal.alpha.")); 774 + println!("Found Teal record: {}", record_type); 775 } 776 } 777 } 778 779 Ok(()) 780 } 781 + 782 + #[tokio::test] 783 + #[ignore = "requires database connection"] 784 + async fn test_full_car_import_integration() -> Result<()> { 785 + // This test requires a real database connection 786 + let database_url = std::env::var("DATABASE_URL") 787 + .unwrap_or_else(|_| "postgresql://localhost/teal_test".to_string()); 788 + 789 + let pool = sqlx::PgPool::connect(&database_url).await?; 790 + let ingestor = CarImportIngestor::new(pool); 791 792 + // Create test CAR with Teal records 793 + let car_bytes = create_test_car_with_teal_records().await?; 794 + 795 + // Test the full import process 796 + let import_id = uuid::Uuid::new_v4().to_string(); 797 + let test_did = "did:plc:test123"; 798 799 + // This should work with our new atmst implementation 800 + let result = ingestor 801 + .process_car_data(&car_bytes, &import_id, test_did) 802 + .await; 803 804 + // For now, we expect this to work but records might not actually get stored 805 + // because the test CAR doesn't have proper MST structure 806 + match result { 807 + Ok(()) => { 808 + println!("โœ… CAR import completed successfully"); 809 } 810 + Err(e) => { 811 + println!("โš ๏ธ CAR import failed (expected for test data): {}", e); 812 + // This is expected since our test CAR doesn't have proper MST structure 813 } 814 } 815 + 816 + Ok(()) 817 } 818 }
+51
services/cadet/src/ingestors/car/jobs.rs
···
··· 1 + use chrono::{DateTime, Utc}; 2 + use serde::{Deserialize, Serialize}; 3 + use uuid::Uuid; 4 + 5 + #[derive(Debug, Clone, Serialize, Deserialize)] 6 + pub struct CarImportJob { 7 + pub request_id: Uuid, 8 + pub identity: String, 9 + pub since: Option<DateTime<Utc>>, 10 + pub created_at: DateTime<Utc>, 11 + pub description: Option<String>, 12 + } 13 + 14 + #[derive(Debug, Clone, Serialize, Deserialize)] 15 + pub struct CarImportJobStatus { 16 + pub status: JobStatus, 17 + pub created_at: DateTime<Utc>, 18 + pub started_at: Option<DateTime<Utc>>, 19 + pub completed_at: Option<DateTime<Utc>>, 20 + pub error_message: Option<String>, 21 + pub progress: Option<JobProgress>, 22 + } 23 + 24 + #[derive(Debug, Clone, Serialize, Deserialize)] 25 + pub enum JobStatus { 26 + Pending, 27 + Processing, 28 + Completed, 29 + Failed, 30 + Cancelled, 31 + } 32 + 33 + #[derive(Debug, Clone, Serialize, Deserialize)] 34 + pub struct JobProgress { 35 + pub step: String, 36 + pub user_did: Option<String>, 37 + pub pds_host: Option<String>, 38 + pub car_size_bytes: Option<u64>, 39 + pub blocks_processed: Option<u64>, 40 + } 41 + 42 + pub mod queue_keys { 43 + use uuid::Uuid; 44 + 45 + pub const CAR_IMPORT_JOBS: &str = "car_import_jobs"; 46 + pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status"; 47 + 48 + pub fn job_status_key(job_id: &Uuid) -> String { 49 + format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id) 50 + } 51 + }
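The `ingest` implementation above only enqueues work; a separate worker is expected to pop payloads from `car_import_jobs` and report progress under `car_import_status:<request_id>`. A minimal sketch of the worker-side decoding, using only the types defined in this file (the helper itself is hypothetical and not part of the diff):

```rust
use crate::ingestors::car::jobs::{queue_keys, CarImportJob, CarImportJobStatus, JobStatus};

// Hypothetical helper (not in this diff): decode a payload popped from the
// car_import_jobs list and build the initial Pending status a worker would
// store under car_import_status:<request_id>.
fn prepare_job(payload: &str) -> anyhow::Result<(CarImportJob, String, String)> {
    let job: CarImportJob = serde_json::from_str(payload)?;
    let status_key = queue_keys::job_status_key(&job.request_id);
    let status = CarImportJobStatus {
        status: JobStatus::Pending,
        created_at: job.created_at,
        started_at: None,
        completed_at: None,
        error_message: None,
        progress: None,
    };
    Ok((job, status_key, serde_json::to_string(&status)?))
}
```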
+2 -1
services/cadet/src/ingestors/car/mod.rs
··· 1 pub mod car_import; 2 3 - pub use car_import::CarImportIngestor;
··· 1 pub mod car_import; 2 + pub mod jobs; 3 4 + pub use car_import::CarImportIngestor;
+1 -1
services/cadet/src/ingestors/mod.rs
··· 1 - pub mod teal; 2 pub mod car;
··· 1 pub mod car; 2 + pub mod teal;
+7 -7
services/cadet/src/ingestors/teal/actor_status.rs
··· 23 status: &types::fm::teal::alpha::actor::status::RecordData, 24 ) -> anyhow::Result<()> { 25 let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey); 26 - 27 let record_json = serde_json::to_value(status)?; 28 - 29 sqlx::query!( 30 r#" 31 INSERT INTO statii (uri, did, rkey, cid, record) ··· 43 ) 44 .execute(&self.sql) 45 .await?; 46 - 47 Ok(()) 48 } 49 50 pub async fn remove_status(&self, did: Did, rkey: &str) -> anyhow::Result<()> { 51 let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey); 52 - 53 sqlx::query!( 54 r#" 55 DELETE FROM statii WHERE uri = $1 ··· 58 ) 59 .execute(&self.sql) 60 .await?; 61 - 62 Ok(()) 63 } 64 } ··· 71 let record = serde_json::from_value::< 72 types::fm::teal::alpha::actor::status::RecordData, 73 >(record.clone())?; 74 - 75 if let Some(ref commit) = message.commit { 76 if let Some(ref cid) = commit.cid { 77 self.insert_status( ··· 98 } 99 Ok(()) 100 } 101 - }
··· 23 status: &types::fm::teal::alpha::actor::status::RecordData, 24 ) -> anyhow::Result<()> { 25 let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey); 26 + 27 let record_json = serde_json::to_value(status)?; 28 + 29 sqlx::query!( 30 r#" 31 INSERT INTO statii (uri, did, rkey, cid, record) ··· 43 ) 44 .execute(&self.sql) 45 .await?; 46 + 47 Ok(()) 48 } 49 50 pub async fn remove_status(&self, did: Did, rkey: &str) -> anyhow::Result<()> { 51 let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey); 52 + 53 sqlx::query!( 54 r#" 55 DELETE FROM statii WHERE uri = $1 ··· 58 ) 59 .execute(&self.sql) 60 .await?; 61 + 62 Ok(()) 63 } 64 } ··· 71 let record = serde_json::from_value::< 72 types::fm::teal::alpha::actor::status::RecordData, 73 >(record.clone())?; 74 + 75 if let Some(ref commit) = message.commit { 76 if let Some(ref cid) = commit.cid { 77 self.insert_status( ··· 98 } 99 Ok(()) 100 } 101 + }
+1132 -62
services/cadet/src/ingestors/teal/feed_play.rs
··· 7 8 use super::assemble_at_uri; 9 10 pub struct PlayIngestor { 11 sql: PgPool, 12 } ··· 58 Self { sql } 59 } 60 61 - /// Inserts or updates an artist in the database. 62 - /// Returns the Uuid of the artist. 63 - async fn insert_artist(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> { 64 - let artist_uuid = Uuid::parse_str(mbid)?; 65 - let res = sqlx::query!( 66 r#" 67 - INSERT INTO artists (mbid, name) VALUES ($1, $2) 68 - ON CONFLICT (mbid) DO NOTHING 69 - RETURNING mbid; 70 "#, 71 - artist_uuid, 72 - name 73 ) 74 .fetch_all(&self.sql) 75 .await?; 76 77 - if !res.is_empty() { 78 - // TODO: send request to async scrape data from local MB instance 79 } 80 81 - Ok(artist_uuid) 82 } 83 84 /// Inserts or updates a release in the database. 85 /// Returns the Uuid of the release. 86 async fn insert_release(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> { 87 let release_uuid = Uuid::parse_str(mbid)?; 88 let res = sqlx::query!( 89 r#" 90 - INSERT INTO releases (mbid, name) VALUES ($1, $2) 91 - ON CONFLICT (mbid) DO NOTHING 92 RETURNING mbid; 93 "#, 94 release_uuid, 95 - name 96 ) 97 .fetch_all(&self.sql) 98 .await?; ··· 108 /// Returns the Uuid of the recording. 109 async fn insert_recording(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> { 110 let recording_uuid = Uuid::parse_str(mbid)?; 111 let res = sqlx::query!( 112 r#" 113 - INSERT INTO recordings (mbid, name) VALUES ($1, $2) 114 - ON CONFLICT (mbid) DO NOTHING 115 RETURNING mbid; 116 "#, 117 recording_uuid, 118 - name 119 ) 120 .fetch_all(&self.sql) 121 .await?; ··· 126 127 Ok(recording_uuid) 128 } 129 130 pub async fn insert_play( 131 &self, ··· 137 ) -> anyhow::Result<()> { 138 dbg!("ingesting", play_record); 139 let play_record = clean(play_record); 140 - let mut parsed_artists: Vec<(Uuid, String)> = vec![]; 141 if let Some(ref artists) = &play_record.artists { 142 for artist in artists { 143 let artist_name = artist.artist_name.clone(); 144 - let artist_mbid = artist.artist_mb_id.clone(); 145 - if let Some(artist_mbid) = artist_mbid { 146 - let artist_uuid = self.insert_artist(&artist_mbid, &artist_name).await?; 147 - parsed_artists.push((artist_uuid, artist_name.clone())); 148 } else { 149 - // Handle case where artist MBID is missing, maybe log a warning 150 - eprintln!("Warning: Artist MBID missing for '{}'", artist_name); 151 - } 152 } 153 } else { 154 - if let Some(artist_names) = &play_record.artist_names { 155 - for artist_name in artist_names { 156 - // Assuming artist_mbid is optional, handle missing mbid gracefully 157 - let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids { 158 - mbid_list.get( 159 - artist_names 160 - .iter() 161 - .position(|name| name == artist_name) 162 - .unwrap_or(0), 163 - ) 164 - } else { 165 - None 166 - }; 167 168 - if let Some(artist_mbid) = artist_mbid_opt { 169 - let artist_uuid = self.insert_artist(artist_mbid, artist_name).await?; 170 - parsed_artists.push((artist_uuid, artist_name.clone())); 171 - } else { 172 - // Handle case where artist MBID is missing, maybe log a warning 173 - eprintln!("Warning: Artist MBID missing for '{}'", artist_name); 174 - } 175 - } 176 - } 177 } 178 179 // Insert release if missing ··· 203 time::OffsetDateTime::from_unix_timestamp(played_time.as_ref().timestamp()) 204 .unwrap_or_else(|_| time::OffsetDateTime::now_utc()); 205 206 - // Our main insert into plays 207 sqlx::query!( 208 r#" 209 INSERT INTO plays ( 210 uri, cid, did, rkey, isrc, duration, track_name, played_time, 211 processed_time, release_mbid, 
release_name, recording_mbid, 212 - submission_client_agent, music_service_base_domain 213 ) VALUES ( 214 $1, $2, $3, $4, $5, $6, $7, $8, 215 - NOW(), $9, $10, $11, $12, $13 216 ) ON CONFLICT(uri) DO UPDATE SET 217 isrc = EXCLUDED.isrc, 218 duration = EXCLUDED.duration, ··· 223 release_name = EXCLUDED.release_name, 224 recording_mbid = EXCLUDED.recording_mbid, 225 submission_client_agent = EXCLUDED.submission_client_agent, 226 - music_service_base_domain = EXCLUDED.music_service_base_domain; 227 "#, 228 uri, 229 cid, ··· 238 recording_mbid_opt, 239 play_record.submission_client_agent, 240 play_record.music_service_base_domain, 241 ) 242 .execute(&self.sql) 243 .await?; 244 245 - // Insert plays into join table 246 - for (mbid, artist) in &parsed_artists { 247 - let artist_name = artist.clone(); // Clone to move into the query 248 - 249 sqlx::query!( 250 r#" 251 - INSERT INTO play_to_artists (play_uri, artist_mbid, artist_name) VALUES 252 - ($1, $2, $3) 253 - ON CONFLICT (play_uri, artist_mbid) DO NOTHING; 254 - "#, 255 uri, 256 - mbid, 257 artist_name 258 ) 259 .execute(&self.sql)
··· 7 8 use super::assemble_at_uri; 9 10 + #[derive(Debug, Clone)] 11 + struct FuzzyMatchCandidate { 12 + artist_id: i32, 13 + name: String, 14 + confidence: f64, 15 + } 16 + 17 + struct MusicBrainzCleaner; 18 + 19 + impl MusicBrainzCleaner { 20 + /// List of common "guff" words found in parentheses that should be removed 21 + const GUFF_WORDS: &'static [&'static str] = &[ 22 + "a cappella", 23 + "acoustic", 24 + "bonus", 25 + "censored", 26 + "clean", 27 + "club", 28 + "clubmix", 29 + "composition", 30 + "cut", 31 + "dance", 32 + "demo", 33 + "dialogue", 34 + "dirty", 35 + "edit", 36 + "excerpt", 37 + "explicit", 38 + "extended", 39 + "feat", 40 + "featuring", 41 + "ft", 42 + "instrumental", 43 + "interlude", 44 + "intro", 45 + "karaoke", 46 + "live", 47 + "long", 48 + "main", 49 + "maxi", 50 + "megamix", 51 + "mix", 52 + "mono", 53 + "official", 54 + "orchestral", 55 + "original", 56 + "outro", 57 + "outtake", 58 + "outtakes", 59 + "piano", 60 + "quadraphonic", 61 + "radio", 62 + "rap", 63 + "re-edit", 64 + "reedit", 65 + "refix", 66 + "rehearsal", 67 + "reinterpreted", 68 + "released", 69 + "release", 70 + "remake", 71 + "remastered", 72 + "remaster", 73 + "master", 74 + "remix", 75 + "remixed", 76 + "remode", 77 + "reprise", 78 + "rework", 79 + "reworked", 80 + "rmx", 81 + "session", 82 + "short", 83 + "single", 84 + "skit", 85 + "stereo", 86 + "studio", 87 + "take", 88 + "takes", 89 + "tape", 90 + "track", 91 + "tryout", 92 + "uncensored", 93 + "unknown", 94 + "unplugged", 95 + "untitled", 96 + "version", 97 + "ver", 98 + "video", 99 + "vocal", 100 + "vs", 101 + "with", 102 + "without", 103 + ]; 104 + 105 + /// Clean artist name by removing common variations and guff 106 + fn clean_artist_name(name: &str) -> String { 107 + let mut cleaned = name.trim().to_string(); 108 + 109 + // Remove common featuring patterns 110 + if let Some(pos) = cleaned.to_lowercase().find(" feat") { 111 + cleaned = cleaned[..pos].trim().to_string(); 112 + } 113 + if let Some(pos) = cleaned.to_lowercase().find(" ft.") { 114 + cleaned = cleaned[..pos].trim().to_string(); 115 + } 116 + if let Some(pos) = cleaned.to_lowercase().find(" featuring") { 117 + cleaned = cleaned[..pos].trim().to_string(); 118 + } 119 + 120 + // Remove parenthetical content if it looks like guff 121 + if let Some(start) = cleaned.find('(') { 122 + if let Some(end) = cleaned.find(')') { 123 + let paren_content = &cleaned[start + 1..end].to_lowercase(); 124 + if Self::is_likely_guff(paren_content) { 125 + cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..]) 126 + .trim() 127 + .to_string(); 128 + } 129 + } 130 + } 131 + 132 + // Remove brackets with guff 133 + if let Some(start) = cleaned.find('[') { 134 + if let Some(end) = cleaned.find(']') { 135 + let bracket_content = &cleaned[start + 1..end].to_lowercase(); 136 + if Self::is_likely_guff(bracket_content) { 137 + cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..]) 138 + .trim() 139 + .to_string(); 140 + } 141 + } 142 + } 143 + 144 + // Remove common prefixes/suffixes 145 + if cleaned.to_lowercase().starts_with("the ") && cleaned.len() > 4 { 146 + let without_the = &cleaned[4..]; 147 + if !without_the.trim().is_empty() { 148 + return without_the.trim().to_string(); 149 + } 150 + } 151 + 152 + cleaned.trim().to_string() 153 + } 154 + 155 + /// Clean track name by removing common variations and guff 156 + fn clean_track_name(name: &str) -> String { 157 + let mut cleaned = name.trim().to_string(); 158 + 159 + // Remove parenthetical content if it looks like guff 
160 + if let Some(start) = cleaned.find('(') { 161 + if let Some(end) = cleaned.find(')') { 162 + let paren_content = &cleaned[start + 1..end].to_lowercase(); 163 + if Self::is_likely_guff(paren_content) { 164 + cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..]) 165 + .trim() 166 + .to_string(); 167 + } 168 + } 169 + } 170 + 171 + // Remove featuring artists from track titles 172 + if let Some(pos) = cleaned.to_lowercase().find(" feat") { 173 + cleaned = cleaned[..pos].trim().to_string(); 174 + } 175 + if let Some(pos) = cleaned.to_lowercase().find(" ft.") { 176 + cleaned = cleaned[..pos].trim().to_string(); 177 + } 178 + 179 + cleaned.trim().to_string() 180 + } 181 + 182 + /// Check if parenthetical content is likely "guff" that should be removed 183 + fn is_likely_guff(content: &str) -> bool { 184 + let content_lower = content.to_lowercase(); 185 + let words: Vec<&str> = content_lower.split_whitespace().collect(); 186 + 187 + // If most words are guff words, consider it guff 188 + let guff_word_count = words 189 + .iter() 190 + .filter(|word| Self::GUFF_WORDS.contains(word)) 191 + .count(); 192 + 193 + // Also check for years (19XX or 20XX) 194 + let has_year = content_lower.chars().collect::<String>().contains("19") 195 + || content_lower.contains("20"); 196 + 197 + // Consider it guff if >50% are guff words, or if it contains years, or if it's short and common 198 + guff_word_count > words.len() / 2 199 + || has_year 200 + || (words.len() <= 2 201 + && Self::GUFF_WORDS 202 + .iter() 203 + .any(|&guff| content_lower.contains(guff))) 204 + } 205 + 206 + /// Normalize text for comparison (remove special chars, lowercase, etc.) 207 + fn normalize_for_comparison(text: &str) -> String { 208 + text.chars() 209 + .filter(|c| c.is_alphanumeric() || c.is_whitespace()) 210 + .collect::<String>() 211 + .to_lowercase() 212 + .split_whitespace() 213 + .collect::<Vec<&str>>() 214 + .join(" ") 215 + } 216 + } 217 + 218 pub struct PlayIngestor { 219 sql: PgPool, 220 } ··· 266 Self { sql } 267 } 268 269 + /// Batch consolidate synthetic artists that match existing MusicBrainz artists 270 + pub async fn consolidate_synthetic_artists( 271 + &self, 272 + min_confidence: f64, 273 + ) -> anyhow::Result<usize> { 274 + tracing::info!( 275 + "๐Ÿ”„ Starting batch consolidation of synthetic artists with confidence >= {:.2}", 276 + min_confidence 277 + ); 278 + 279 + let consolidation_candidates = sqlx::query!( 280 + r#" 281 + SELECT DISTINCT 282 + ae1.id as synthetic_id, 283 + ae1.name as synthetic_name, 284 + ae2.id as target_id, 285 + ae2.name as target_name, 286 + ae2.mbid as target_mbid, 287 + similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score 288 + FROM artists_extended ae1 289 + CROSS JOIN artists_extended ae2 290 + WHERE ae1.id != ae2.id 291 + AND ae1.mbid_type = 'synthetic' 292 + AND ae2.mbid_type = 'musicbrainz' 293 + AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1 294 + ORDER BY similarity_score DESC 295 + "#, 296 + min_confidence as f32 297 + ) 298 + .fetch_all(&self.sql) 299 + .await?; 300 + 301 + let mut consolidated_count = 0; 302 + 303 + for candidate in consolidation_candidates { 304 + let synthetic_id = candidate.synthetic_id; 305 + let target_id = candidate.target_id; 306 + let similarity = candidate.similarity_score.unwrap_or(0.0) as f64; 307 + 308 + // Double-check with our improved similarity calculation 309 + let calculated_similarity = 310 + Self::calculate_similarity(&candidate.synthetic_name, &candidate.target_name, true); 311 + 
312 + let final_confidence = similarity.max(calculated_similarity); 313 + 314 + if final_confidence >= min_confidence { 315 + // Move all play relationships from synthetic artist to MusicBrainz artist 316 + let moved_plays = sqlx::query!( 317 + r#" 318 + UPDATE play_to_artists_extended 319 + SET artist_id = $1, artist_name = $2 320 + WHERE artist_id = $3 321 + AND NOT EXISTS ( 322 + SELECT 1 FROM play_to_artists_extended existing 323 + WHERE existing.play_uri = play_to_artists_extended.play_uri 324 + AND existing.artist_id = $1 325 + ) 326 + "#, 327 + target_id, 328 + candidate.target_name, 329 + synthetic_id 330 + ) 331 + .execute(&self.sql) 332 + .await?; 333 + 334 + // Remove duplicate relationships that couldn't be moved 335 + sqlx::query!( 336 + "DELETE FROM play_to_artists_extended WHERE artist_id = $1", 337 + synthetic_id 338 + ) 339 + .execute(&self.sql) 340 + .await?; 341 + 342 + // Remove the synthetic artist 343 + sqlx::query!("DELETE FROM artists_extended WHERE id = $1", synthetic_id) 344 + .execute(&self.sql) 345 + .await?; 346 + 347 + consolidated_count += 1; 348 + 349 + tracing::info!( 350 + "โœ… Consolidated '{}' โ†’ '{}' (confidence: {:.2}, moved {} plays)", 351 + candidate.synthetic_name, 352 + candidate.target_name, 353 + final_confidence, 354 + moved_plays.rows_affected() 355 + ); 356 + } 357 + } 358 + 359 + // Refresh materialized views after consolidation 360 + if consolidated_count > 0 { 361 + tracing::info!("๐Ÿ”„ Refreshing materialized views after consolidation"); 362 + sqlx::query!("REFRESH MATERIALIZED VIEW mv_artist_play_counts;") 363 + .execute(&self.sql) 364 + .await?; 365 + } 366 + 367 + tracing::info!( 368 + "๐ŸŽ‰ Batch consolidation complete: {} artists consolidated", 369 + consolidated_count 370 + ); 371 + Ok(consolidated_count) 372 + } 373 + 374 + /// Find and consolidate duplicate releases/albums (requires matching artist context) 375 + pub async fn consolidate_duplicate_releases( 376 + &self, 377 + min_confidence: f64, 378 + ) -> anyhow::Result<usize> { 379 + tracing::info!( 380 + "๐Ÿ”„ Starting release consolidation with confidence >= {:.2} (requires artist context)", 381 + min_confidence 382 + ); 383 + 384 + // Find releases that have similar names AND share at least one artist 385 + let release_candidates = sqlx::query!( 386 r#" 387 + SELECT DISTINCT 388 + r1.mbid as release1_mbid, 389 + r1.name as release1_name, 390 + r2.mbid as release2_mbid, 391 + r2.name as release2_name, 392 + similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score, 393 + COUNT(DISTINCT ptae1.artist_id) as shared_artists 394 + FROM releases r1 395 + CROSS JOIN releases r2 396 + INNER JOIN plays p1 ON p1.release_mbid = r1.mbid 397 + INNER JOIN plays p2 ON p2.release_mbid = r2.mbid 398 + INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri 399 + INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri 400 + WHERE r1.mbid != r2.mbid 401 + AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1 402 + AND ptae1.artist_id = ptae2.artist_id -- Same artist 403 + AND ( 404 + (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR 405 + (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, '')))) 406 + ) -- Same or no discriminants 407 + GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score 408 + HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist 409 + ORDER BY similarity_score DESC, shared_artists DESC 410 "#, 411 + min_confidence as f32 412 ) 413 .fetch_all(&self.sql) 414 
.await?; 415 416 + let mut consolidated_count = 0; 417 + 418 + for candidate in release_candidates { 419 + let similarity = candidate.similarity_score.unwrap_or(0.0) as f64; 420 + let shared_artists = candidate.shared_artists.unwrap_or(0); 421 + 422 + // Use MusicBrainz-style cleaning for better matching 423 + let cleaned_similarity = Self::calculate_similarity( 424 + &candidate.release1_name, 425 + &candidate.release2_name, 426 + false, // is_artist = false for releases 427 + ); 428 + 429 + let final_confidence = similarity.max(cleaned_similarity); 430 + 431 + // Require high confidence AND shared artists for album consolidation 432 + if final_confidence >= min_confidence && shared_artists > 0 { 433 + // Choose the release with more plays as the canonical one 434 + let r1_plays: i64 = sqlx::query_scalar!( 435 + "SELECT COUNT(*) FROM plays WHERE release_mbid = $1", 436 + candidate.release1_mbid 437 + ) 438 + .fetch_one(&self.sql) 439 + .await? 440 + .unwrap_or(0); 441 + 442 + let r2_plays: i64 = sqlx::query_scalar!( 443 + "SELECT COUNT(*) FROM plays WHERE release_mbid = $1", 444 + candidate.release2_mbid 445 + ) 446 + .fetch_one(&self.sql) 447 + .await? 448 + .unwrap_or(0); 449 + 450 + let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays { 451 + ( 452 + candidate.release1_mbid, 453 + candidate.release2_mbid, 454 + candidate.release1_name.clone(), 455 + ) 456 + } else { 457 + ( 458 + candidate.release2_mbid, 459 + candidate.release1_mbid, 460 + candidate.release2_name.clone(), 461 + ) 462 + }; 463 + 464 + // Update plays to use the canonical release 465 + let updated_plays = sqlx::query!( 466 + "UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3", 467 + keep_mbid, 468 + keep_name, 469 + remove_mbid 470 + ) 471 + .execute(&self.sql) 472 + .await?; 473 + 474 + // Remove the duplicate release 475 + sqlx::query!("DELETE FROM releases WHERE mbid = $1", remove_mbid) 476 + .execute(&self.sql) 477 + .await?; 478 + 479 + consolidated_count += 1; 480 + 481 + tracing::info!( 482 + "โœ… Consolidated releases: '{}' โ†’ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)", 483 + if r1_plays >= r2_plays { 484 + &candidate.release2_name 485 + } else { 486 + &candidate.release1_name 487 + }, 488 + keep_name, 489 + final_confidence, 490 + shared_artists, 491 + updated_plays.rows_affected() 492 + ); 493 + } 494 } 495 496 + tracing::info!( 497 + "๐ŸŽ‰ Release consolidation complete: {} releases consolidated", 498 + consolidated_count 499 + ); 500 + Ok(consolidated_count) 501 + } 502 + 503 + /// Find and consolidate duplicate recordings/tracks (requires matching artist context) 504 + pub async fn consolidate_duplicate_recordings( 505 + &self, 506 + min_confidence: f64, 507 + ) -> anyhow::Result<usize> { 508 + tracing::info!( 509 + "๐Ÿ”„ Starting recording consolidation with confidence >= {:.2} (requires artist context)", 510 + min_confidence 511 + ); 512 + 513 + // Find recordings that have similar names AND share at least one artist 514 + let recording_candidates = sqlx::query!( 515 + r#" 516 + SELECT DISTINCT 517 + r1.mbid as recording1_mbid, 518 + r1.name as recording1_name, 519 + r2.mbid as recording2_mbid, 520 + r2.name as recording2_name, 521 + similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score, 522 + COUNT(DISTINCT ptae1.artist_id) as shared_artists 523 + FROM recordings r1 524 + CROSS JOIN recordings r2 525 + INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid 526 + INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid 527 + INNER 
JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri 528 + INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri 529 + WHERE r1.mbid != r2.mbid 530 + AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1 531 + AND ptae1.artist_id = ptae2.artist_id -- Same artist 532 + AND ( 533 + (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR 534 + (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, '')))) 535 + ) -- Same or no discriminants 536 + GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score 537 + HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist 538 + ORDER BY similarity_score DESC, shared_artists DESC 539 + "#, 540 + min_confidence as f32 541 + ) 542 + .fetch_all(&self.sql) 543 + .await?; 544 + 545 + let mut consolidated_count = 0; 546 + 547 + for candidate in recording_candidates { 548 + let similarity = candidate.similarity_score.unwrap_or(0.0) as f64; 549 + let shared_artists = candidate.shared_artists.unwrap_or(0); 550 + 551 + // Use MusicBrainz-style cleaning for track names 552 + let cleaned_similarity = Self::calculate_similarity( 553 + &candidate.recording1_name, 554 + &candidate.recording2_name, 555 + false, // is_artist = false for recordings 556 + ); 557 + 558 + let final_confidence = similarity.max(cleaned_similarity); 559 + 560 + // Require high confidence AND shared artists for track consolidation 561 + if final_confidence >= min_confidence && shared_artists > 0 { 562 + // Choose the recording with more plays as canonical 563 + let r1_plays: i64 = sqlx::query_scalar!( 564 + "SELECT COUNT(*) FROM plays WHERE recording_mbid = $1", 565 + candidate.recording1_mbid 566 + ) 567 + .fetch_one(&self.sql) 568 + .await? 569 + .unwrap_or(0); 570 + 571 + let r2_plays: i64 = sqlx::query_scalar!( 572 + "SELECT COUNT(*) FROM plays WHERE recording_mbid = $1", 573 + candidate.recording2_mbid 574 + ) 575 + .fetch_one(&self.sql) 576 + .await? 
577 + .unwrap_or(0); 578 + 579 + let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays { 580 + ( 581 + candidate.recording1_mbid, 582 + candidate.recording2_mbid, 583 + candidate.recording1_name.clone(), 584 + ) 585 + } else { 586 + ( 587 + candidate.recording2_mbid, 588 + candidate.recording1_mbid, 589 + candidate.recording2_name.clone(), 590 + ) 591 + }; 592 + 593 + // Update plays to use the canonical recording 594 + let updated_plays = sqlx::query!( 595 + "UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2", 596 + keep_mbid, 597 + remove_mbid 598 + ) 599 + .execute(&self.sql) 600 + .await?; 601 + 602 + // Remove the duplicate recording 603 + sqlx::query!("DELETE FROM recordings WHERE mbid = $1", remove_mbid) 604 + .execute(&self.sql) 605 + .await?; 606 + 607 + consolidated_count += 1; 608 + 609 + tracing::info!( 610 + "โœ… Consolidated recordings: '{}' โ†’ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)", 611 + if r1_plays >= r2_plays { 612 + &candidate.recording2_name 613 + } else { 614 + &candidate.recording1_name 615 + }, 616 + keep_name, 617 + final_confidence, 618 + shared_artists, 619 + updated_plays.rows_affected() 620 + ); 621 + } 622 + } 623 + 624 + tracing::info!( 625 + "๐ŸŽ‰ Recording consolidation complete: {} recordings consolidated", 626 + consolidated_count 627 + ); 628 + Ok(consolidated_count) 629 + } 630 + 631 + /// Preview consolidation candidates to show what would be merged 632 + pub async fn preview_consolidation_candidates( 633 + &self, 634 + min_confidence: f64, 635 + ) -> anyhow::Result<()> { 636 + tracing::info!( 637 + "๐Ÿ” Previewing consolidation candidates (confidence >= {:.2})", 638 + min_confidence 639 + ); 640 + 641 + // Preview artist consolidations 642 + let artist_candidates = sqlx::query!( 643 + r#" 644 + SELECT DISTINCT 645 + ae1.name as synthetic_name, 646 + ae2.name as target_name, 647 + similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score, 648 + COUNT(ptae1.play_uri) as synthetic_plays, 649 + COUNT(ptae2.play_uri) as target_plays 650 + FROM artists_extended ae1 651 + CROSS JOIN artists_extended ae2 652 + LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id 653 + LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id 654 + WHERE ae1.id != ae2.id 655 + AND ae1.mbid_type = 'synthetic' 656 + AND ae2.mbid_type = 'musicbrainz' 657 + AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1 658 + GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score 659 + ORDER BY similarity_score DESC 660 + LIMIT 10 661 + "#, 662 + min_confidence as f32 663 + ) 664 + .fetch_all(&self.sql) 665 + .await?; 666 + 667 + if !artist_candidates.is_empty() { 668 + tracing::info!("๐ŸŽฏ Artist consolidation candidates:"); 669 + for candidate in artist_candidates { 670 + tracing::info!( 671 + " '{}' โ†’ '{}' (confidence: {:.2}, {} + {} plays)", 672 + candidate.synthetic_name, 673 + candidate.target_name, 674 + candidate.similarity_score.unwrap_or(0.0), 675 + candidate.synthetic_plays.unwrap_or(0), 676 + candidate.target_plays.unwrap_or(0) 677 + ); 678 + } 679 + } 680 + 681 + // Preview release consolidations (with artist context) 682 + let release_candidates = sqlx::query!( 683 + r#" 684 + SELECT DISTINCT 685 + r1.name as release1_name, 686 + r2.name as release2_name, 687 + similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score, 688 + COUNT(DISTINCT ptae1.artist_id) as shared_artists, 689 + STRING_AGG(DISTINCT ae.name, ', ') as artist_names 690 + FROM 
releases r1 691 + CROSS JOIN releases r2 692 + INNER JOIN plays p1 ON p1.release_mbid = r1.mbid 693 + INNER JOIN plays p2 ON p2.release_mbid = r2.mbid 694 + INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri 695 + INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri 696 + INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id 697 + WHERE r1.mbid != r2.mbid 698 + AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1 699 + AND ptae1.artist_id = ptae2.artist_id 700 + GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score 701 + HAVING COUNT(DISTINCT ptae1.artist_id) > 0 702 + ORDER BY similarity_score DESC 703 + LIMIT 5 704 + "#, 705 + min_confidence as f32 706 + ) 707 + .fetch_all(&self.sql) 708 + .await?; 709 + 710 + if !release_candidates.is_empty() { 711 + tracing::info!("๐Ÿ’ฟ Release consolidation candidates (with artist context):"); 712 + for candidate in release_candidates { 713 + tracing::info!( 714 + " '{}' โ†” '{}' (confidence: {:.2}, {} shared artists: {})", 715 + candidate.release1_name, 716 + candidate.release2_name, 717 + candidate.similarity_score.unwrap_or(0.0), 718 + candidate.shared_artists.unwrap_or(0), 719 + candidate.artist_names.unwrap_or_default() 720 + ); 721 + } 722 + } 723 + 724 + // Preview recording consolidations (with artist context) 725 + let recording_candidates = sqlx::query!( 726 + r#" 727 + SELECT DISTINCT 728 + r1.name as recording1_name, 729 + r2.name as recording2_name, 730 + similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score, 731 + COUNT(DISTINCT ptae1.artist_id) as shared_artists, 732 + STRING_AGG(DISTINCT ae.name, ', ') as artist_names 733 + FROM recordings r1 734 + CROSS JOIN recordings r2 735 + INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid 736 + INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid 737 + INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri 738 + INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri 739 + INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id 740 + WHERE r1.mbid != r2.mbid 741 + AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1 742 + AND ptae1.artist_id = ptae2.artist_id 743 + GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score 744 + HAVING COUNT(DISTINCT ptae1.artist_id) > 0 745 + ORDER BY similarity_score DESC 746 + LIMIT 5 747 + "#, 748 + min_confidence as f32 749 + ) 750 + .fetch_all(&self.sql) 751 + .await?; 752 + 753 + if !recording_candidates.is_empty() { 754 + tracing::info!("๐ŸŽต Recording consolidation candidates (with artist context):"); 755 + for candidate in recording_candidates { 756 + tracing::info!( 757 + " '{}' โ†” '{}' (confidence: {:.2}, {} shared artists: {})", 758 + candidate.recording1_name, 759 + candidate.recording2_name, 760 + candidate.similarity_score.unwrap_or(0.0), 761 + candidate.shared_artists.unwrap_or(0), 762 + candidate.artist_names.unwrap_or_default() 763 + ); 764 + } 765 + } 766 + 767 + Ok(()) 768 + } 769 + 770 + /// Run full batch consolidation for all entity types 771 + pub async fn run_full_consolidation(&self) -> anyhow::Result<()> { 772 + tracing::info!("๐Ÿš€ Starting full batch consolidation process"); 773 + 774 + // First, preview what we would consolidate 775 + self.preview_consolidation_candidates(0.92).await?; 776 + 777 + let artist_count = self.consolidate_synthetic_artists(0.92).await?; 778 + let release_count = self.consolidate_duplicate_releases(0.92).await?; 779 + let recording_count = 
self.consolidate_duplicate_recordings(0.92).await?; 780 + 781 + tracing::info!( 782 + "๐ŸŽ‰ Full consolidation complete! Artists: {}, Releases: {}, Recordings: {}", 783 + artist_count, 784 + release_count, 785 + recording_count 786 + ); 787 + 788 + Ok(()) 789 + } 790 + 791 + /// Generate a synthetic MBID for artists without MusicBrainz data using database function 792 + async fn generate_synthetic_mbid(&self, artist_name: &str) -> anyhow::Result<Uuid> { 793 + let result = sqlx::query_scalar!("SELECT generate_synthetic_mbid($1)", artist_name) 794 + .fetch_one(&self.sql) 795 + .await?; 796 + 797 + result.ok_or_else(|| anyhow!("Failed to generate synthetic MBID")) 798 + } 799 + 800 + /// Generate a fallback artist name for tracks without any artist information 801 + fn generate_fallback_artist(track_name: &str) -> String { 802 + format!( 803 + "Unknown Artist ({})", 804 + track_name.chars().take(20).collect::<String>() 805 + ) 806 + } 807 + 808 + /// Normalize text for fuzzy matching with MusicBrainz-style cleaning 809 + fn normalize_text(text: &str, is_artist: bool) -> String { 810 + let cleaned = if is_artist { 811 + MusicBrainzCleaner::clean_artist_name(text) 812 + } else { 813 + MusicBrainzCleaner::clean_track_name(text) 814 + }; 815 + 816 + MusicBrainzCleaner::normalize_for_comparison(&cleaned) 817 + } 818 + 819 + /// Calculate string similarity with MusicBrainz-style cleaning 820 + fn calculate_similarity(s1: &str, s2: &str, is_artist: bool) -> f64 { 821 + let s1_norm = Self::normalize_text(s1, is_artist); 822 + let s2_norm = Self::normalize_text(s2, is_artist); 823 + 824 + if s1_norm == s2_norm { 825 + return 1.0; 826 + } 827 + 828 + if s1_norm.is_empty() || s2_norm.is_empty() { 829 + return 0.0; 830 + } 831 + 832 + // Calculate basic similarity 833 + let max_len = s1_norm.len().max(s2_norm.len()) as f64; 834 + let min_len = s1_norm.len().min(s2_norm.len()) as f64; 835 + 836 + // Character-based similarity 837 + let common_chars = s1_norm 838 + .chars() 839 + .zip(s2_norm.chars()) 840 + .filter(|(a, b)| a == b) 841 + .count() as f64; 842 + 843 + // Word-based similarity boost 844 + let s1_words: std::collections::HashSet<&str> = s1_norm.split_whitespace().collect(); 845 + let s2_words: std::collections::HashSet<&str> = s2_norm.split_whitespace().collect(); 846 + let common_words = s1_words.intersection(&s2_words).count() as f64; 847 + let total_words = s1_words.union(&s2_words).count() as f64; 848 + 849 + let word_similarity = if total_words > 0.0 { 850 + common_words / total_words 851 + } else { 852 + 0.0 853 + }; 854 + let char_similarity = common_chars / max_len; 855 + 856 + // Boost for very similar lengths (helps with minor differences) 857 + let length_factor = if max_len > 0.0 { 858 + min_len / max_len 859 + } else { 860 + 0.0 861 + }; 862 + 863 + // Weighted combination: 50% word similarity, 30% char similarity, 20% length factor 864 + (word_similarity * 0.5) + (char_similarity * 0.3) + (length_factor * 0.2) 865 + } 866 + 867 + /// Find existing artists that fuzzy match the given name 868 + async fn find_fuzzy_artist_matches( 869 + &self, 870 + artist_name: &str, 871 + _track_name: &str, 872 + _album_name: Option<&str>, 873 + ) -> anyhow::Result<Vec<FuzzyMatchCandidate>> { 874 + let normalized_name = Self::normalize_text(artist_name, true); 875 + 876 + // Search for artists with similar names using trigram similarity 877 + let candidates = sqlx::query!( 878 + r#" 879 + SELECT 880 + ae.id, 881 + ae.name 882 + FROM artists_extended ae 883 + WHERE ae.mbid_type = 'musicbrainz' 884 
+ AND ( 885 + LOWER(TRIM(ae.name)) = $1 886 + OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%' 887 + OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%' 888 + OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6 889 + ) 890 + ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC 891 + LIMIT 10 892 + "#, 893 + normalized_name 894 + ) 895 + .fetch_all(&self.sql) 896 + .await 897 + .unwrap_or_default(); 898 + 899 + let mut matches = Vec::new(); 900 + 901 + for candidate in candidates { 902 + let name_similarity = Self::calculate_similarity(artist_name, &candidate.name, true); 903 + 904 + // Base confidence from name similarity 905 + let mut confidence = name_similarity; 906 + 907 + // Boost confidence for exact matches after normalization 908 + if Self::normalize_text(artist_name, true) 909 + == Self::normalize_text(&candidate.name, true) 910 + { 911 + confidence = confidence.max(0.95); 912 + } 913 + 914 + // Additional boost for cleaned matches 915 + let cleaned_input = MusicBrainzCleaner::clean_artist_name(artist_name); 916 + let cleaned_candidate = MusicBrainzCleaner::clean_artist_name(&candidate.name); 917 + if MusicBrainzCleaner::normalize_for_comparison(&cleaned_input) 918 + == MusicBrainzCleaner::normalize_for_comparison(&cleaned_candidate) 919 + { 920 + confidence = confidence.max(0.9); 921 + } 922 + 923 + // Lower threshold since we have better cleaning now 924 + if confidence >= 0.8 { 925 + matches.push(FuzzyMatchCandidate { 926 + artist_id: candidate.id, 927 + name: candidate.name, 928 + confidence, 929 + }); 930 + } 931 + } 932 + 933 + // Sort by confidence descending 934 + matches.sort_by(|a, b| { 935 + b.confidence 936 + .partial_cmp(&a.confidence) 937 + .unwrap_or(std::cmp::Ordering::Equal) 938 + }); 939 + 940 + Ok(matches) 941 + } 942 + 943 + /// Try to match an artist to existing MusicBrainz data using fuzzy matching 944 + async fn find_or_create_artist_with_fuzzy_matching( 945 + &self, 946 + artist_name: &str, 947 + mbid: Option<&str>, 948 + track_name: &str, 949 + album_name: Option<&str>, 950 + ) -> anyhow::Result<i32> { 951 + // If we already have an MBID, use it directly 952 + if let Some(mbid) = mbid { 953 + return self.insert_artist_extended(Some(mbid), artist_name).await; 954 + } 955 + 956 + // Try fuzzy matching against existing MusicBrainz artists 957 + let matches = self 958 + .find_fuzzy_artist_matches(artist_name, track_name, album_name) 959 + .await?; 960 + 961 + if let Some(best_match) = matches.first() { 962 + // Use high confidence threshold for automatic matching 963 + if best_match.confidence >= 0.92 { 964 + tracing::info!( 965 + "๐Ÿ”— Fuzzy matched '{}' to existing artist '{}' (confidence: {:.2})", 966 + artist_name, 967 + best_match.name, 968 + best_match.confidence 969 + ); 970 + 971 + // Update the existing artist name if the new one seems more complete 972 + if artist_name.len() > best_match.name.len() && best_match.confidence >= 0.95 { 973 + sqlx::query!( 974 + "UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2", 975 + artist_name, 976 + best_match.artist_id 977 + ) 978 + .execute(&self.sql) 979 + .await?; 980 + } 981 + 982 + return Ok(best_match.artist_id); 983 + } else if best_match.confidence >= 0.85 { 984 + tracing::debug!( 985 + "๐Ÿค” Potential match for '{}' -> '{}' (confidence: {:.2}) but below auto-match threshold", 986 + artist_name, 987 + best_match.name, 988 + best_match.confidence 989 + ); 990 + } 991 + } 992 + 993 + // No good match found, create synthetic artist 994 + self.insert_artist_extended(None, artist_name).await 995 + } 996 + 
997 + /// Inserts or updates an artist in the database using the extended table. 998 + /// Returns the internal ID of the artist. 999 + async fn insert_artist_extended(&self, mbid: Option<&str>, name: &str) -> anyhow::Result<i32> { 1000 + if let Some(mbid) = mbid { 1001 + let artist_uuid = Uuid::parse_str(mbid)?; 1002 + let res = sqlx::query!( 1003 + r#" 1004 + INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz') 1005 + ON CONFLICT (mbid) DO UPDATE SET 1006 + name = EXCLUDED.name, 1007 + updated_at = NOW() 1008 + RETURNING id; 1009 + "#, 1010 + artist_uuid, 1011 + name 1012 + ) 1013 + .fetch_one(&self.sql) 1014 + .await?; 1015 + Ok(res.id) 1016 + } else { 1017 + // Artist without MBID - generate synthetic MBID 1018 + let synthetic_uuid = self.generate_synthetic_mbid(name).await?; 1019 + 1020 + let res = sqlx::query!( 1021 + r#" 1022 + INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic') 1023 + ON CONFLICT (mbid) DO UPDATE SET 1024 + name = EXCLUDED.name, 1025 + updated_at = NOW() 1026 + RETURNING id; 1027 + "#, 1028 + synthetic_uuid, 1029 + name 1030 + ) 1031 + .fetch_one(&self.sql) 1032 + .await?; 1033 + Ok(res.id) 1034 + } 1035 } 1036 1037 /// Inserts or updates a release in the database. 1038 /// Returns the Uuid of the release. 1039 async fn insert_release(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> { 1040 let release_uuid = Uuid::parse_str(mbid)?; 1041 + 1042 + // Extract discriminant from release name for new releases 1043 + // Prioritize edition-specific patterns for better quality 1044 + let discriminant = self 1045 + .extract_edition_discriminant_from_db(name) 1046 + .await 1047 + .or_else(|| { 1048 + futures::executor::block_on(async { self.extract_discriminant_from_db(name).await }) 1049 + }); 1050 + 1051 let res = sqlx::query!( 1052 r#" 1053 + INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3) 1054 + ON CONFLICT (mbid) DO UPDATE SET 1055 + name = EXCLUDED.name, 1056 + discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant) 1057 RETURNING mbid; 1058 "#, 1059 release_uuid, 1060 + name, 1061 + discriminant 1062 ) 1063 .fetch_all(&self.sql) 1064 .await?; ··· 1074 /// Returns the Uuid of the recording. 
1075 async fn insert_recording(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> { 1076 let recording_uuid = Uuid::parse_str(mbid)?; 1077 + 1078 + // Extract discriminant from recording name for new recordings 1079 + // Prioritize edition-specific patterns for better quality 1080 + let discriminant = self 1081 + .extract_edition_discriminant_from_db(name) 1082 + .await 1083 + .or_else(|| { 1084 + futures::executor::block_on(async { self.extract_discriminant_from_db(name).await }) 1085 + }); 1086 + 1087 let res = sqlx::query!( 1088 r#" 1089 + INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3) 1090 + ON CONFLICT (mbid) DO UPDATE SET 1091 + name = EXCLUDED.name, 1092 + discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant) 1093 RETURNING mbid; 1094 "#, 1095 recording_uuid, 1096 + name, 1097 + discriminant 1098 ) 1099 .fetch_all(&self.sql) 1100 .await?; ··· 1105 1106 Ok(recording_uuid) 1107 } 1108 + 1109 + /// Extract discriminant from name using database function 1110 + async fn extract_discriminant_from_db(&self, name: &str) -> Option<String> { 1111 + sqlx::query_scalar!("SELECT extract_discriminant($1)", name) 1112 + .fetch_one(&self.sql) 1113 + .await 1114 + .ok() 1115 + .flatten() 1116 + } 1117 + 1118 + /// Extract edition-specific discriminant from name using database function 1119 + async fn extract_edition_discriminant_from_db(&self, name: &str) -> Option<String> { 1120 + sqlx::query_scalar!("SELECT extract_edition_discriminant($1)", name) 1121 + .fetch_one(&self.sql) 1122 + .await 1123 + .ok() 1124 + .flatten() 1125 + } 1126 + 1127 + // /// Get base name without discriminant using database function 1128 + // async fn get_base_name_from_db(&self, name: &str) -> String { 1129 + // sqlx::query_scalar!("SELECT get_base_name($1)", name) 1130 + // .fetch_one(&self.sql) 1131 + // .await 1132 + // .ok() 1133 + // .flatten() 1134 + // .unwrap_or_else(|| name.to_string()) 1135 + // } 1136 1137 pub async fn insert_play( 1138 &self, ··· 1144 ) -> anyhow::Result<()> { 1145 dbg!("ingesting", play_record); 1146 let play_record = clean(play_record); 1147 + let mut parsed_artists: Vec<(i32, String)> = vec![]; 1148 + let mut artist_names_raw: Vec<String> = vec![]; 1149 + 1150 if let Some(ref artists) = &play_record.artists { 1151 for artist in artists { 1152 let artist_name = artist.artist_name.clone(); 1153 + artist_names_raw.push(artist_name.clone()); 1154 + let artist_mbid = artist.artist_mb_id.as_deref(); 1155 + 1156 + let artist_id = self 1157 + .find_or_create_artist_with_fuzzy_matching( 1158 + &artist_name, 1159 + artist_mbid, 1160 + &play_record.track_name, 1161 + play_record.release_name.as_deref(), 1162 + ) 1163 + .await?; 1164 + parsed_artists.push((artist_id, artist_name.clone())); 1165 + } 1166 + } else if let Some(artist_names) = &play_record.artist_names { 1167 + for (index, artist_name) in artist_names.iter().enumerate() { 1168 + artist_names_raw.push(artist_name.clone()); 1169 + 1170 + let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids { 1171 + mbid_list.get(index) 1172 } else { 1173 + None 1174 + }; 1175 + 1176 + let artist_id = self 1177 + .find_or_create_artist_with_fuzzy_matching( 1178 + artist_name, 1179 + artist_mbid_opt.map(|s| s.as_str()), 1180 + &play_record.track_name, 1181 + play_record.release_name.as_deref(), 1182 + ) 1183 + .await?; 1184 + parsed_artists.push((artist_id, artist_name.clone())); 1185 } 1186 } else { 1187 + // No artist information provided - create a fallback artist 1188 + let 
fallback_artist_name = Self::generate_fallback_artist(&play_record.track_name); 1189 + artist_names_raw.push(fallback_artist_name.clone()); 1190 1191 + let artist_id = self 1192 + .find_or_create_artist_with_fuzzy_matching( 1193 + &fallback_artist_name, 1194 + None, 1195 + &play_record.track_name, 1196 + play_record.release_name.as_deref(), 1197 + ) 1198 + .await?; 1199 + parsed_artists.push((artist_id, fallback_artist_name)); 1200 } 1201 1202 // Insert release if missing ··· 1226 time::OffsetDateTime::from_unix_timestamp(played_time.as_ref().timestamp()) 1227 .unwrap_or_else(|_| time::OffsetDateTime::now_utc()); 1228 1229 + // Extract discriminants from lexicon fields or infer from names 1230 + // First try lexicon fields, then extract from names with preference for edition-specific patterns 1231 + // TODO: Enable when types are updated with discriminant fields 1232 + // let track_discriminant = play_record.track_discriminant.clone().or_else(|| { 1233 + let track_discriminant = { 1234 + // Try edition-specific patterns first, then general patterns 1235 + futures::executor::block_on(async { 1236 + self.extract_edition_discriminant_from_db(&play_record.track_name) 1237 + .await 1238 + .or_else(|| { 1239 + futures::executor::block_on(async { 1240 + self.extract_discriminant_from_db(&play_record.track_name) 1241 + .await 1242 + }) 1243 + }) 1244 + }) 1245 + }; 1246 + 1247 + // let release_discriminant = play_record.release_discriminant.clone().or_else(|| { 1248 + let release_discriminant = { 1249 + if let Some(ref release_name) = play_record.release_name { 1250 + futures::executor::block_on(async { 1251 + // Try edition-specific patterns first, then general patterns 1252 + self.extract_edition_discriminant_from_db(release_name) 1253 + .await 1254 + .or_else(|| { 1255 + futures::executor::block_on(async { 1256 + self.extract_discriminant_from_db(release_name).await 1257 + }) 1258 + }) 1259 + }) 1260 + } else { 1261 + None 1262 + } 1263 + }; 1264 + 1265 + // Our main insert into plays with raw artist names and discriminants 1266 + let artist_names_json = if !artist_names_raw.is_empty() { 1267 + Some(serde_json::to_value(&artist_names_raw)?) 
1268 + } else { 1269 + None 1270 + }; 1271 + 1272 sqlx::query!( 1273 r#" 1274 INSERT INTO plays ( 1275 uri, cid, did, rkey, isrc, duration, track_name, played_time, 1276 processed_time, release_mbid, release_name, recording_mbid, 1277 + submission_client_agent, music_service_base_domain, artist_names_raw, 1278 + track_discriminant, release_discriminant 1279 ) VALUES ( 1280 $1, $2, $3, $4, $5, $6, $7, $8, 1281 + NOW(), $9, $10, $11, $12, $13, $14, $15, $16 1282 ) ON CONFLICT(uri) DO UPDATE SET 1283 isrc = EXCLUDED.isrc, 1284 duration = EXCLUDED.duration, ··· 1289 release_name = EXCLUDED.release_name, 1290 recording_mbid = EXCLUDED.recording_mbid, 1291 submission_client_agent = EXCLUDED.submission_client_agent, 1292 + music_service_base_domain = EXCLUDED.music_service_base_domain, 1293 + artist_names_raw = EXCLUDED.artist_names_raw, 1294 + track_discriminant = EXCLUDED.track_discriminant, 1295 + release_discriminant = EXCLUDED.release_discriminant; 1296 "#, 1297 uri, 1298 cid, ··· 1307 recording_mbid_opt, 1308 play_record.submission_client_agent, 1309 play_record.music_service_base_domain, 1310 + artist_names_json, 1311 + track_discriminant, 1312 + release_discriminant 1313 ) 1314 .execute(&self.sql) 1315 .await?; 1316 1317 + // Insert plays into the extended join table (supports all artists) 1318 + for (artist_id, artist_name) in &parsed_artists { 1319 sqlx::query!( 1320 r#" 1321 + INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES 1322 + ($1, $2, $3) 1323 + ON CONFLICT (play_uri, artist_id) DO NOTHING; 1324 + "#, 1325 uri, 1326 + artist_id, 1327 artist_name 1328 ) 1329 .execute(&self.sql)
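The fallback scorer in `calculate_similarity` above is easy to sanity-check in isolation. Below is a minimal, self-contained sketch of the same 50/30/20 blend (word overlap, positional character overlap, length ratio); the function name, the pre-normalized inputs, and the example titles are illustrative only and are not part of this changeset.

```rust
use std::collections::HashSet;

/// Blend of word overlap (50%), positional character overlap (30%) and
/// length ratio (20%), mirroring the weights used by `calculate_similarity`.
/// Inputs are assumed to be already normalized (lowercased, cleaned).
fn blended_similarity(a: &str, b: &str) -> f64 {
    if a == b {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }

    let max_len = a.len().max(b.len()) as f64;
    let min_len = a.len().min(b.len()) as f64;

    // Positional character overlap; `zip` stops at the shorter string.
    let common_chars = a.chars().zip(b.chars()).filter(|(x, y)| x == y).count() as f64;

    // Jaccard-style word overlap.
    let words_a: HashSet<&str> = a.split_whitespace().collect();
    let words_b: HashSet<&str> = b.split_whitespace().collect();
    let common_words = words_a.intersection(&words_b).count() as f64;
    let total_words = words_a.union(&words_b).count() as f64;
    let word_similarity = if total_words > 0.0 {
        common_words / total_words
    } else {
        0.0
    };

    (word_similarity * 0.5) + ((common_chars / max_len) * 0.3) + ((min_len / max_len) * 0.2)
}

fn main() {
    // An edition suffix lowers but does not destroy the score (~0.46 here),
    // while an unrelated title scores far lower (~0.12).
    println!("{:.2}", blended_similarity("in rainbows", "in rainbows deluxe edition"));
    println!("{:.2}", blended_similarity("in rainbows", "kid a"));
}
```

In the ingestion path the two names are first run through `MusicBrainzCleaner` via `normalize_text`, the blended score is max-ed against Postgres `similarity()` (pg_trgm), and only then compared to thresholds such as the 0.92 auto-merge cutoff used by `run_full_consolidation`, so raw scores like the ones above would not trigger a merge on their own.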
+51 -24
services/cadet/src/main.rs
··· 17 mod cursor; 18 mod db; 19 mod ingestors; 20 - mod resolve; 21 mod redis_client; 22 23 fn setup_tracing() { 24 tracing_subscriber::fmt() ··· 96 97 // CAR import job worker 98 let car_ingestor = ingestors::car::CarImportIngestor::new(pool.clone()); 99 - let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string()); 100 - 101 match redis_client::RedisClient::new(&redis_url) { 102 Ok(redis_client) => { 103 // Spawn CAR import job processing task 104 tokio::spawn(async move { 105 - use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, JobProgress, queue_keys}; 106 - use tracing::{info, error}; 107 use chrono::Utc; 108 - 109 info!("Starting CAR import job worker, polling Redis queue..."); 110 - 111 loop { 112 // Block for up to 10 seconds waiting for jobs 113 match redis_client.pop_job(queue_keys::CAR_IMPORT_JOBS, 10).await { 114 Ok(Some(job_data)) => { 115 info!("Received CAR import job: {}", job_data); 116 - 117 // Parse job 118 match serde_json::from_str::<CarImportJob>(&job_data) { 119 Ok(job) => { ··· 132 blocks_processed: None, 133 }), 134 }; 135 - 136 let status_key = queue_keys::job_status_key(&job.request_id); 137 - if let Ok(status_data) = serde_json::to_string(&processing_status) { 138 - let _ = redis_client.update_job_status(&status_key, &status_data).await; 139 } 140 - 141 // Process the job 142 - match car_ingestor.fetch_and_process_identity_car(&job.identity).await { 143 Ok(import_id) => { 144 - info!("โœ… CAR import job completed successfully: {}", job.request_id); 145 - 146 let completed_status = CarImportJobStatus { 147 status: JobStatus::Completed, 148 created_at: job.created_at, ··· 150 completed_at: Some(Utc::now()), 151 error_message: None, 152 progress: Some(JobProgress { 153 - step: format!("CAR import completed: {}", import_id), 154 user_did: None, 155 pds_host: None, 156 car_size_bytes: None, 157 blocks_processed: None, 158 }), 159 }; 160 - 161 - if let Ok(status_data) = serde_json::to_string(&completed_status) { 162 - let _ = redis_client.update_job_status(&status_key, &status_data).await; 163 } 164 } 165 Err(e) => { 166 - error!("โŒ CAR import job failed: {}: {}", job.request_id, e); 167 - 168 let failed_status = CarImportJobStatus { 169 status: JobStatus::Failed, 170 created_at: job.created_at, ··· 173 error_message: Some(e.to_string()), 174 progress: None, 175 }; 176 - 177 - if let Ok(status_data) = serde_json::to_string(&failed_status) { 178 - let _ = redis_client.update_job_status(&status_key, &status_data).await; 179 } 180 } 181 }
··· 17 mod cursor; 18 mod db; 19 mod ingestors; 20 mod redis_client; 21 + mod resolve; 22 23 fn setup_tracing() { 24 tracing_subscriber::fmt() ··· 96 97 // CAR import job worker 98 let car_ingestor = ingestors::car::CarImportIngestor::new(pool.clone()); 99 + let redis_url = 100 + std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string()); 101 + 102 match redis_client::RedisClient::new(&redis_url) { 103 Ok(redis_client) => { 104 // Spawn CAR import job processing task 105 tokio::spawn(async move { 106 use chrono::Utc; 107 + use ingestors::car::jobs::{ 108 + queue_keys, CarImportJob, CarImportJobStatus, JobProgress, JobStatus, 109 + }; 110 + use tracing::{error, info}; 111 + 112 info!("Starting CAR import job worker, polling Redis queue..."); 113 + 114 loop { 115 // Block for up to 10 seconds waiting for jobs 116 match redis_client.pop_job(queue_keys::CAR_IMPORT_JOBS, 10).await { 117 Ok(Some(job_data)) => { 118 info!("Received CAR import job: {}", job_data); 119 + 120 // Parse job 121 match serde_json::from_str::<CarImportJob>(&job_data) { 122 Ok(job) => { ··· 135 blocks_processed: None, 136 }), 137 }; 138 + 139 let status_key = queue_keys::job_status_key(&job.request_id); 140 + if let Ok(status_data) = 141 + serde_json::to_string(&processing_status) 142 + { 143 + let _ = redis_client 144 + .update_job_status(&status_key, &status_data) 145 + .await; 146 } 147 + 148 // Process the job 149 + match car_ingestor 150 + .fetch_and_process_identity_car(&job.identity) 151 + .await 152 + { 153 Ok(import_id) => { 154 + info!( 155 + "โœ… CAR import job completed successfully: {}", 156 + job.request_id 157 + ); 158 + 159 let completed_status = CarImportJobStatus { 160 status: JobStatus::Completed, 161 created_at: job.created_at, ··· 163 completed_at: Some(Utc::now()), 164 error_message: None, 165 progress: Some(JobProgress { 166 + step: format!( 167 + "CAR import completed: {}", 168 + import_id 169 + ), 170 user_did: None, 171 pds_host: None, 172 car_size_bytes: None, 173 blocks_processed: None, 174 }), 175 }; 176 + 177 + if let Ok(status_data) = 178 + serde_json::to_string(&completed_status) 179 + { 180 + let _ = redis_client 181 + .update_job_status(&status_key, &status_data) 182 + .await; 183 } 184 } 185 Err(e) => { 186 + error!( 187 + "โŒ CAR import job failed: {}: {}", 188 + job.request_id, e 189 + ); 190 + 191 let failed_status = CarImportJobStatus { 192 status: JobStatus::Failed, 193 created_at: job.created_at, ··· 196 error_message: Some(e.to_string()), 197 progress: None, 198 }; 199 + 200 + if let Ok(status_data) = 201 + serde_json::to_string(&failed_status) 202 + { 203 + let _ = redis_client 204 + .update_job_status(&status_key, &status_data) 205 + .await; 206 } 207 } 208 }
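The worker loop above is consume-only: it BRPOPs serialized `CarImportJob`s from `queue_keys::CAR_IMPORT_JOBS` and writes status JSON under `queue_keys::job_status_key(&job.request_id)`. For context, a producer could enqueue work with a plain LPUSH. The sketch below is hypothetical and not part of this diff: it talks to Redis through the `redis` crate directly (with its Tokio async feature), the `"car_import_jobs"` key literal stands in for `queue_keys::CAR_IMPORT_JOBS`, and the `uuid`/`chrono` dependencies are assumptions; only the job field names are taken from the worker code.

```rust
use redis::AsyncCommands;

/// Field names mirror the `CarImportJob` the worker deserializes; this struct
/// is a stand-in for illustration, not the crate's own type.
#[derive(serde::Serialize)]
struct CarImportJob {
    request_id: String,
    identity: String, // handle or DID whose CAR should be fetched
    created_at: chrono::DateTime<chrono::Utc>, // needs chrono's "serde" feature
}

async fn enqueue_car_import(redis_url: &str, identity: &str) -> anyhow::Result<String> {
    let client = redis::Client::open(redis_url)?;
    let mut conn = client.get_multiplexed_async_connection().await?;

    let job = CarImportJob {
        request_id: uuid::Uuid::new_v4().to_string(),
        identity: identity.to_string(),
        created_at: chrono::Utc::now(),
    };

    // The worker BRPOPs from the right end of the list, so LPUSH on the left
    // gives FIFO ordering. "car_import_jobs" stands in for
    // queue_keys::CAR_IMPORT_JOBS.
    let _queue_len: i64 = conn
        .lpush("car_import_jobs", serde_json::to_string(&job)?)
        .await?;

    Ok(job.request_id)
}
```

The returned `request_id` can then be used to poll the status key the worker keeps updated as the job moves from processing to completed or failed.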
+3 -3
services/cadet/src/redis_client.rs
··· 20 pub async fn pop_job(&self, queue_key: &str, timeout_seconds: u64) -> Result<Option<String>> { 21 let mut conn = self.get_connection().await?; 22 let result: Option<Vec<String>> = conn.brpop(queue_key, timeout_seconds as f64).await?; 23 - 24 match result { 25 Some(mut items) if items.len() >= 2 => { 26 // brpop returns [queue_name, item], we want the item 27 Ok(Some(items.remove(1))) 28 } 29 - _ => Ok(None) 30 } 31 } 32 ··· 36 let _: () = conn.set(status_key, status_data).await?; 37 Ok(()) 38 } 39 - }
··· 20 pub async fn pop_job(&self, queue_key: &str, timeout_seconds: u64) -> Result<Option<String>> { 21 let mut conn = self.get_connection().await?; 22 let result: Option<Vec<String>> = conn.brpop(queue_key, timeout_seconds as f64).await?; 23 + 24 match result { 25 Some(mut items) if items.len() >= 2 => { 26 // brpop returns [queue_name, item], we want the item 27 Ok(Some(items.remove(1))) 28 } 29 + _ => Ok(None), 30 } 31 } 32 ··· 36 let _: () = conn.set(status_key, status_data).await?; 37 Ok(()) 38 } 39 + }
+55
services/cadet/target.sh
···
··· 1 + #!/bin/bash 2 + set -e 3 + 4 + # Debug: Print all available build variables 5 + echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM" 6 + echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM" 7 + echo "DEBUG: TARGETARCH=$TARGETARCH" 8 + echo "DEBUG: TARGETOS=$TARGETOS" 9 + 10 + # Use TARGETARCH directly (more reliable than TARGETPLATFORM) 11 + TARGET_ARCH_VAR="${TARGETARCH:-}" 12 + 13 + # If TARGETARCH is not set, try to extract from TARGETPLATFORM 14 + if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then 15 + TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2) 16 + echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM" 17 + fi 18 + 19 + # Final fallback: detect from uname 20 + if [ -z "$TARGET_ARCH_VAR" ]; then 21 + ARCH=$(uname -m) 22 + case "$ARCH" in 23 + "x86_64") 24 + TARGET_ARCH_VAR="amd64" 25 + ;; 26 + "aarch64") 27 + TARGET_ARCH_VAR="arm64" 28 + ;; 29 + *) 30 + echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH" 31 + echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM" 32 + exit 1 33 + ;; 34 + esac 35 + echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname" 36 + fi 37 + 38 + # Map architecture to Rust target 39 + case "$TARGET_ARCH_VAR" in 40 + "amd64") 41 + export RUST_TARGET="x86_64-unknown-linux-gnu" 42 + export TARGET_ARCH="amd64" 43 + ;; 44 + "arm64") 45 + export RUST_TARGET="aarch64-unknown-linux-gnu" 46 + export TARGET_ARCH="arm64" 47 + ;; 48 + *) 49 + echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR" 50 + echo "Supported architectures: amd64, arm64" 51 + exit 1 52 + ;; 53 + esac 54 + 55 + echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
-226
services/migrations/20241220000001_initial_schema.sql
··· 1 - -- Initial comprehensive schema for Teal music platform 2 - -- Based on services/cadet/sql/base.sql 3 - 4 - CREATE TABLE artists ( 5 - mbid UUID PRIMARY KEY, 6 - name TEXT NOT NULL, 7 - play_count INTEGER DEFAULT 0 8 - ); 9 - 10 - -- releases are synologous to 'albums' 11 - CREATE TABLE releases ( 12 - mbid UUID PRIMARY KEY, 13 - name TEXT NOT NULL, 14 - play_count INTEGER DEFAULT 0 15 - ); 16 - 17 - -- recordings are synologous to 'tracks' BUT tracks can be in multiple releases! 18 - CREATE TABLE recordings ( 19 - mbid UUID PRIMARY KEY, 20 - name TEXT NOT NULL, 21 - play_count INTEGER DEFAULT 0 22 - ); 23 - 24 - CREATE TABLE plays ( 25 - uri TEXT PRIMARY KEY, 26 - did TEXT NOT NULL, 27 - rkey TEXT NOT NULL, 28 - cid TEXT NOT NULL, 29 - isrc TEXT, 30 - duration INTEGER, 31 - track_name TEXT NOT NULL, 32 - played_time TIMESTAMP WITH TIME ZONE, 33 - processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 34 - release_mbid UUID, 35 - release_name TEXT, 36 - recording_mbid UUID, 37 - submission_client_agent TEXT, 38 - music_service_base_domain TEXT, 39 - origin_url TEXT, 40 - FOREIGN KEY (release_mbid) REFERENCES releases (mbid), 41 - FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid) 42 - ); 43 - 44 - CREATE INDEX idx_plays_release_mbid ON plays (release_mbid); 45 - CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid); 46 - CREATE INDEX idx_plays_played_time ON plays (played_time); 47 - CREATE INDEX idx_plays_did ON plays (did); 48 - 49 - CREATE TABLE play_to_artists ( 50 - play_uri TEXT, -- references plays(uri) 51 - artist_mbid UUID REFERENCES artists (mbid), 52 - artist_name TEXT, -- storing here for ease of use when joining 53 - PRIMARY KEY (play_uri, artist_mbid), 54 - FOREIGN KEY (play_uri) REFERENCES plays (uri) 55 - ); 56 - 57 - CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid); 58 - 59 - -- Profiles table 60 - CREATE TABLE profiles ( 61 - did TEXT PRIMARY KEY, 62 - handle TEXT, 63 - display_name TEXT, 64 - description TEXT, 65 - description_facets JSONB, 66 - avatar TEXT, -- IPLD of the image, bafy... 
67 - banner TEXT, 68 - created_at TIMESTAMP WITH TIME ZONE 69 - ); 70 - 71 - -- User featured items table 72 - CREATE TABLE featured_items ( 73 - did TEXT PRIMARY KEY, 74 - mbid TEXT NOT NULL, 75 - type TEXT NOT NULL 76 - ); 77 - 78 - -- Statii table (status records) 79 - CREATE TABLE statii ( 80 - uri TEXT PRIMARY KEY, 81 - did TEXT NOT NULL, 82 - rkey TEXT NOT NULL, 83 - cid TEXT NOT NULL, 84 - record JSONB NOT NULL, 85 - indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 86 - ); 87 - 88 - CREATE INDEX idx_statii_did_rkey ON statii (did, rkey); 89 - 90 - -- Materialized view for artists' play counts 91 - CREATE MATERIALIZED VIEW mv_artist_play_counts AS 92 - SELECT 93 - a.mbid AS artist_mbid, 94 - a.name AS artist_name, 95 - COUNT(p.uri) AS play_count 96 - FROM 97 - artists a 98 - LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 99 - LEFT JOIN plays p ON p.uri = pta.play_uri 100 - GROUP BY 101 - a.mbid, 102 - a.name; 103 - 104 - CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid); 105 - 106 - -- Materialized view for releases' play counts 107 - CREATE MATERIALIZED VIEW mv_release_play_counts AS 108 - SELECT 109 - r.mbid AS release_mbid, 110 - r.name AS release_name, 111 - COUNT(p.uri) AS play_count 112 - FROM 113 - releases r 114 - LEFT JOIN plays p ON p.release_mbid = r.mbid 115 - GROUP BY 116 - r.mbid, 117 - r.name; 118 - 119 - CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid); 120 - 121 - -- Materialized view for recordings' play counts 122 - CREATE MATERIALIZED VIEW mv_recording_play_counts AS 123 - SELECT 124 - rec.mbid AS recording_mbid, 125 - rec.name AS recording_name, 126 - COUNT(p.uri) AS play_count 127 - FROM 128 - recordings rec 129 - LEFT JOIN plays p ON p.recording_mbid = rec.mbid 130 - GROUP BY 131 - rec.mbid, 132 - rec.name; 133 - 134 - CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid); 135 - 136 - -- Global play count materialized view 137 - CREATE MATERIALIZED VIEW mv_global_play_count AS 138 - SELECT 139 - COUNT(uri) AS total_plays, 140 - COUNT(DISTINCT did) AS unique_listeners 141 - FROM plays; 142 - 143 - CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays); 144 - 145 - -- Top artists in the last 30 days 146 - CREATE MATERIALIZED VIEW mv_top_artists_30days AS 147 - SELECT 148 - a.mbid AS artist_mbid, 149 - a.name AS artist_name, 150 - COUNT(p.uri) AS play_count 151 - FROM artists a 152 - INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 153 - INNER JOIN plays p ON p.uri = pta.play_uri 154 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 155 - GROUP BY a.mbid, a.name 156 - ORDER BY COUNT(p.uri) DESC; 157 - 158 - -- Top releases in the last 30 days 159 - CREATE MATERIALIZED VIEW mv_top_releases_30days AS 160 - SELECT 161 - r.mbid AS release_mbid, 162 - r.name AS release_name, 163 - COUNT(p.uri) AS play_count 164 - FROM releases r 165 - INNER JOIN plays p ON p.release_mbid = r.mbid 166 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 167 - GROUP BY r.mbid, r.name 168 - ORDER BY COUNT(p.uri) DESC; 169 - 170 - -- Top artists for user in the last 30 days 171 - CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS 172 - SELECT 173 - prof.did, 174 - a.mbid AS artist_mbid, 175 - a.name AS artist_name, 176 - COUNT(p.uri) AS play_count 177 - FROM artists a 178 - INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 179 - INNER JOIN plays p ON p.uri = pta.play_uri 180 - INNER JOIN profiles prof ON 
prof.did = p.did 181 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 182 - GROUP BY prof.did, a.mbid, a.name 183 - ORDER BY COUNT(p.uri) DESC; 184 - 185 - -- Top artists for user in the last 7 days 186 - CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS 187 - SELECT 188 - prof.did, 189 - a.mbid AS artist_mbid, 190 - a.name AS artist_name, 191 - COUNT(p.uri) AS play_count 192 - FROM artists a 193 - INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 194 - INNER JOIN plays p ON p.uri = pta.play_uri 195 - INNER JOIN profiles prof ON prof.did = p.did 196 - WHERE p.played_time >= NOW() - INTERVAL '7 days' 197 - GROUP BY prof.did, a.mbid, a.name 198 - ORDER BY COUNT(p.uri) DESC; 199 - 200 - -- Top releases for user in the last 30 days 201 - CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS 202 - SELECT 203 - prof.did, 204 - r.mbid AS release_mbid, 205 - r.name AS release_name, 206 - COUNT(p.uri) AS play_count 207 - FROM releases r 208 - INNER JOIN plays p ON p.release_mbid = r.mbid 209 - INNER JOIN profiles prof ON prof.did = p.did 210 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 211 - GROUP BY prof.did, r.mbid, r.name 212 - ORDER BY COUNT(p.uri) DESC; 213 - 214 - -- Top releases for user in the last 7 days 215 - CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS 216 - SELECT 217 - prof.did, 218 - r.mbid AS release_mbid, 219 - r.name AS release_name, 220 - COUNT(p.uri) AS play_count 221 - FROM releases r 222 - INNER JOIN plays p ON p.release_mbid = r.mbid 223 - INNER JOIN profiles prof ON prof.did = p.did 224 - WHERE p.played_time >= NOW() - INTERVAL '7 days' 225 - GROUP BY prof.did, r.mbid, r.name 226 - ORDER BY COUNT(p.uri) DESC;
···
-59
services/migrations/20241220000002_car_import_tables.sql
··· 1 - -- CAR import functionality tables 2 - -- For handling AT Protocol CAR file imports and processing 3 - 4 - -- Tracks uploaded CAR files that are queued for processing 5 - CREATE TABLE IF NOT EXISTS car_import_requests ( 6 - import_id TEXT PRIMARY KEY, 7 - car_data_base64 TEXT NOT NULL, 8 - status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed 9 - created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 10 - processed_at TIMESTAMP WITH TIME ZONE, 11 - error_message TEXT, 12 - file_size_bytes INTEGER, 13 - block_count INTEGER, 14 - extracted_records_count INTEGER DEFAULT 0 15 - ); 16 - 17 - CREATE INDEX idx_car_import_requests_status ON car_import_requests (status); 18 - CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at); 19 - 20 - -- Tracks raw IPLD blocks extracted from CAR files 21 - CREATE TABLE IF NOT EXISTS car_blocks ( 22 - cid TEXT PRIMARY KEY, 23 - import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 24 - block_data BYTEA NOT NULL, 25 - decoded_successfully BOOLEAN DEFAULT FALSE, 26 - collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc. 27 - created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 28 - ); 29 - 30 - CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id); 31 - CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type); 32 - 33 - -- Tracks records extracted from CAR imports that were successfully processed 34 - CREATE TABLE IF NOT EXISTS car_extracted_records ( 35 - id SERIAL PRIMARY KEY, 36 - import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 37 - cid TEXT NOT NULL REFERENCES car_blocks(cid), 38 - collection_type TEXT NOT NULL, 39 - record_uri TEXT, -- AT URI if applicable (e.g., for play records) 40 - synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123') 41 - rkey TEXT, 42 - extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 43 - processing_notes TEXT 44 - ); 45 - 46 - CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id); 47 - CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type); 48 - CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri); 49 - 50 - -- Tracks import metadata and commit information 51 - CREATE TABLE IF NOT EXISTS car_import_metadata ( 52 - import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 53 - metadata_key TEXT NOT NULL, 54 - metadata_value JSONB NOT NULL, 55 - created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 56 - PRIMARY KEY (import_id, metadata_key) 57 - ); 58 - 59 - CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
···
-34
services/rocketman/Cargo.toml
··· 1 - [package] 2 - name = "rocketman" 3 - version = "0.2.3" 4 - edition = "2021" 5 - 6 - license = "MIT" 7 - authors = ["Natalie B. <nat@natalie.sh>"] 8 - repository = "https://github.com/espeon/cadet" 9 - 10 - readme = "readme.md" 11 - 12 - description = "A modular(ish) jetstream consumer." 13 - 14 - [dependencies] 15 - tokio.workspace = true 16 - tokio-tungstenite.workspace = true 17 - futures-util = "0.3" 18 - url.workspace = true 19 - rand.workspace = true 20 - tracing.workspace = true 21 - tracing-subscriber.workspace = true 22 - metrics.workspace = true 23 - derive_builder = "0.20.2" 24 - bon = "3.3.2" 25 - serde = { workspace = true, features = ["derive"] } 26 - serde_json.workspace = true 27 - flume.workspace = true 28 - anyhow.workspace = true 29 - async-trait.workspace = true 30 - zstd = { version = "0.13.3", optional = true } 31 - 32 - [features] 33 - default = ["zstd"] 34 - zstd = ["dep:zstd"]
···
-77
services/rocketman/examples/spew-bsky-posts.rs
··· 1 - use rocketman::{ 2 - connection::JetstreamConnection, 3 - handler, 4 - ingestion::LexiconIngestor, 5 - options::JetstreamOptions, 6 - types::event::{ Event, Commit }, 7 - }; 8 - use serde_json::Value; 9 - use std::{ 10 - collections::HashMap, 11 - sync::Arc, 12 - sync::Mutex, 13 - }; 14 - use async_trait::async_trait; 15 - 16 - #[tokio::main] 17 - async fn main() { 18 - // init the builder 19 - let opts = JetstreamOptions::builder() 20 - // your EXACT nsids 21 - .wanted_collections(vec!["app.bsky.feed.post".to_string()]) 22 - .build(); 23 - // create the jetstream connector 24 - let jetstream = JetstreamConnection::new(opts); 25 - 26 - // create your ingestors 27 - let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new(); 28 - ingestors.insert( 29 - // your EXACT nsid 30 - "app.bsky.feed.post".to_string(), 31 - Box::new(MyCoolIngestor), 32 - ); 33 - 34 - 35 - // tracks the last message we've processed 36 - let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None)); 37 - 38 - // get channels 39 - let msg_rx = jetstream.get_msg_rx(); 40 - let reconnect_tx = jetstream.get_reconnect_tx(); 41 - 42 - // spawn a task to process messages from the queue. 43 - // this is a simple implementation, you can use a more complex one based on needs. 44 - let c_cursor = cursor.clone(); 45 - tokio::spawn(async move { 46 - while let Ok(message) = msg_rx.recv_async().await { 47 - if let Err(e) = 48 - handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone()) 49 - .await 50 - { 51 - eprintln!("Error processing message: {}", e); 52 - }; 53 - } 54 - }); 55 - 56 - // connect to jetstream 57 - // retries internally, but may fail if there is an extreme error. 58 - if let Err(e) = jetstream.connect(cursor.clone()).await { 59 - eprintln!("Failed to connect to Jetstream: {}", e); 60 - std::process::exit(1); 61 - } 62 - } 63 - 64 - pub struct MyCoolIngestor; 65 - 66 - /// A cool ingestor implementation. Will just print the message. Does not do verification. 67 - #[async_trait] 68 - impl LexiconIngestor for MyCoolIngestor { 69 - async fn ingest(&self, message: Event<Value>) -> anyhow::Result<()> { 70 - if let Some(Commit { record: Some(record), .. }) = message.commit { 71 - if let Some(Value::String(text)) = record.get("text") { 72 - println!("{text:?}"); 73 - } 74 - } 75 - Ok(()) 76 - } 77 - }
···
-11
services/rocketman/package.json
··· 1 - { 2 - "name": "@repo/rocketman", 3 - "private": true, 4 - "scripts": { 5 - "build": "cargo build --release", 6 - "build:rust": "cargo build --release", 7 - "dev": "cargo watch -x 'run'", 8 - "test": "cargo test", 9 - "test:rust": "cargo test" 10 - } 11 - }
···
-74
services/rocketman/readme.md
··· 1 - ## Rocketman 2 - 3 - A modular(ish) jetstream consumer. Backed by Tungstenite. 4 - 5 - 6 - ### Installation 7 - ```toml 8 - [dependencies] 9 - rocketman = "latest" # pyt the latest version here 10 - tokio = { version = "1", features = ["macros", "rt-multi-thread"] } 11 - ``` 12 - ### Usage 13 - ```rs 14 - #[tokio::main] 15 - async fn main() { 16 - // init the builder 17 - let opts = JetstreamOptions::builder() 18 - // your EXACT nsids 19 - .wanted_collections(vec!["com.example.cool.nsid".to_string()]) 20 - .build(); 21 - // create the jetstream connector 22 - let jetstream = JetstreamConnection::new(opts); 23 - 24 - // create your ingestors 25 - let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new(); 26 - ingestors.insert( 27 - // your EXACT nsid 28 - "com.example.cool.nsid".to_string(), 29 - Box::new(MyCoolIngestor), 30 - ); 31 - 32 - 33 - // tracks the last message we've processed 34 - let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None)); 35 - 36 - // get channels 37 - let msg_rx = jetstream.get_msg_rx(); 38 - let reconnect_tx = jetstream.get_reconnect_tx(); 39 - 40 - // spawn a task to process messages from the queue. 41 - // this is a simple implementation, you can use a more complex one based on needs. 42 - let c_cursor = cursor.clone(); 43 - tokio::spawn(async move { 44 - while let Ok(message) = msg_rx.recv_async().await { 45 - if let Err(e) = 46 - handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone()) 47 - .await 48 - { 49 - error!("Error processing message: {}", e); 50 - }; 51 - } 52 - }); 53 - 54 - // connect to jetstream 55 - // retries internally, but may fail if there is an extreme error. 56 - if let Err(e) = jetstream.connect(cursor.clone()).await { 57 - error!("Failed to connect to Jetstream: {}", e); 58 - std::process::exit(1); 59 - } 60 - } 61 - 62 - pub struct MyCoolIngestor; 63 - 64 - /// A cool ingestor implementation. Will just print the message. Does not do verification. 65 - impl LexiconIngestor for MyCoolIngestor { 66 - async fn ingest(&self, message: Event<Value>) -> Result<()> { 67 - info!("{:?}", message); 68 - // Process message for default lexicon. 69 - Ok(()) 70 - } 71 - } 72 - ``` 73 - ### gratz 74 - Based heavily on [phil's jetstream consumer on atcosm constellation.](https://github.com/atcosm/links/blob/main/constellation/src/consumer/jetstream.rs)
···
-335
services/rocketman/src/connection.rs
··· 1 - use flume::{Receiver, Sender}; 2 - use futures_util::StreamExt; 3 - use metrics::{counter, describe_counter, describe_histogram, histogram, Unit}; 4 - use std::cmp::{max, min}; 5 - use std::sync::{Arc, Mutex}; 6 - use std::time::Instant; 7 - use tokio::time::{sleep, Duration}; 8 - use tokio_tungstenite::{connect_async, tungstenite::Message}; 9 - use tracing::{error, info}; 10 - use url::Url; 11 - 12 - use crate::options::JetstreamOptions; 13 - use crate::time::system_time::SystemTimeProvider; 14 - use crate::time::TimeProvider; 15 - 16 - pub struct JetstreamConnection { 17 - pub opts: JetstreamOptions, 18 - reconnect_tx: flume::Sender<()>, 19 - reconnect_rx: flume::Receiver<()>, 20 - msg_tx: flume::Sender<Message>, 21 - msg_rx: flume::Receiver<Message>, 22 - } 23 - 24 - impl JetstreamConnection { 25 - pub fn new(opts: JetstreamOptions) -> Self { 26 - let (reconnect_tx, reconnect_rx) = flume::bounded(opts.bound); 27 - let (msg_tx, msg_rx) = flume::bounded(opts.bound); 28 - Self { 29 - opts, 30 - reconnect_tx, 31 - reconnect_rx, 32 - msg_tx, 33 - msg_rx, 34 - } 35 - } 36 - 37 - pub fn get_reconnect_tx(&self) -> Sender<()> { 38 - self.reconnect_tx.clone() 39 - } 40 - 41 - pub fn get_msg_rx(&self) -> Receiver<Message> { 42 - self.msg_rx.clone() 43 - } 44 - 45 - fn build_ws_url(&self, cursor: Arc<Mutex<Option<u64>>>) -> String { 46 - let mut url = Url::parse(&self.opts.ws_url.to_string()).unwrap(); 47 - 48 - // Append query params 49 - if let Some(ref cols) = self.opts.wanted_collections { 50 - for col in cols { 51 - url.query_pairs_mut().append_pair("wantedCollections", col); 52 - } 53 - } 54 - if let Some(ref dids) = self.opts.wanted_dids { 55 - for did in dids { 56 - url.query_pairs_mut().append_pair("wantedDids", did); 57 - } 58 - } 59 - if let Some(cursor) = cursor.lock().unwrap().as_ref() { 60 - url.query_pairs_mut() 61 - .append_pair("cursor", &cursor.to_string()); 62 - } 63 - #[cfg(feature = "zstd")] 64 - if self.opts.compress { 65 - url.query_pairs_mut().append_pair("compress", "true"); 66 - } 67 - 68 - url.to_string() 69 - } 70 - 71 - pub async fn connect( 72 - &self, 73 - cursor: Arc<Mutex<Option<u64>>>, 74 - ) -> Result<(), Box<dyn std::error::Error>> { 75 - describe_counter!( 76 - "jetstream.connection.attempt", 77 - Unit::Count, 78 - "attempts to connect to jetstream service" 79 - ); 80 - describe_counter!( 81 - "jetstream.connection.error", 82 - Unit::Count, 83 - "errors connecting to jetstream service" 84 - ); 85 - describe_histogram!( 86 - "jetstream.connection.duration", 87 - Unit::Seconds, 88 - "Time connected to jetstream service" 89 - ); 90 - describe_counter!( 91 - "jetstream.connection.reconnect", 92 - Unit::Count, 93 - "reconnects to jetstream service" 94 - ); 95 - let mut retry_interval = 1; 96 - 97 - let time_provider = SystemTimeProvider::new(); 98 - 99 - let mut start_time = time_provider.now(); 100 - 101 - loop { 102 - counter!("jetstream.connection.attempt").increment(1); 103 - info!("Connecting to {}", self.opts.ws_url); 104 - let start = Instant::now(); 105 - 106 - let ws_url = self.build_ws_url(cursor.clone()); 107 - 108 - match connect_async(ws_url).await { 109 - Ok((ws_stream, response)) => { 110 - let elapsed = start.elapsed(); 111 - info!("Connected. 
HTTP status: {}", response.status()); 112 - 113 - let (_, mut read) = ws_stream.split(); 114 - 115 - loop { 116 - // Inner loop to handle messages, reconnect signals, and receive timeout 117 - let receive_timeout = 118 - sleep(Duration::from_secs(self.opts.timeout_time_sec as u64)); 119 - tokio::pin!(receive_timeout); 120 - 121 - loop { 122 - tokio::select! { 123 - message_result = read.next() => { 124 - match message_result { 125 - Some(message) => { 126 - // Reset timeout on message received 127 - receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64)); 128 - 129 - histogram!("jetstream.connection.duration").record(elapsed.as_secs_f64()); 130 - match message { 131 - Ok(message) => { 132 - if let Err(err) = self.msg_tx.send_async(message).await { 133 - counter!("jetstream.error").increment(1); 134 - error!("Failed to queue message: {}", err); 135 - } 136 - } 137 - Err(e) => { 138 - counter!("jetstream.error").increment(1); 139 - error!("Error: {}", e); 140 - } 141 - } 142 - } 143 - None => { 144 - info!("Stream closed by server."); 145 - counter!("jetstream.connection.reconnect").increment(1); 146 - break; // Stream ended, break inner loop to reconnect 147 - } 148 - } 149 - } 150 - _ = self.reconnect_rx.recv_async() => { 151 - info!("Reconnect signal received."); 152 - counter!("jetstream.connection.reconnect").increment(1); 153 - break; 154 - } 155 - _ = &mut receive_timeout => { 156 - // last final poll, just in case 157 - match read.next().await { 158 - Some(Ok(message)) => { 159 - if let Err(err) = self.msg_tx.send_async(message).await { 160 - counter!("jetstream.error").increment(1); 161 - error!("Failed to queue message: {}", err); 162 - } 163 - // Reset timeout to continue 164 - receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64)); 165 - } 166 - Some(Err(e)) => { 167 - counter!("jetstream.error").increment(1); 168 - error!("Error receiving message during final poll: {}", e); 169 - counter!("jetstream.connection.reconnect").increment(1); 170 - break; 171 - } 172 - None => { 173 - info!("No commits received in {} seconds, reconnecting.", self.opts.timeout_time_sec); 174 - counter!("jetstream.connection.reconnect").increment(1); 175 - break; 176 - } 177 - } 178 - } 179 - } 180 - } 181 - } 182 - } 183 - Err(e) => { 184 - let elapsed_time = time_provider.elapsed(start_time); 185 - // reset if time connected > the time we set 186 - if elapsed_time.as_secs() > self.opts.max_retry_interval_seconds { 187 - retry_interval = 0; 188 - start_time = time_provider.now(); 189 - } 190 - counter!("jetstream.connection.error").increment(1); 191 - error!("Connection error: {}", e); 192 - } 193 - } 194 - 195 - let sleep_time = max(1, min(self.opts.max_retry_interval_seconds, retry_interval)); 196 - info!("Reconnecting in {} seconds...", sleep_time); 197 - sleep(Duration::from_secs(sleep_time)).await; 198 - 199 - if retry_interval > self.opts.max_retry_interval_seconds { 200 - retry_interval = self.opts.max_retry_interval_seconds; 201 - } else { 202 - retry_interval *= 2; 203 - } 204 - } 205 - } 206 - 207 - pub fn force_reconnect(&self) -> Result<(), flume::SendError<()>> { 208 - info!("Force reconnect requested."); 209 - self.reconnect_tx.send(()) // Send a reconnect signal 210 - } 211 - } 212 - 213 - #[cfg(test)] 214 - mod tests { 215 - use super::*; 216 - use std::sync::{Arc, Mutex}; 217 - use tokio::task; 218 - use tokio::time::{timeout, Duration}; 219 - use 
tokio_tungstenite::tungstenite::Message; 220 - 221 - #[test] 222 - fn test_build_ws_url() { 223 - let opts = JetstreamOptions { 224 - wanted_collections: Some(vec!["col1".to_string(), "col2".to_string()]), 225 - wanted_dids: Some(vec!["did1".to_string()]), 226 - ..Default::default() 227 - }; 228 - let connection = JetstreamConnection::new(opts); 229 - 230 - let test = Arc::new(Mutex::new(Some(8373))); 231 - 232 - let url = connection.build_ws_url(test); 233 - 234 - assert!(url.starts_with("wss://")); 235 - assert!(url.contains("cursor=8373")); 236 - assert!(url.contains("wantedCollections=col1")); 237 - assert!(url.contains("wantedCollections=col2")); 238 - assert!(url.contains("wantedDids=did1")); 239 - } 240 - 241 - #[tokio::test] 242 - async fn test_force_reconnect() { 243 - let opts = JetstreamOptions::default(); 244 - let connection = JetstreamConnection::new(opts); 245 - 246 - // Spawn a task to listen for the reconnect signal 247 - let reconnect_rx = connection.reconnect_rx.clone(); 248 - let recv_task = task::spawn(async move { 249 - reconnect_rx 250 - .recv_async() 251 - .await 252 - .expect("Failed to receive reconnect signal"); 253 - }); 254 - 255 - connection 256 - .force_reconnect() 257 - .expect("Failed to send reconnect signal"); 258 - 259 - // Ensure reconnect signal was received 260 - assert!(recv_task.await.is_ok()); 261 - } 262 - 263 - #[tokio::test] 264 - async fn test_message_queue() { 265 - let opts = JetstreamOptions::default(); 266 - let connection = JetstreamConnection::new(opts); 267 - 268 - let msg_rx = connection.get_msg_rx(); 269 - let msg = Message::Text("test message".into()); 270 - 271 - // Send a message to the queue 272 - connection 273 - .msg_tx 274 - .send_async(msg.clone()) 275 - .await 276 - .expect("Failed to send message"); 277 - 278 - // Receive and verify the message 279 - let received = msg_rx 280 - .recv_async() 281 - .await 282 - .expect("Failed to receive message"); 283 - assert_eq!(received, msg); 284 - } 285 - 286 - #[tokio::test] 287 - async fn test_connection_retries_on_failure() { 288 - let opts = JetstreamOptions::default(); 289 - let connection = Arc::new(JetstreamConnection::new(opts)); 290 - 291 - let cursor = Arc::new(Mutex::new(None)); 292 - 293 - // Timeout to prevent infinite loop 294 - let result = timeout(Duration::from_secs(3), connection.connect(cursor)).await; 295 - 296 - assert!(result.is_err(), "Expected timeout due to retry logic"); 297 - } 298 - 299 - #[tokio::test] 300 - async fn test_reconnect_after_receive_timeout() { 301 - use tokio::net::TcpListener; 302 - use tokio_tungstenite::accept_async; 303 - 304 - let opts = JetstreamOptions { 305 - ws_url: crate::endpoints::JetstreamEndpoints::Custom("ws://127.0.0.1:9001".to_string()), 306 - bound: 5, 307 - max_retry_interval_seconds: 1, 308 - ..Default::default() 309 - }; 310 - let connection = JetstreamConnection::new(opts); 311 - let cursor = Arc::new(Mutex::new(None)); 312 - 313 - // set up dummy "websocket" 314 - let listener = TcpListener::bind("127.0.0.1:9001") 315 - .await 316 - .expect("Failed to bind"); 317 - let server_handle = tokio::spawn(async move { 318 - if let Ok((stream, _)) = listener.accept().await { 319 - let ws_stream = accept_async(stream).await.expect("Failed to accept"); 320 - // send nothing 321 - tokio::time::sleep(Duration::from_secs(6)).await; 322 - drop(ws_stream); 323 - } 324 - }); 325 - 326 - // spawn, then run for >30 seconds to trigger reconnect 327 - let connect_handle = tokio::spawn(async move { 328 - 
tokio::time::timeout(Duration::from_secs(5), connection.connect(cursor)) 329 - .await 330 - .ok(); 331 - }); 332 - 333 - let _ = tokio::join!(server_handle, connect_handle); 334 - } 335 - }
···
-65
services/rocketman/src/endpoints.rs
··· 1 - use std::fmt::{Display, Formatter, Result}; 2 - 3 - #[derive(Debug, Clone, PartialEq, Eq, Hash)] 4 - pub enum JetstreamEndpointLocations { 5 - UsEast, 6 - UsWest, 7 - } 8 - 9 - impl Display for JetstreamEndpointLocations { 10 - fn fmt(&self, f: &mut Formatter<'_>) -> Result { 11 - write!( 12 - f, 13 - "{}", 14 - match self { 15 - Self::UsEast => "us-east", 16 - Self::UsWest => "us-west", 17 - } 18 - ) 19 - } 20 - } 21 - 22 - #[derive(Debug, Clone, PartialEq, Eq, Hash)] 23 - pub enum JetstreamEndpoints { 24 - Public(JetstreamEndpointLocations, i8), 25 - Custom(String), 26 - } 27 - 28 - impl Display for JetstreamEndpoints { 29 - fn fmt(&self, f: &mut Formatter<'_>) -> Result { 30 - match self { 31 - Self::Public(location, id) => write!( 32 - f, 33 - "wss://jetstream{}.{}.bsky.network/subscribe", 34 - id, location 35 - ), 36 - Self::Custom(url) => write!(f, "{}", url), 37 - } 38 - } 39 - } 40 - 41 - impl Default for JetstreamEndpoints { 42 - fn default() -> Self { 43 - Self::Public(JetstreamEndpointLocations::UsEast, 2) 44 - } 45 - } 46 - 47 - #[cfg(test)] 48 - mod tests { 49 - use super::*; 50 - 51 - #[test] 52 - fn test_display_public() { 53 - let endpoint = JetstreamEndpoints::Public(JetstreamEndpointLocations::UsEast, 2); 54 - assert_eq!( 55 - endpoint.to_string(), 56 - "wss://jetstream2.us-east.bsky.network/subscribe" 57 - ); 58 - } 59 - 60 - #[test] 61 - fn test_display_custom() { 62 - let endpoint = JetstreamEndpoints::Custom("wss://custom.bsky.network/subscribe".into()); 63 - assert_eq!(endpoint.to_string(), "wss://custom.bsky.network/subscribe"); 64 - } 65 - }
···
-1
services/rocketman/src/err.rs
··· 1 - // TODO: error types instead of using anyhow
···
-452
services/rocketman/src/handler.rs
··· 1 - use anyhow::Result; 2 - use flume::Sender; 3 - use metrics::{counter, describe_counter, Unit}; 4 - use serde_json::Value; 5 - use std::{ 6 - collections::HashMap, 7 - sync::{Arc, Mutex}, 8 - }; 9 - use tokio_tungstenite::tungstenite::{Error, Message}; 10 - use tracing::{debug, error}; 11 - 12 - #[cfg(feature = "zstd")] 13 - use std::io::Cursor as IoCursor; 14 - #[cfg(feature = "zstd")] 15 - use std::sync::LazyLock; 16 - #[cfg(feature = "zstd")] 17 - use zstd::dict::DecoderDictionary; 18 - 19 - use crate::{ 20 - ingestion::LexiconIngestor, 21 - types::event::{Event, Kind}, 22 - }; 23 - 24 - /// The custom `zstd` dictionary used for decoding compressed Jetstream messages. 25 - /// 26 - /// Sourced from the [official Bluesky Jetstream repo.](https://github.com/bluesky-social/jetstream/tree/main/pkg/models) 27 - #[cfg(feature = "zstd")] 28 - static ZSTD_DICTIONARY: LazyLock<DecoderDictionary> = 29 - LazyLock::new(|| DecoderDictionary::copy(include_bytes!("../zstd/dictionary"))); 30 - 31 - pub async fn handle_message( 32 - message: Message, 33 - ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>, 34 - reconnect_tx: Sender<()>, 35 - cursor: Arc<Mutex<Option<u64>>>, 36 - ) -> Result<()> { 37 - describe_counter!( 38 - "jetstream.event", 39 - Unit::Count, 40 - "number of event ingest attempts" 41 - ); 42 - describe_counter!( 43 - "jetstream.event.parse", 44 - Unit::Count, 45 - "events that were successfully processed" 46 - ); 47 - describe_counter!( 48 - "jetstream.event.fail", 49 - Unit::Count, 50 - "events that could not be read" 51 - ); 52 - describe_counter!("jetstream.error", Unit::Count, "errors encountered"); 53 - match message { 54 - Message::Text(text) => { 55 - debug!("Text message received"); 56 - counter!("jetstream.event").increment(1); 57 - let envelope: Event<Value> = serde_json::from_str(&text).map_err(|e| { 58 - anyhow::anyhow!("Failed to parse message: {} with json string {}", e, text) 59 - })?; 60 - debug!("envelope: {:?}", envelope); 61 - handle_envelope(envelope, cursor, ingestors).await?; 62 - Ok(()) 63 - } 64 - #[cfg(feature = "zstd")] 65 - Message::Binary(bytes) => { 66 - debug!("Binary message received"); 67 - counter!("jetstream.event").increment(1); 68 - let decoder = zstd::stream::Decoder::with_prepared_dictionary( 69 - IoCursor::new(bytes), 70 - &*ZSTD_DICTIONARY, 71 - )?; 72 - let envelope: Event<Value> = serde_json::from_reader(decoder) 73 - .map_err(|e| anyhow::anyhow!("Failed to parse binary message: {}", e))?; 74 - debug!("envelope: {:?}", envelope); 75 - handle_envelope(envelope, cursor, ingestors).await?; 76 - Ok(()) 77 - } 78 - #[cfg(not(feature = "zstd"))] 79 - Message::Binary(_) => { 80 - debug!("Binary message received"); 81 - Err(anyhow::anyhow!( 82 - "binary message received but zstd feature is not enabled" 83 - )) 84 - } 85 - Message::Close(_) => { 86 - debug!("Server closed connection"); 87 - if let Err(e) = reconnect_tx.send(()) { 88 - counter!("jetstream.event.parse.error", "error" => "failed_to_send_reconnect_signal").increment(1); 89 - error!("Failed to send reconnect signal: {}", e); 90 - } 91 - Err(Error::ConnectionClosed.into()) 92 - } 93 - _ => Ok(()), 94 - } 95 - } 96 - 97 - async fn handle_envelope( 98 - envelope: Event<Value>, 99 - cursor: Arc<Mutex<Option<u64>>>, 100 - ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>, 101 - ) -> Result<()> { 102 - if let Some(ref time_us) = envelope.time_us { 103 - debug!("Time: {}", time_us); 104 - if let Some(cursor) = cursor.lock().unwrap().as_mut() { 105 - 
debug!("Cursor: {}", cursor); 106 - if time_us > cursor { 107 - debug!("Cursor is behind, resetting"); 108 - *cursor = *time_us; 109 - } 110 - } 111 - } 112 - 113 - match envelope.kind { 114 - Kind::Commit => match extract_commit_nsid(&envelope) { 115 - Ok(nsid) => { 116 - if let Some(fun) = ingestors.get(&nsid) { 117 - match fun.ingest(envelope).await { 118 - Ok(_) => { 119 - counter!("jetstream.event.parse.commit", "nsid" => nsid).increment(1) 120 - } 121 - Err(e) => { 122 - error!("Error ingesting commit with nsid {}: {}", nsid, e); 123 - counter!("jetstream.error").increment(1); 124 - counter!("jetstream.event.fail").increment(1); 125 - } 126 - } 127 - } 128 - } 129 - Err(e) => error!("Error parsing commit: {}", e), 130 - }, 131 - Kind::Identity => { 132 - counter!("jetstream.event.parse.identity").increment(1); 133 - } 134 - Kind::Account => { 135 - counter!("jetstream.event.parse.account").increment(1); 136 - } 137 - Kind::Unknown(kind) => { 138 - counter!("jetstream.event.parse.unknown", "kind" => kind).increment(1); 139 - } 140 - } 141 - Ok(()) 142 - } 143 - 144 - fn extract_commit_nsid(envelope: &Event<Value>) -> anyhow::Result<String> { 145 - // if the type is not a commit 146 - if envelope.commit.is_none() { 147 - return Err(anyhow::anyhow!( 148 - "Message has no commit, so there is no nsid attached." 149 - )); 150 - } else if let Some(ref commit) = envelope.commit { 151 - return Ok(commit.collection.clone()); 152 - } 153 - 154 - Err(anyhow::anyhow!("Failed to extract nsid: unknown error")) 155 - } 156 - 157 - #[cfg(test)] 158 - mod tests { 159 - use super::*; 160 - use crate::types::event::Event; 161 - use anyhow::Result; 162 - use async_trait::async_trait; 163 - use flume::{Receiver, Sender}; 164 - use serde_json::json; 165 - use std::{ 166 - collections::HashMap, 167 - sync::{Arc, Mutex}, 168 - }; 169 - use tokio_tungstenite::tungstenite::Message; 170 - 171 - // Dummy ingestor that records if it was called. 172 - struct DummyIngestor { 173 - pub called: Arc<Mutex<bool>>, 174 - } 175 - 176 - #[async_trait] 177 - impl crate::ingestion::LexiconIngestor for DummyIngestor { 178 - async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> { 179 - let mut called = self.called.lock().unwrap(); 180 - *called = true; 181 - Ok(()) 182 - } 183 - } 184 - 185 - // Dummy ingestor that always returns an error. 186 - struct ErrorIngestor; 187 - 188 - #[async_trait] 189 - impl crate::ingestion::LexiconIngestor for ErrorIngestor { 190 - async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> { 191 - Err(anyhow::anyhow!("Ingest error")) 192 - } 193 - } 194 - 195 - // Helper to create a reconnect channel. 196 - fn setup_reconnect_channel() -> (Sender<()>, Receiver<()>) { 197 - flume::unbounded() 198 - } 199 - 200 - #[tokio::test] 201 - async fn test_valid_commit_success() { 202 - let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel(); 203 - let cursor = Arc::new(Mutex::new(Some(100))); 204 - let called_flag = Arc::new(Mutex::new(false)); 205 - 206 - // Create a valid commit event JSON. 
207 - let event_json = json!({ 208 - "did": "did:example:123", 209 - "time_us": 200, 210 - "kind": "commit", 211 - "commit": { 212 - "rev": "1", 213 - "operation": "create", 214 - "collection": "ns1", 215 - "rkey": "rkey1", 216 - "record": { "foo": "bar" }, 217 - "cid": "cid123" 218 - }, 219 - }) 220 - .to_string(); 221 - 222 - let mut ingestors: HashMap< 223 - String, 224 - Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>, 225 - > = HashMap::new(); 226 - ingestors.insert( 227 - "ns1".to_string(), 228 - Box::new(DummyIngestor { 229 - called: called_flag.clone(), 230 - }), 231 - ); 232 - 233 - let result = handle_message( 234 - Message::Text(event_json), 235 - &ingestors, 236 - reconnect_tx, 237 - cursor.clone(), 238 - ) 239 - .await; 240 - assert!(result.is_ok()); 241 - // Check that the ingestor was called. 242 - assert!(*called_flag.lock().unwrap()); 243 - // Verify that the cursor got updated. 244 - assert_eq!(*cursor.lock().unwrap(), Some(200)); 245 - } 246 - 247 - #[cfg(feature = "zstd")] 248 - #[tokio::test] 249 - async fn test_binary_valid_commit() { 250 - let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel(); 251 - let cursor = Arc::new(Mutex::new(Some(100))); 252 - let called_flag = Arc::new(Mutex::new(false)); 253 - 254 - let uncompressed_json = json!({ 255 - "did": "did:example:123", 256 - "time_us": 200, 257 - "kind": "commit", 258 - "commit": { 259 - "rev": "1", 260 - "operation": "create", 261 - "collection": "ns1", 262 - "rkey": "rkey1", 263 - "record": { "foo": "bar" }, 264 - "cid": "cid123" 265 - }, 266 - }) 267 - .to_string(); 268 - 269 - let compressed_dest: IoCursor<Vec<u8>> = IoCursor::new(vec![]); 270 - let mut encoder = zstd::Encoder::with_prepared_dictionary( 271 - compressed_dest, 272 - &zstd::dict::EncoderDictionary::copy(include_bytes!("../zstd/dictionary"), 0), 273 - ) 274 - .unwrap(); 275 - std::io::copy( 276 - &mut IoCursor::new(uncompressed_json.as_bytes()), 277 - &mut encoder, 278 - ) 279 - .unwrap(); 280 - let compressed_dest = encoder.finish().unwrap(); 281 - 282 - let mut ingestors: HashMap< 283 - String, 284 - Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>, 285 - > = HashMap::new(); 286 - ingestors.insert( 287 - "ns1".to_string(), 288 - Box::new(DummyIngestor { 289 - called: called_flag.clone(), 290 - }), 291 - ); 292 - 293 - let result = handle_message( 294 - Message::Binary(compressed_dest.into_inner()), 295 - &ingestors, 296 - reconnect_tx, 297 - cursor.clone(), 298 - ) 299 - .await; 300 - 301 - assert!(result.is_ok()); 302 - // Check that the ingestor was called. 303 - assert!(*called_flag.lock().unwrap()); 304 - // Verify that the cursor got updated. 305 - assert_eq!(*cursor.lock().unwrap(), Some(200)); 306 - } 307 - 308 - #[tokio::test] 309 - async fn test_commit_ingest_failure() { 310 - let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel(); 311 - let cursor = Arc::new(Mutex::new(Some(100))); 312 - 313 - // Valid commit event with an ingestor that fails. 
314 - let event_json = json!({ 315 - "did": "did:example:123", 316 - "time_us": 300, 317 - "kind": "commit", 318 - "commit": { 319 - "rev": "1", 320 - "operation": "create", 321 - "collection": "ns_error", 322 - "rkey": "rkey1", 323 - "record": { "foo": "bar" }, 324 - "cid": "cid123" 325 - }, 326 - "identity": null 327 - }) 328 - .to_string(); 329 - 330 - let mut ingestors: HashMap< 331 - String, 332 - Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>, 333 - > = HashMap::new(); 334 - ingestors.insert("ns_error".to_string(), Box::new(ErrorIngestor)); 335 - 336 - // Even though ingestion fails, handle_message returns Ok(()). 337 - let result = handle_message( 338 - Message::Text(event_json), 339 - &ingestors, 340 - reconnect_tx, 341 - cursor.clone(), 342 - ) 343 - .await; 344 - assert!(result.is_ok()); 345 - // Cursor should still update because it comes before the ingest call. 346 - assert_eq!(*cursor.lock().unwrap(), Some(300)); 347 - } 348 - 349 - #[tokio::test] 350 - async fn test_identity_message() { 351 - let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel(); 352 - let cursor = Arc::new(Mutex::new(None)); 353 - // Valid identity event. 354 - let event_json = json!({ 355 - "did": "did:example:123", 356 - "time_us": 150, 357 - "kind": "identity", 358 - "commit": null, 359 - "identity": { 360 - "did": "did:example:123", 361 - "handle": "user", 362 - "seq": 1, 363 - "time": "2025-01-01T00:00:00Z" 364 - } 365 - }) 366 - .to_string(); 367 - let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> = 368 - HashMap::new(); 369 - 370 - let result = 371 - handle_message(Message::Text(event_json), &ingestors, reconnect_tx, cursor).await; 372 - assert!(result.is_ok()); 373 - } 374 - 375 - #[tokio::test] 376 - async fn test_close_message() { 377 - let (reconnect_tx, reconnect_rx) = setup_reconnect_channel(); 378 - let cursor = Arc::new(Mutex::new(None)); 379 - let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> = 380 - HashMap::new(); 381 - 382 - let result = handle_message(Message::Close(None), &ingestors, reconnect_tx, cursor).await; 383 - // Should return an error due to connection close. 384 - assert!(result.is_err()); 385 - // Verify that a reconnect signal was sent. 386 - let signal = reconnect_rx.recv_async().await; 387 - assert!(signal.is_ok()); 388 - } 389 - 390 - #[tokio::test] 391 - async fn test_invalid_json() { 392 - let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel(); 393 - let cursor = Arc::new(Mutex::new(None)); 394 - let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> = 395 - HashMap::new(); 396 - 397 - let invalid_json = "this is not json".to_string(); 398 - let result = handle_message( 399 - Message::Text(invalid_json), 400 - &ingestors, 401 - reconnect_tx, 402 - cursor, 403 - ) 404 - .await; 405 - assert!(result.is_err()); 406 - } 407 - 408 - #[tokio::test] 409 - async fn test_cursor_not_updated_if_lower() { 410 - let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel(); 411 - // Set an initial cursor value. 
412 - let cursor = Arc::new(Mutex::new(Some(300))); 413 - let event_json = json!({ 414 - "did": "did:example:123", 415 - "time_us": 200, 416 - "kind": "commit", 417 - "commit": { 418 - "rev": "1", 419 - "operation": "create", 420 - "collection": "ns1", 421 - "rkey": "rkey1", 422 - "record": { "foo": "bar" }, 423 - "cid": "cid123" 424 - }, 425 - "identity": null 426 - }) 427 - .to_string(); 428 - 429 - // Use a dummy ingestor that does nothing. 430 - let mut ingestors: HashMap< 431 - String, 432 - Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>, 433 - > = HashMap::new(); 434 - ingestors.insert( 435 - "ns1".to_string(), 436 - Box::new(DummyIngestor { 437 - called: Arc::new(Mutex::new(false)), 438 - }), 439 - ); 440 - 441 - let result = handle_message( 442 - Message::Text(event_json), 443 - &ingestors, 444 - reconnect_tx, 445 - cursor.clone(), 446 - ) 447 - .await; 448 - assert!(result.is_ok()); 449 - // Cursor should remain unchanged. 450 - assert_eq!(*cursor.lock().unwrap(), Some(300)); 451 - } 452 - }
···
-22
services/rocketman/src/ingestion.rs
··· 1 - use anyhow::Result; 2 - use async_trait::async_trait; 3 - use serde_json::Value; 4 - use tracing::info; 5 - 6 - use crate::types::event::Event; 7 - 8 - #[async_trait] 9 - pub trait LexiconIngestor { 10 - async fn ingest(&self, message: Event<Value>) -> Result<()>; 11 - } 12 - 13 - pub struct DefaultLexiconIngestor; 14 - 15 - #[async_trait] 16 - impl LexiconIngestor for DefaultLexiconIngestor { 17 - async fn ingest(&self, message: Event<Value>) -> Result<()> { 18 - info!("Default lexicon processing: {:?}", message); 19 - // Process message for default lexicon. 20 - Ok(()) 21 - } 22 - }
···
-8
services/rocketman/src/lib.rs
··· 1 - // lib.rs 2 - pub mod connection; 3 - pub mod endpoints; 4 - pub mod handler; 5 - pub mod ingestion; 6 - pub mod options; 7 - pub mod time; 8 - pub mod types;
···
-40
services/rocketman/src/options.rs
··· 1 - use bon::Builder; 2 - 3 - use crate::endpoints::JetstreamEndpoints; 4 - 5 - #[derive(Builder, Debug)] 6 - pub struct JetstreamOptions { 7 - #[builder(default)] 8 - pub ws_url: JetstreamEndpoints, 9 - #[builder(default)] 10 - pub max_retry_interval_seconds: u64, 11 - #[builder(default)] 12 - pub connection_success_time_seconds: u64, 13 - #[builder(default)] 14 - pub bound: usize, 15 - #[builder(default)] 16 - pub timeout_time_sec: usize, 17 - #[cfg(feature = "zstd")] 18 - #[builder(default = true)] 19 - pub compress: bool, 20 - pub wanted_collections: Option<Vec<String>>, 21 - pub wanted_dids: Option<Vec<String>>, 22 - pub cursor: Option<String>, 23 - } 24 - 25 - impl Default for JetstreamOptions { 26 - fn default() -> Self { 27 - Self { 28 - ws_url: JetstreamEndpoints::default(), 29 - max_retry_interval_seconds: 120, 30 - connection_success_time_seconds: 60, 31 - bound: 65536, 32 - timeout_time_sec: 40, 33 - #[cfg(feature = "zstd")] 34 - compress: true, 35 - wanted_collections: None, 36 - wanted_dids: None, 37 - cursor: None, 38 - } 39 - } 40 - }
···
-11
services/rocketman/src/time/mod.rs
··· 1 - use std::time::{Duration, Instant, SystemTime}; 2 - 3 - pub mod system_time; 4 - 5 - pub trait TimeProvider { 6 - fn new() -> Self; 7 - fn now(&self) -> SystemTime; // Get the current time 8 - fn elapsed(&self, earlier: SystemTime) -> Duration; // Calculate the elapsed time. 9 - fn instant_now(&self) -> Instant; // For compatibility with your existing code (if needed) 10 - fn instant_elapsed(&self, earlier: Instant) -> Duration; 11 - }
···
-28
services/rocketman/src/time/system_time.rs
··· 1 - use std::time::{Duration, Instant, SystemTime}; 2 - 3 - use super::TimeProvider; 4 - 5 - #[derive(Default, Clone, Copy)] // Add these derives for ease of use 6 - pub struct SystemTimeProvider; // No fields needed, just a marker type 7 - 8 - impl TimeProvider for SystemTimeProvider { 9 - fn new() -> Self { 10 - Self 11 - } 12 - 13 - fn now(&self) -> SystemTime { 14 - SystemTime::now() 15 - } 16 - 17 - fn elapsed(&self, earlier: SystemTime) -> Duration { 18 - earlier.elapsed().unwrap_or_else(|_| Duration::from_secs(0)) 19 - } 20 - 21 - fn instant_now(&self) -> Instant { 22 - Instant::now() 23 - } 24 - 25 - fn instant_elapsed(&self, earlier: Instant) -> Duration { 26 - earlier.elapsed() 27 - } 28 - }
···
-116
services/rocketman/src/types/event.rs
··· 1 - use serde::{Deserialize, Deserializer, Serialize}; 2 - 3 - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] 4 - #[serde(rename_all = "lowercase")] 5 - pub enum Kind { 6 - Account, 7 - Identity, 8 - Commit, 9 - Unknown(String), 10 - } 11 - 12 - #[derive(Debug, Serialize, Deserialize)] 13 - #[serde(rename_all = "snake_case")] 14 - pub struct Event<T> { 15 - pub did: String, 16 - pub time_us: Option<u64>, 17 - pub kind: Kind, 18 - pub commit: Option<Commit<T>>, 19 - pub identity: Option<Identity>, 20 - } 21 - 22 - #[derive(Debug, Serialize, Deserialize)] 23 - pub struct Identity { 24 - did: String, 25 - handle: Option<String>, 26 - seq: u64, 27 - time: String, 28 - } 29 - 30 - #[derive(Debug, Serialize, Deserialize)] 31 - #[serde(rename_all = "lowercase")] 32 - enum AccountStatus { 33 - TakenDown, 34 - Suspended, 35 - Deleted, 36 - Activated, 37 - } 38 - 39 - #[derive(Debug, Serialize, Deserialize)] 40 - pub struct Account { 41 - did: String, 42 - handle: String, 43 - seq: u64, 44 - time: String, 45 - status: AccountStatus, 46 - } 47 - 48 - #[derive(Debug, Serialize)] 49 - #[serde(rename_all = "camelCase")] 50 - pub struct Commit<T> { 51 - pub rev: String, 52 - pub operation: Operation, 53 - pub collection: String, 54 - pub rkey: String, 55 - pub record: Option<T>, 56 - pub cid: Option<String>, 57 - } 58 - 59 - #[derive(Debug, Serialize, Deserialize)] 60 - #[serde(rename_all = "lowercase")] 61 - pub enum Operation { 62 - Create, 63 - Update, 64 - Delete, 65 - } 66 - 67 - /// Enforce that record is None only when operation is 'delete' 68 - impl<'de, T> Deserialize<'de> for Commit<T> 69 - where 70 - T: Deserialize<'de>, 71 - { 72 - fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 73 - where 74 - D: Deserializer<'de>, 75 - { 76 - // Helper struct to perform the deserialization. 77 - #[derive(Deserialize)] 78 - #[serde(rename_all = "camelCase")] 79 - struct Helper<T> { 80 - rev: String, 81 - operation: Operation, 82 - collection: String, 83 - rkey: String, 84 - record: Option<T>, 85 - cid: Option<String>, 86 - } 87 - 88 - let helper = Helper::deserialize(deserializer)?; 89 - 90 - match helper.operation { 91 - Operation::Delete => { 92 - if helper.record.is_some() || helper.cid.is_some() { 93 - return Err(<D::Error as serde::de::Error>::custom( 94 - "record and cid must be null when operation is delete", 95 - )); 96 - } 97 - } 98 - _ => { 99 - if helper.record.is_none() || helper.cid.is_none() { 100 - return Err(<D::Error as serde::de::Error>::custom( 101 - "record and cid must be present unless operation is delete", 102 - )); 103 - } 104 - } 105 - } 106 - 107 - Ok(Commit { 108 - rev: helper.rev, 109 - operation: helper.operation, 110 - collection: helper.collection, 111 - rkey: helper.rkey, 112 - record: helper.record, 113 - cid: helper.cid, 114 - }) 115 - } 116 - }
···
-1
services/rocketman/src/types/mod.rs
··· 1 - pub mod event;
···
services/rocketman/zstd/dictionary

This is a binary file and will not be displayed.

+1 -6
services/satellite/src/counts.rs
··· 3 http::StatusCode, 4 Json, 5 }; 6 - use chrono::{DateTime, Utc}; 7 use serde::{Deserialize, Serialize}; 8 use sqlx::FromRow; 9 use uuid::Uuid; ··· 43 pub limit: i64, 44 } 45 46 - #[derive(FromRow, Debug, Deserialize, Serialize)] 47 pub struct Play { 48 pub did: String, 49 pub track_name: String, ··· 51 pub release_name: Option<String>, 52 pub release_mbid: Option<Uuid>, 53 pub duration: Option<i32>, 54 - pub played_time: Option<DateTime<Utc>>, 55 pub uri: Option<String>, 56 // MASSIVE HUGE HACK 57 pub artists: Option<String>, ··· 65 pub release_name: Option<String>, 66 pub release_mbid: Option<Uuid>, 67 pub duration: Option<i32>, 68 - pub played_time: Option<DateTime<Utc>>, 69 pub uri: Option<String>, 70 pub artists: Vec<Artist>, 71 } ··· 92 -- TODO: replace with actual 93 STRING_AGG(pa.artist_name || '|' || TEXT(pa.artist_mbid), ',') AS artists, 94 p.release_name, 95 - p.played_time, 96 p.duration, 97 p.uri, 98 p.recording_mbid, ··· 138 release_name: play.release_name, 139 release_mbid: play.release_mbid, 140 duration: play.duration, 141 - played_time: play.played_time, 142 uri: play.uri, 143 artists, 144 }
··· 3 http::StatusCode, 4 Json, 5 }; 6 use serde::{Deserialize, Serialize}; 7 use sqlx::FromRow; 8 use uuid::Uuid; ··· 42 pub limit: i64, 43 } 44 45 + #[derive(FromRow, Debug)] 46 pub struct Play { 47 pub did: String, 48 pub track_name: String, ··· 50 pub release_name: Option<String>, 51 pub release_mbid: Option<Uuid>, 52 pub duration: Option<i32>, 53 pub uri: Option<String>, 54 // MASSIVE HUGE HACK 55 pub artists: Option<String>, ··· 63 pub release_name: Option<String>, 64 pub release_mbid: Option<Uuid>, 65 pub duration: Option<i32>, 66 pub uri: Option<String>, 67 pub artists: Vec<Artist>, 68 } ··· 89 -- TODO: replace with actual 90 STRING_AGG(pa.artist_name || '|' || TEXT(pa.artist_mbid), ',') AS artists, 91 p.release_name, 92 p.duration, 93 p.uri, 94 p.recording_mbid, ··· 134 release_name: play.release_name, 135 release_mbid: play.release_mbid, 136 duration: play.duration, 137 uri: play.uri, 138 artists, 139 }
+31
services/types/Cargo.toml
···
··· 1 + [package] 2 + name = "types" 3 + version = "0.1.0" 4 + edition = "2021" 5 + 6 + [dependencies] 7 + atrium-api.workspace = true 8 + atrium-xrpc = "0.12.1" 9 + chrono = "0.4.39" 10 + http = "1.2.0" 11 + ipld-core = { version = "0.4.2", features = ["serde"] } 12 + langtag = { version = "0.3", features = ["serde"] } 13 + regex = "1.11.1" 14 + serde = { workspace = true, features = ["derive"] } 15 + serde_bytes = "0.11.15" 16 + serde_ipld_dagcbor = "0.6.2" 17 + serde_json.workspace = true 18 + thiserror = "2.0.11" 19 + 20 + # features 21 + [features] 22 + default = [ 23 + "namespace-fmteal", 24 + "namespace-appbsky", 25 + "namespace-toolsozone", 26 + "namespace-chatbsky", 27 + ] 28 + namespace-fmteal = [] 29 + namespace-appbsky = [] 30 + namespace-toolsozone = [] 31 + namespace-chatbsky = []
+10
services/types/readme.md
···
··· 1 + ## Types 2 + Rust lexicons for teal.fm and others. 3 + 4 + ### Generate lexicons 5 + You will need to install [esquema-cli](https://github.com/fatfingers23/esquema), a fork of the [atrium codegen tool](https://github.com/sugyan/atrium). 6 + 7 + For now it can be installed directly from the repo: 8 + `cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git` 9 + 10 + Then regenerate the types with `esquema-cli generate local --lexdir ./lexicons --outdir ./src` from this directory
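For orientation, below is a minimal sketch of the kind of record these lexicons describe, parsed with plain `serde_json`. It is not the generated API: only the `fm.teal.alpha.feed.play` record shape shown elsewhere in this changeset is assumed, and the esquema-generated structs would replace the untyped field access here.

```rust
use serde_json::Value;

fn main() -> Result<(), serde_json::Error> {
    // Record shape taken from the fm.teal.alpha.feed.play samples in the
    // teal-cli docs; the generated types would replace this untyped Value.
    let raw = r#"{
        "$type": "fm.teal.alpha.feed.play",
        "track_name": "Bohemian Rhapsody",
        "artist_names": ["Queen"],
        "duration": 355000
    }"#;
    let record: Value = serde_json::from_str(raw)?;
    println!(
        "{} ({} ms)",
        record["track_name"].as_str().unwrap_or("?"),
        record["duration"]
    );
    Ok(())
}
```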
+55
target.sh
···
··· 1 + #!/bin/bash 2 + set -e 3 + 4 + # Debug: Print all available build variables 5 + echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM" 6 + echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM" 7 + echo "DEBUG: TARGETARCH=$TARGETARCH" 8 + echo "DEBUG: TARGETOS=$TARGETOS" 9 + 10 + # Use TARGETARCH directly (more reliable than TARGETPLATFORM) 11 + TARGET_ARCH_VAR="${TARGETARCH:-}" 12 + 13 + # If TARGETARCH is not set, try to extract from TARGETPLATFORM 14 + if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then 15 + TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2) 16 + echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM" 17 + fi 18 + 19 + # Final fallback: detect from uname 20 + if [ -z "$TARGET_ARCH_VAR" ]; then 21 + ARCH=$(uname -m) 22 + case "$ARCH" in 23 + "x86_64") 24 + TARGET_ARCH_VAR="amd64" 25 + ;; 26 + "aarch64") 27 + TARGET_ARCH_VAR="arm64" 28 + ;; 29 + *) 30 + echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH" 31 + echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM" 32 + exit 1 33 + ;; 34 + esac 35 + echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname" 36 + fi 37 + 38 + # Map architecture to Rust target 39 + case "$TARGET_ARCH_VAR" in 40 + "amd64") 41 + export RUST_TARGET="x86_64-unknown-linux-gnu" 42 + export TARGET_ARCH="amd64" 43 + ;; 44 + "arm64") 45 + export RUST_TARGET="aarch64-unknown-linux-gnu" 46 + export TARGET_ARCH="arm64" 47 + ;; 48 + *) 49 + echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR" 50 + echo "Supported architectures: amd64, arm64" 51 + exit 1 52 + ;; 53 + esac 54 + 55 + echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
+92 -62
tools/lexicon-cli/src/commands/generate.ts
··· 1 - import { execa } from 'execa'; 2 - import { existsSync } from 'fs'; 3 - import { join } from 'path'; 4 - import pc from 'picocolors'; 5 - import { findWorkspaceRoot } from '../utils/workspace.js'; 6 7 interface GenerateOptions { 8 tsOnly?: boolean; ··· 12 13 export async function generate(options: GenerateOptions = {}) { 14 const workspaceRoot = findWorkspaceRoot(); 15 - 16 - console.log(pc.blue('๐Ÿ”ง Generating lexicon types...')); 17 - 18 try { 19 if (!options.rustOnly) { 20 await generateTypeScript(workspaceRoot, options.force); 21 } 22 - 23 if (!options.tsOnly) { 24 await generateRust(workspaceRoot, options.force); 25 } 26 - 27 - console.log(pc.green('โœ… Lexicon generation complete!')); 28 } catch (error) { 29 - console.error(pc.red('โŒ Generation failed:'), error instanceof Error ? error.message : String(error)); 30 process.exit(1); 31 } 32 } 33 34 async function generateTypeScript(workspaceRoot: string, force?: boolean) { 35 - const lexiconsPath = join(workspaceRoot, 'lexicons'); 36 - 37 if (!existsSync(lexiconsPath)) { 38 - throw new Error('Lexicons directory not found at workspace root'); 39 } 40 - 41 // Check if packages/lexicons exists for TypeScript generation 42 - const packagesLexiconsPath = join(workspaceRoot, 'packages/lexicons'); 43 if (!existsSync(packagesLexiconsPath)) { 44 - console.log(pc.yellow(' โš ๏ธ TypeScript lexicons package not found, skipping TypeScript generation')); 45 return; 46 } 47 - 48 - console.log(pc.cyan(' ๐Ÿ“ฆ Generating TypeScript types...')); 49 - 50 try { 51 - await execa('pnpm', ['lex:gen-server'], { 52 cwd: packagesLexiconsPath, 53 - stdio: 'inherit' 54 }); 55 - console.log(pc.green(' โœ“ TypeScript types generated')); 56 } catch (error) { 57 - throw new Error(`TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`); 58 } 59 } 60 61 async function generateRust(workspaceRoot: string, force?: boolean) { 62 - const typesPath = join(workspaceRoot, 'services/types'); 63 - const lexiconsPath = join(workspaceRoot, 'lexicons'); 64 - 65 if (!existsSync(typesPath)) { 66 - throw new Error('Rust types service not found'); 67 } 68 - 69 if (!existsSync(lexiconsPath)) { 70 - throw new Error('Lexicons directory not found at workspace root'); 71 } 72 - 73 - console.log(pc.cyan(' ๐Ÿฆ€ Generating Rust types...')); 74 - 75 try { 76 // Check if esquema-cli is available 77 try { 78 - await execa('esquema-cli', ['--version'], { stdio: 'pipe' }); 79 } catch { 80 - console.log(pc.yellow(' โš ๏ธ esquema-cli not found. Installing...')); 81 try { 82 - await execa('cargo', [ 83 - 'install', 84 - 'esquema-cli', 85 - '--git', 86 - 'https://github.com/fatfingers23/esquema.git' 87 - ], { 88 - stdio: 'inherit' 89 - }); 90 - console.log(pc.green(' โœ“ esquema-cli installed successfully')); 91 } catch (installError) { 92 - throw new Error('Failed to install esquema-cli. Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git'); 93 } 94 } 95 - 96 - await execa('esquema-cli', [ 97 - 'generate', 98 - 'local', 99 - '--lexdir', 100 - lexiconsPath, 101 - '--outdir', 102 - join(typesPath, 'src') 103 - ], { 104 - cwd: typesPath, 105 - stdio: 'inherit' 106 - }); 107 - 108 - console.log(pc.green(' โœ“ Rust types generated')); 109 } catch (error) { 110 - throw new Error(`Rust generation failed: ${error instanceof Error ? error.message : String(error)}`); 111 } 112 - }
··· 1 + import { existsSync } from "fs"; 2 + import { join } from "path"; 3 + import { execa } from "execa"; 4 + import pc from "picocolors"; 5 + 6 + import { findWorkspaceRoot } from "../utils/workspace.js"; 7 8 interface GenerateOptions { 9 tsOnly?: boolean; ··· 13 14 export async function generate(options: GenerateOptions = {}) { 15 const workspaceRoot = findWorkspaceRoot(); 16 + 17 + console.log(pc.blue("๐Ÿ”ง Generating lexicon types...")); 18 + 19 try { 20 if (!options.rustOnly) { 21 await generateTypeScript(workspaceRoot, options.force); 22 } 23 + 24 if (!options.tsOnly) { 25 await generateRust(workspaceRoot, options.force); 26 } 27 + 28 + console.log(pc.green("โœ… Lexicon generation complete!")); 29 } catch (error) { 30 + console.error( 31 + pc.red("โŒ Generation failed:"), 32 + error instanceof Error ? error.message : String(error), 33 + ); 34 process.exit(1); 35 } 36 } 37 38 async function generateTypeScript(workspaceRoot: string, force?: boolean) { 39 + const lexiconsPath = join(workspaceRoot, "lexicons"); 40 + 41 if (!existsSync(lexiconsPath)) { 42 + throw new Error("Lexicons directory not found at workspace root"); 43 } 44 + 45 // Check if packages/lexicons exists for TypeScript generation 46 + const packagesLexiconsPath = join(workspaceRoot, "packages/lexicons"); 47 if (!existsSync(packagesLexiconsPath)) { 48 + console.log( 49 + pc.yellow( 50 + " โš ๏ธ TypeScript lexicons package not found, skipping TypeScript generation", 51 + ), 52 + ); 53 return; 54 } 55 + 56 + console.log(pc.cyan(" ๐Ÿ“ฆ Generating TypeScript types...")); 57 + 58 try { 59 + await execa("pnpm", ["lex:gen-server"], { 60 cwd: packagesLexiconsPath, 61 + stdio: "inherit", 62 }); 63 + console.log(pc.green(" โœ“ TypeScript types generated")); 64 } catch (error) { 65 + throw new Error( 66 + `TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`, 67 + ); 68 } 69 } 70 71 async function generateRust(workspaceRoot: string, force?: boolean) { 72 + const typesPath = join(workspaceRoot, "services/types"); 73 + const lexiconsPath = join(workspaceRoot, "lexicons"); 74 + 75 if (!existsSync(typesPath)) { 76 + throw new Error("Rust types service not found"); 77 } 78 + 79 if (!existsSync(lexiconsPath)) { 80 + throw new Error("Lexicons directory not found at workspace root"); 81 } 82 + 83 + console.log(pc.cyan(" ๐Ÿฆ€ Generating Rust types...")); 84 + 85 try { 86 // Check if esquema-cli is available 87 try { 88 + await execa("esquema-cli", ["--version"], { stdio: "pipe" }); 89 } catch { 90 + console.log(pc.yellow(" โš ๏ธ esquema-cli not found. Installing...")); 91 try { 92 + await execa( 93 + "cargo", 94 + [ 95 + "install", 96 + "esquema-cli", 97 + "--git", 98 + "https://github.com/fatfingers23/esquema.git", 99 + ], 100 + { 101 + stdio: "inherit", 102 + }, 103 + ); 104 + console.log(pc.green(" โœ“ esquema-cli installed successfully")); 105 } catch (installError) { 106 + throw new Error( 107 + "Failed to install esquema-cli. 
Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git", 108 + ); 109 } 110 } 111 + 112 + // create typespath/src if it doesn't exist 113 + if (!existsSync(join(typesPath, "src"))) { 114 + console.log(pc.yellow(" Creating src directory for Rust types...")); 115 + await execa("mkdir", ["-p", join(typesPath, "src")], { 116 + stdio: "inherit", 117 + }); 118 + } 119 + 120 + await execa( 121 + "esquema-cli", 122 + [ 123 + "generate", 124 + "local", 125 + "--lexdir", 126 + lexiconsPath, 127 + "--outdir", 128 + join(typesPath, "src"), 129 + ], 130 + { 131 + cwd: typesPath, 132 + stdio: "inherit", 133 + }, 134 + ); 135 + 136 + console.log(pc.green(" โœ“ Rust types generated")); 137 } catch (error) { 138 + throw new Error( 139 + `Rust generation failed: ${error instanceof Error ? error.message : String(error)}`, 140 + ); 141 } 142 + }
+44
tools/teal-cli/Cargo.toml
···
··· 1 + [package] 2 + name = "teal-cli" 3 + version = "0.1.0" 4 + edition = "2021" 5 + description = "A simple management tool for teal.fm AT Protocol services" 6 + 7 + [[bin]] 8 + name = "teal" 9 + path = "src/main.rs" 10 + 11 + [dependencies] 12 + # CLI framework 13 + clap = { version = "4.0", features = ["derive"] } 14 + anyhow = "1.0" 15 + serde = { version = "1.0", features = ["derive"] } 16 + serde_json = "1.0" 17 + 18 + # Async runtime 19 + tokio = { version = "1.0", features = [ 20 + "rt", 21 + "macros", 22 + "fs", 23 + "rt-multi-thread", 24 + ] } 25 + 26 + # Cryptography 27 + k256 = { version = "0.13", features = ["ecdsa"] } 28 + multibase = "0.9" 29 + hex = "0.4" 30 + rand = "0.8" 31 + 32 + # File system and paths 33 + dirs = "5.0" 34 + 35 + # Utilities 36 + chrono = { version = "0.4", features = ["serde"] } 37 + colored = "2.0" 38 + 39 + 40 + [features] 41 + default = [] 42 + 43 + [dev-dependencies] 44 + tempfile = "3.0"
+257
tools/teal-cli/README.md
···
··· 1 + # Teal CLI 2 + 3 + A comprehensive management tool for Teal AT Protocol services, featuring cryptographic key management and CAR (Content Addressable aRchive) file exploration. 4 + 5 + ## Installation 6 + 7 + From the project root: 8 + 9 + ```bash 10 + cargo build --release --bin teal 11 + ``` 12 + 13 + The binary will be available at `target/release/teal`. 14 + 15 + ## Usage 16 + 17 + ### CAR File Explorer 18 + 19 + Explore and analyze CAR files containing AT Protocol and Teal records. 20 + 21 + #### Fetch CAR file from the internet 22 + 23 + ```bash 24 + # Fetch from AT Protocol handle 25 + teal car fetch --identity alice.bsky.social 26 + 27 + # Fetch from DID 28 + teal car fetch --identity did:plc:vdjlpwlhbnug4fnjodwr3vzh 29 + 30 + # Fetch and save to specific file 31 + teal car fetch --identity mmatt.net --output mmatt.car 32 + 33 + # Fetch and immediately explore 34 + teal car fetch --identity mmatt.net --explore 35 + ``` 36 + 37 + #### Explore a CAR file 38 + 39 + ```bash 40 + # Basic exploration 41 + teal car explore --file path/to/archive.car 42 + 43 + # Verbose output with detailed information 44 + teal car explore --file path/to/archive.car --verbose 45 + ``` 46 + 47 + #### Search for specific content 48 + 49 + ```bash 50 + # Search for records containing "play" 51 + teal car search --file path/to/archive.car --query "play" 52 + 53 + # Search with verbose JSON output 54 + teal car search --file path/to/archive.car --query "queen" --verbose 55 + ``` 56 + 57 + #### Export Teal records to JSON 58 + 59 + ```bash 60 + # Export to default directory (./teal_exports) 61 + teal car export --file path/to/archive.car 62 + 63 + # Export to custom directory 64 + teal car export --file path/to/archive.car --output ./my_exports 65 + ``` 66 + 67 + ### Generate a new K256 key pair 68 + 69 + ```bash 70 + # Generate with default settings (saves to ~/.teal/keys/) 71 + teal gen-key 72 + 73 + # Generate with custom name 74 + teal gen-key --name production 75 + 76 + # Generate with custom output directory 77 + teal gen-key --output ./keys 78 + 79 + # Overwrite existing keys 80 + teal gen-key --force 81 + 82 + # Output only the multibase (useful for scripts) 83 + teal gen-key --format multibase 84 + 85 + # Output as JSON 86 + teal gen-key --format json 87 + ``` 88 + 89 + ### Extract public key from existing private key 90 + 91 + ```bash 92 + # Extract as multibase (default) 93 + teal extract-pubkey --private-key ./keys/repo.key 94 + 95 + # Extract as hex 96 + teal extract-pubkey --private-key ./keys/repo.key --format hex 97 + 98 + # Extract as JSON with both formats 99 + teal extract-pubkey --private-key ./keys/repo.key --format json 100 + ``` 101 + 102 + ### List available keys 103 + 104 + ```bash 105 + # List keys in default directory 106 + teal list 107 + 108 + # List keys in custom directory 109 + teal list --directory ./keys 110 + ``` 111 + 112 + ### Rotate keys (backup old, generate new) 113 + 114 + ```bash 115 + # Rotate the default 'repo' key 116 + teal rotate --name repo 117 + 118 + # Rotate with custom backup directory 119 + teal rotate --name repo --backup-dir ./backups 120 + ``` 121 + 122 + ## CAR File Analysis 123 + 124 + The CAR explorer can analyze AT Protocol archives and identify: 125 + 126 + - **Teal Records**: Music plays (`fm.teal.alpha.feed.play`), profiles (`fm.teal.alpha.actor.profile`), and status updates 127 + - **AT Protocol Records**: BlueSky posts, likes, follows, and other social data 128 + - **Commit Operations**: Repository changes and metadata 129 + - **IPLD 
Structure**: Content addressing and linking 130 + 131 + ### Example Output 132 + 133 + ``` 134 + 📊 CAR Analysis Results 135 + ================================================== 136 + 137 + 📁 File Overview: 138 + File size: 10267026 bytes 139 + Total blocks: 30195 140 + Root CIDs: 1 141 + 142 + 📋 Record Types: 143 + app.bsky.feed.like: 11034 144 + app.bsky.feed.post: 7510 145 + fm.teal.alpha.feed.play: 2605 146 + fm.teal.alpha.actor.profile: 1 147 + 148 + 🎵 Teal Records Found: 149 + fm.teal.alpha.feed.play: 2605 150 + fm.teal.alpha.actor.profile: 1 151 + 152 + 🔍 Sample Teal Records: 153 + 1. fm.teal.alpha.feed.play (bafyreigmu...) 154 + 🎵 Track: Bohemian Rhapsody 155 + 🎤 Artists: Queen 156 + ⏱️ Duration: 355000ms 157 + ``` 158 + 159 + ### Exported JSON Structure 160 + 161 + ```json 162 + [ 163 + { 164 + "cid": "bafyreigmuwliezhxczoxgxq5hjtsdzaj3jl54kg...", 165 + "data": { 166 + "$type": "fm.teal.alpha.feed.play", 167 + "track_name": "Bohemian Rhapsody", 168 + "artist_names": ["Queen"], 169 + "duration": 355000, 170 + "played_time": "2024-01-15T14:30:00Z" 171 + } 172 + } 173 + ] 174 + ``` 175 + 176 + ## Key Management 177 + 178 + The tool generates K256 (secp256k1) keys compatible with AT Protocol: 179 + 180 + - **Private Key**: 32-byte secp256k1 private key stored as binary 181 + - **Public Key**: Base58-encoded multibase of the compressed public key 182 + - **Default Location**: `~/.teal/keys/` 183 + 184 + ### File Structure 185 + 186 + ``` 187 + ~/.teal/keys/ 188 + ├── repo.key # Private key (32 bytes, binary) 189 + ├── repo.pub # Public key multibase (text) 190 + ├── production.key # Another private key 191 + └── production.pub # Another public key multibase 192 + ``` 193 + 194 + ## Integration 195 + 196 + Replace the hardcoded multibase in your DID document: 197 + 198 + ```rust 199 + // Before (hardcoded) 200 + "publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK" 201 + 202 + // After (using generated key) 203 + let pubkey = std::fs::read_to_string("~/.teal/keys/repo.pub")?; 204 + // Use pubkey in your DID document 205 + ``` 206 + 207 + ## Examples 208 + 209 + ### CAR File Analysis 210 + 211 + ```bash 212 + # Fetch CAR file from a user's handle 213 + teal car fetch --identity mmatt.net --output mmatt.car 214 + 215 + # Fetch and immediately explore 216 + teal car fetch --identity alice.bsky.social --explore 217 + 218 + # Analyze a local CAR export 219 + teal car explore --file nat.car 220 + 221 + # Search for specific tracks 222 + teal car search --file nat.car --query "bohemian rhapsody" 223 + 224 + # Export all Teal records for data analysis 225 + teal car export --file nat.car --output ./music_data 226 + 227 + # View exported play records 228 + cat ./music_data/fm_teal_alpha_feed_play.json | jq '.[0]' 229 + ``` 230 + 231 + ### Quick setup 232 + 233 + ```bash 234 + # Generate a key for development 235 + teal gen-key --name dev 236 + 237 + # Get the multibase for your DID document 238 + teal extract-pubkey --private-key ~/.teal/keys/dev.key 239 + ``` 240 + 241 + ### Production deployment 242 + 243 + ```bash 244 + # Generate production keys in a secure location 245 + teal gen-key --name production --output /secure/keys 246 + 247 + # Extract multibase for configuration 248 + PUBKEY=$(teal extract-pubkey --private-key /secure/keys/production.key) 249 + echo "Public key: $PUBKEY" 250 + ``` 251 + 252 + ## Security Notes 253 + 254 + - Private keys are stored as raw 32-byte files with restrictive permissions (600 on Unix) 255
+ - Keys are generated using cryptographically secure random number generation 256 + - Never commit private keys to version control 257 + - Consider using secure key management systems in production
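The Integration snippet above stops at reading the `.pub` file; below is a slightly fuller sketch of wiring that multibase into a DID document. The `id`, `type`, and `controller` values are placeholders rather than anything this CLI emits, so adjust them to whatever your DID document template actually expects (crates used: `anyhow`, `dirs`, `serde_json`, all already listed in this Cargo.toml).

```rust
use anyhow::Result;
use serde_json::json;

fn main() -> Result<()> {
    // `teal gen-key` writes the multibase to ~/.teal/keys/<name>.pub (see above).
    let keys_dir = dirs::home_dir().expect("no home dir").join(".teal/keys");
    let pubkey = std::fs::read_to_string(keys_dir.join("repo.pub"))?;

    // Illustrative verification-method shape; the id/type/controller values
    // are assumptions for the sketch, not output of the CLI.
    let method = json!({
        "id": "did:web:example.com#atproto",
        "type": "Multikey",
        "controller": "did:web:example.com",
        "publicKeyMultibase": pubkey.trim(),
    });
    println!("{}", serde_json::to_string_pretty(&method)?);
    Ok(())
}
```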
+104
tools/teal-cli/rkey_example.md
···
··· 1 + # How to Extract rkey from AT Protocol CAR Files 2 + 3 + The **rkey** (record key) is not stored inside the IPLD record data itself. Instead, it's found in **commit operations** that map collection paths to record CIDs. 4 + 5 + ## AT Protocol Structure 6 + 7 + ``` 8 + Repository Structure: 9 + ├── Records (IPLD blocks) 10 + │ ├── bafyrei123... (actual play record data) 11 + │ ├── bafyrei456... (actual profile record data) 12 + │ └── bafyrei789... (actual post record data) 13 + └── Commits (IPLD blocks) 14 + ├── bafycommit1... (operations mapping paths to CIDs) 15 + └── bafycommit2... (more operations) 16 + ``` 17 + 18 + ## Example: Record IPLD (without rkey) 19 + 20 + ```json 21 + { 22 + "$type": "fm.teal.alpha.feed.play", 23 + "track_name": "Bohemian Rhapsody", 24 + "artist_names": ["Queen"], 25 + "duration": 355000, 26 + "played_time": "2024-01-15T14:30:00Z" 27 + } 28 + ``` 29 + 30 + **❌ No rkey here!** The record contains the data but not its key. 31 + 32 + ## Example: Commit IPLD (with rkey mappings) 33 + 34 + ```json 35 + { 36 + "ops": [ 37 + { 38 + "action": "create", 39 + "path": "fm.teal.alpha.feed.play/3k2akjdlkjsf", // ← collection/rkey 40 + "cid": "bafyrei123..." // ← points to the record above 41 + }, 42 + { 43 + "action": "create", 44 + "path": "fm.teal.alpha.actor.profile/self", 45 + "cid": "bafyrei456..." 46 + } 47 + ], 48 + "prev": "bafyrei...", 49 + "rev": "3k2bkl...", 50 + "time": "2024-01-15T14:35:00Z" 51 + } 52 + ``` 53 + 54 + **✅ rkey is here!** Extract it from the `path` field: `"3k2akjdlkjsf"` 55 + 56 + ## Extraction Algorithm 57 + 58 + ```rust 59 + fn extract_rkeys_from_commits(commits: &[CommitInfo]) -> HashMap<String, String> { 60 + let mut cid_to_rkey = HashMap::new(); 61 + 62 + for commit in commits { 63 + for operation in &commit.operations { 64 + // Path format: "collection/rkey" 65 + if let Some(rkey) = operation.path.split('/').last() { 66 + if let Some(ref record_cid) = operation.record_cid { 67 + cid_to_rkey.insert(record_cid.clone(), rkey.to_string()); 68 + } 69 + } 70 + } 71 + } 72 + 73 + cid_to_rkey 74 + } 75 + ``` 76 + 77 + ## Complete Example 78 + 79 + 1. **Find commit blocks** in CAR file 80 + 2. **Extract operations** from commit IPLD 81 + 3. **Parse paths** like `"fm.teal.alpha.feed.play/3k2akjdlkjsf"` 82 + 4. **Map CID → rkey**: `bafyrei123... → 3k2akjdlkjsf` 83 + 5. **Use rkey** when processing records 84 + 85 + ## Why This Matters 86 + 87 + The rkey is essential for: 88 + - **AT URI construction**: `at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf` 89 + - **Record identity**: Uniquely identifies the record within the collection 90 + - **Data integrity**: Maintains proper AT Protocol addressing 91 + 92 + ## CLI Usage 93 + 94 + ```bash 95 + # Explore CAR file and show rkey extraction 96 + teal car explore --file archive.car --verbose 97 + 98 + # The verbose output will show: 99 + # 🔑 rkey Extraction Examples: 100 + # 1. bafyrei123... → rkey: 3k2akjdlkjsf 101 + # 2. bafyrei456... → rkey: self 102 + ``` 103 + 104 + **Note**: Some CAR files may not contain commit operations with rkey mappings, especially if they're partial exports or contain only raw records without repository structure.
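To make the "AT URI construction" point above concrete, here is a tiny sketch that joins a repo DID with a commit-op path exactly as the example URI in this file does; it is string formatting only and performs no validation of the DID, collection, or rkey.

```rust
/// Build an AT URI from a repo DID and a commit-op path ("collection/rkey"),
/// following the at://did/collection/rkey format shown above.
fn at_uri(did: &str, op_path: &str) -> String {
    format!("at://{}/{}", did, op_path)
}

fn main() {
    let uri = at_uri("did:plc:user123", "fm.teal.alpha.feed.play/3k2akjdlkjsf");
    // Matches the example URI in "Why This Matters" above.
    assert_eq!(uri, "at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf");
    println!("{}", uri);
}
```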
+116
tools/teal-cli/src/commands/dev.rs
···
··· 1 + use anyhow::Result; 2 + use colored::*; 3 + 4 + use crate::config::TealConfig; 5 + use crate::DevCommands; 6 + 7 + pub async fn run(cmd: DevCommands, config: &TealConfig) -> Result<()> { 8 + match cmd { 9 + DevCommands::Setup { 10 + skip_docker, 11 + skip_db, 12 + } => setup_dev_environment(skip_docker, skip_db, config).await, 13 + DevCommands::Clean { all } => clean_dev_artifacts(all).await, 14 + DevCommands::Dev { port, watch } => run_dev_server(port, watch, config).await, 15 + DevCommands::Seed { count, data_type } => generate_seed_data(count, data_type, config).await, 16 + } 17 + } 18 + 19 + async fn setup_dev_environment( 20 + skip_docker: bool, 21 + skip_db: bool, 22 + config: &TealConfig, 23 + ) -> Result<()> { 24 + println!("{} Setting up development environment...", "๐Ÿ› ๏ธ".blue()); 25 + println!(); 26 + 27 + if !skip_docker { 28 + println!("{} Docker Setup:", "๐Ÿณ".blue()); 29 + println!(" {} Checking Docker...", "โ€ข".bold()); 30 + 31 + // TODO: Check if Docker is installed and running 32 + println!(" {} Docker check not implemented", "โš ๏ธ".yellow()); 33 + println!(" {} Manually ensure Docker is running", "๐Ÿ’ก".blue()); 34 + println!(); 35 + } 36 + 37 + if !skip_db { 38 + println!("{} Database Setup:", "๐Ÿ—„๏ธ".blue()); 39 + println!(" {} Database URL: {}", "โ€ข".bold(), mask_db_url(&config.database.url)); 40 + 41 + // TODO: Run database initialization and migrations 42 + println!(" {} Database setup not implemented", "โš ๏ธ".yellow()); 43 + println!(" {} Run: teal database init", "๐Ÿ’ก".blue()); 44 + println!(" {} Run: teal database migrate", "๐Ÿ’ก".blue()); 45 + println!(); 46 + } 47 + 48 + println!("{} Keys Setup:", "๐Ÿ”".blue()); 49 + let key_path = config.get_key_path(&config.crypto.default_key_name); 50 + if key_path.exists() { 51 + println!(" {} Default key already exists", "โœ…".green()); 52 + } else { 53 + println!(" {} Generating default key...", "โ€ข".bold()); 54 + // TODO: Auto-generate key 55 + println!(" {} Run: teal crypto gen-key", "๐Ÿ’ก".blue()); 56 + } 57 + println!(); 58 + 59 + println!("{} Development environment setup complete!", "โœ…".green()); 60 + println!(); 61 + println!("{} Next steps:", "๐Ÿ’ก".yellow()); 62 + println!(" 1. teal crypto gen-key --name dev"); 63 + println!(" 2. teal database init"); 64 + println!(" 3. 
teal dev dev --watch"); 65 + 66 + Ok(()) 67 + } 68 + 69 + async fn clean_dev_artifacts(all: bool) -> Result<()> { 70 + println!("{} Cleaning development artifacts...", "๐Ÿงน".blue()); 71 + println!(); 72 + 73 + let mut cleaned_items = Vec::new(); 74 + 75 + // Clean logs 76 + if let Ok(entries) = std::fs::read_dir("logs") { 77 + let mut log_count = 0; 78 + for entry in entries.flatten() { 79 + if entry.path().extension().map_or(false, |ext| ext == "log") { 80 + // TODO: Actually delete log files 81 + log_count += 1; 82 + } 83 + } 84 + if log_count > 0 { 85 + cleaned_items.push(format!("{} log files", log_count)); 86 + } 87 + } 88 + 89 + // Clean temporary files 90 + if let Ok(entries) = std::fs::read_dir(".") { 91 + let mut temp_count = 0; 92 + for entry in entries.flatten() { 93 + let path = entry.path(); 94 + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { 95 + if name.starts_with("tmp_") || name.ends_with(".tmp") { 96 + temp_count += 1; 97 + } 98 + } 99 + } 100 + if temp_count > 0 { 101 + cleaned_items.push(format!("{} temporary files", temp_count)); 102 + } 103 + } 104 + 105 + if all { 106 + // Clean build artifacts 107 + cleaned_items.push("build artifacts".to_string()); 108 + println!(" {} Would clean: target/ directory", "โ€ข".bold()); 109 + 110 + // Clean Docker artifacts 111 + cleaned_items.push("Docker artifacts".to_string()); 112 + println!(" {} Would clean: Docker images and containers", "โ€ข".bold()); 113 + } 114 + 115 + if cleaned_items.is_empty() { 116 + println!("{} No artifacts to clean", "โ„น๏ธ".blue
+349
tools/teal-cli/src/crypto.rs
···
··· 1 + use anyhow::{Context, Result}; 2 + use colored::*; 3 + use k256::ecdsa::{SigningKey, VerifyingKey}; 4 + use k256::SecretKey; 5 + use multibase::Base; 6 + use rand::rngs::OsRng; 7 + use serde_json::json; 8 + use std::path::PathBuf; 9 + use tokio::fs; 10 + 11 + /// Generate a new K256 private key 12 + pub fn generate_private_key() -> SigningKey { 13 + SigningKey::random(&mut OsRng) 14 + } 15 + 16 + /// Load a private key from a file 17 + pub async fn load_private_key(path: &PathBuf) -> Result<SigningKey> { 18 + let key_bytes = fs::read(path) 19 + .await 20 + .with_context(|| format!("Failed to read private key from {:?}", path))?; 21 + 22 + if key_bytes.len() != 32 { 23 + anyhow::bail!( 24 + "Invalid private key length. Expected 32 bytes, got {}", 25 + key_bytes.len() 26 + ); 27 + } 28 + 29 + let secret_key = SecretKey::from_slice(&key_bytes).context("Failed to parse private key")?; 30 + 31 + Ok(SigningKey::from(secret_key)) 32 + } 33 + 34 + /// Save a private key to a file 35 + pub async fn save_private_key(key: &SigningKey, path: &PathBuf) -> Result<()> { 36 + let key_bytes = key.as_nonzero_scalar().to_bytes(); 37 + 38 + // Create parent directory if it doesn't exist 39 + if let Some(parent) = path.parent() { 40 + fs::create_dir_all(parent) 41 + .await 42 + .with_context(|| format!("Failed to create key directory: {:?}", parent))?; 43 + } 44 + 45 + fs::write(path, key_bytes) 46 + .await 47 + .with_context(|| format!("Failed to write private key to {:?}", path))?; 48 + 49 + // Set restrictive permissions on Unix systems 50 + #[cfg(unix)] 51 + { 52 + use std::os::unix::fs::PermissionsExt; 53 + let mut perms = fs::metadata(path).await?.permissions(); 54 + perms.set_mode(0o600); // rw------- 55 + fs::set_permissions(path, perms).await?; 56 + } 57 + 58 + Ok(()) 59 + } 60 + 61 + /// Convert a public key to AT Protocol compatible multibase format 62 + pub fn public_key_to_multibase(public_key: &VerifyingKey) -> Result<String> { 63 + // Get the compressed public key bytes (33 bytes) 64 + let public_key_bytes = public_key.to_encoded_point(true).as_bytes().to_vec(); 65 + 66 + // Encode as multibase with base58btc (z prefix) 67 + let multibase_string = multibase::encode(Base::Base58Btc, &public_key_bytes); 68 + 69 + Ok(multibase_string) 70 + } 71 + 72 + /// Generate a new key pair and save to files 73 + pub async fn generate_key( 74 + name: String, 75 + keys_dir: PathBuf, 76 + force: bool, 77 + format: String, 78 + ) -> Result<()> { 79 + let private_key_path = keys_dir.join(format!("{}.key", name)); 80 + let public_key_path = keys_dir.join(format!("{}.pub", name)); 81 + 82 + // Check if files already exist 83 + if !force && (private_key_path.exists() || public_key_path.exists()) { 84 + anyhow::bail!( 85 + "Key files already exist for '{}'. 
Use --force to overwrite.\n Private: {:?}\n Public: {:?}", 86 + name, 87 + private_key_path, 88 + public_key_path 89 + ); 90 + } 91 + 92 + println!( 93 + "{} Generating K256 key pair for '{}'...", 94 + "๐Ÿ”".blue(), 95 + name.bold() 96 + ); 97 + 98 + // Generate new private key 99 + let private_key = generate_private_key(); 100 + let public_key = private_key.verifying_key(); 101 + 102 + // Save private key 103 + save_private_key(&private_key, &private_key_path) 104 + .await 105 + .with_context(|| format!("Failed to save private key to {:?}", private_key_path))?; 106 + 107 + // Generate public key multibase 108 + let public_key_multibase = 109 + public_key_to_multibase(public_key).context("Failed to generate public key multibase")?; 110 + 111 + // Output based on format 112 + match format.as_str() { 113 + "json" => { 114 + let output = json!({ 115 + "keyName": name, 116 + "privateKeyPath": private_key_path, 117 + "publicKeyPath": public_key_path, 118 + "publicKeyMultibase": public_key_multibase, 119 + "publicKeyHex": hex::encode(public_key.to_encoded_point(false).as_bytes()), 120 + }); 121 + println!("{}", serde_json::to_string_pretty(&output)?); 122 + } 123 + "multibase" => { 124 + println!("{}", public_key_multibase); 125 + } 126 + _ => { 127 + // includes "files" 128 + // Save public key multibase to file 129 + fs::write(&public_key_path, &public_key_multibase) 130 + .await 131 + .with_context(|| format!("Failed to write public key to {:?}", public_key_path))?; 132 + 133 + println!("{} Key pair generated successfully!", "โœ…".green()); 134 + println!(" {} {}", "Name:".bold(), name); 135 + println!(" {} {:?}", "Private key:".bold(), private_key_path); 136 + println!(" {} {:?}", "Public key:".bold(), public_key_path); 137 + println!( 138 + " {} {}", 139 + "Multibase:".bold(), 140 + public_key_multibase.bright_blue() 141 + ); 142 + println!(); 143 + println!("{} Add this to your DID document:", "๐Ÿ’ก".yellow()); 144 + println!(" \"publicKeyMultibase\": \"{}\"", public_key_multibase); 145 + } 146 + } 147 + 148 + Ok(()) 149 + } 150 + 151 + /// Extract public key from private key file 152 + pub async fn extract_pubkey(private_key_path: PathBuf, format: String) -> Result<()> { 153 + println!( 154 + "{} Extracting public key from {:?}...", 155 + "๐Ÿ”".blue(), 156 + private_key_path 157 + ); 158 + 159 + let private_key = load_private_key(&private_key_path) 160 + .await 161 + .with_context(|| format!("Failed to load private key from {:?}", private_key_path))?; 162 + 163 + let public_key = private_key.verifying_key(); 164 + 165 + match format.as_str() { 166 + "multibase" => { 167 + let multibase = public_key_to_multibase(public_key)?; 168 + println!("{}", multibase); 169 + } 170 + "hex" => { 171 + let hex = hex::encode(public_key.to_encoded_point(false).as_bytes()); 172 + println!("{}", hex); 173 + } 174 + "compressed-hex" => { 175 + let hex = hex::encode(public_key.to_encoded_point(true).as_bytes()); 176 + println!("{}", hex); 177 + } 178 + "json" => { 179 + let multibase = public_key_to_multibase(public_key)?; 180 + let hex_uncompressed = hex::encode(public_key.to_encoded_point(false).as_bytes()); 181 + let hex_compressed = hex::encode(public_key.to_encoded_point(true).as_bytes()); 182 + 183 + let output = json!({ 184 + "publicKeyMultibase": multibase, 185 + "publicKeyHex": hex_uncompressed, 186 + "publicKeyHexCompressed": hex_compressed, 187 + }); 188 + println!("{}", serde_json::to_string_pretty(&output)?); 189 + } 190 + _ => { 191 + anyhow::bail!( 192 + "Invalid format '{}'. 
Use: multibase, hex, compressed-hex, or json", 193 + format 194 + ); 195 + } 196 + } 197 + 198 + Ok(()) 199 + } 200 + 201 + /// List available keys in directory 202 + pub async fn list_keys(keys_dir: PathBuf) -> Result<()> { 203 + if !keys_dir.exists() { 204 + println!("{} No keys directory found at {:?}", "โ„น๏ธ".blue(), keys_dir); 205 + println!("Run 'teal gen-key' to create your first key."); 206 + return Ok(()); 207 + } 208 + 209 + let mut keys = Vec::new(); 210 + let mut entries = fs::read_dir(&keys_dir).await?; 211 + 212 + while let Some(entry) = entries.next_entry().await? { 213 + let path = entry.path(); 214 + if let Some(extension) = path.extension() { 215 + if extension == "key" { 216 + if let Some(stem) = path.file_stem() { 217 + if let Some(name) = stem.to_str() { 218 + keys.push(name.to_string()); 219 + } 220 + } 221 + } 222 + } 223 + } 224 + 225 + if keys.is_empty() { 226 + println!("{} No keys found in {:?}", "โ„น๏ธ".blue(), keys_dir); 227 + println!("Run 'teal gen-key' to create your first key."); 228 + return Ok(()); 229 + } 230 + 231 + keys.sort(); 232 + 233 + println!("{} Available keys in {:?}:", "๐Ÿ”‘".blue(), keys_dir); 234 + println!(); 235 + 236 + let keys_count = keys.len(); 237 + 238 + for key_name in keys { 239 + let private_path = keys_dir.join(format!("{}.key", key_name)); 240 + let public_path = keys_dir.join(format!("{}.pub", key_name)); 241 + 242 + let mut status_parts = Vec::new(); 243 + 244 + if private_path.exists() { 245 + status_parts.push("private".green().to_string()); 246 + } 247 + 248 + if public_path.exists() { 249 + status_parts.push("public".cyan().to_string()); 250 + 251 + // Try to read and display the multibase 252 + if let Ok(multibase) = fs::read_to_string(&public_path).await { 253 + let multibase = multibase.trim(); 254 + println!( 255 + " {} {} ({})", 256 + "โ€ข".bold(), 257 + key_name.bold(), 258 + status_parts.join(", ") 259 + ); 260 + println!(" {}: {}", "Multibase".dimmed(), multibase.bright_blue()); 261 + } else { 262 + println!( 263 + " {} {} ({})", 264 + "โ€ข".bold(), 265 + key_name.bold(), 266 + status_parts.join(", ") 267 + ); 268 + } 269 + } else { 270 + println!( 271 + " {} {} ({})", 272 + "โ€ข".bold(), 273 + key_name.bold(), 274 + status_parts.join(", ") 275 + ); 276 + } 277 + 278 + // Show file modification times 279 + if let Ok(metadata) = fs::metadata(&private_path).await { 280 + if let Ok(modified) = metadata.modified() { 281 + let datetime = chrono::DateTime::<chrono::Local>::from(modified); 282 + println!( 283 + " {}: {}", 284 + "Created".dimmed(), 285 + datetime.format("%Y-%m-%d %H:%M:%S").to_string().dimmed() 286 + ); 287 + } 288 + } 289 + println!(); 290 + } 291 + 292 + println!( 293 + "{} Total: {} key(s)", 294 + "๐Ÿ“Š".blue(), 295 + keys_count.to_string().bold() 296 + ); 297 + 298 + Ok(()) 299 + } 300 + 301 + /// Rotate a key (backup old, generate new) 302 + pub async fn rotate_key( 303 + keys_dir: PathBuf, 304 + name: String, 305 + backup_dir: Option<PathBuf>, 306 + ) -> Result<()> { 307 + let private_key_path = keys_dir.join(format!("{}.key", name)); 308 + 309 + if !private_key_path.exists() { 310 + anyhow::bail!("Key '{}' does not exist in {:?}", name, keys_dir); 311 + } 312 + 313 + println!("{} Rotating key '{}'...", "๐Ÿ”„".blue(), name.bold()); 314 + 315 + // Backup existing key 316 + let backup_location = backup_dir.unwrap_or_else(|| keys_dir.join("backups")); 317 + 318 + fs::create_dir_all(&backup_location).await?; 319 + 320 + let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S"); 321 + let 
backup_private = backup_location.join(format!("{}_{}.key", name, timestamp)); 322 + let backup_public = backup_location.join(format!("{}_{}.pub", name, timestamp)); 323 + 324 + fs::copy(&private_key_path, &backup_private).await?; 325 + 326 + let public_key_path = keys_dir.join(format!("{}.pub", name)); 327 + if public_key_path.exists() { 328 + fs::copy(&public_key_path, &backup_public).await?; 329 + } 330 + 331 + println!("Backed up existing key to: {:?}", backup_private); 332 + 333 + // Generate new key 334 + let new_key = generate_private_key(); 335 + save_private_key(&new_key, &private_key_path).await?; 336 + 337 + // Save new public key multibase 338 + let public_key = new_key.verifying_key(); 339 + let multibase = public_key_to_multibase(public_key)?; 340 + fs::write(&public_key_path, &multibase).await?; 341 + 342 + println!("{} Key rotation completed!", "✅".green()); 343 + println!(" {} {}", "New multibase:".bold(), multibase.bright_blue()); 344 + println!(); 345 + println!("{} Update your DID document with:", "💡".yellow()); 346 + println!(" \"publicKeyMultibase\": \"{}\"", multibase); 347 + 348 + Ok(()) 349 + }
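Editor's sketch (not part of the diff): the hunk above calls four helpers, generate_private_key, save_private_key, load_private_key, and public_key_to_multibase, that are defined earlier in crypto.rs and do not appear in this excerpt. A minimal sketch of the shape they likely take, assuming the k256 (ecdsa feature), rand, hex, multibase, tokio, and anyhow crates and a did:key-style secp256k1 multicodec prefix; the real definitions and signatures may differ:

    use anyhow::Result;
    use k256::ecdsa::{SigningKey, VerifyingKey};
    use rand::rngs::OsRng;
    use std::path::Path;
    use tokio::fs;

    // Assumption: a fresh random secp256k1 signing key.
    fn generate_private_key() -> SigningKey {
        SigningKey::random(&mut OsRng)
    }

    // Assumption: the 32-byte secret is stored hex-encoded on disk.
    async fn save_private_key(key: &SigningKey, path: &Path) -> Result<()> {
        fs::write(path, hex::encode(key.to_bytes())).await?;
        Ok(())
    }

    // Assumption: reads back the hex-encoded secret written above.
    async fn load_private_key(path: &Path) -> Result<SigningKey> {
        let hex_str = fs::read_to_string(path).await?;
        let bytes = hex::decode(hex_str.trim())?;
        Ok(SigningKey::from_slice(&bytes)?)
    }

    // Assumption: did:key-style multibase, i.e. base58btc over the secp256k1
    // multicodec prefix (0xe7 0x01) followed by the compressed public key.
    fn public_key_to_multibase(public_key: &VerifyingKey) -> Result<String> {
        let mut bytes = vec![0xe7, 0x01];
        bytes.extend_from_slice(public_key.to_encoded_point(true).as_bytes());
        Ok(multibase::encode(multibase::Base::Base58Btc, &bytes))
    }

Whatever the exact implementation, the multibase string is the value gen-key and rotate print for pasting into a DID document's publicKeyMultibase field.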
+102
tools/teal-cli/src/main.rs
···
··· 1 + use anyhow::Result; 2 + use clap::{Parser, Subcommand}; 3 + 4 + use std::path::PathBuf; 5 + 6 + mod crypto; 7 + 8 + #[derive(Parser)] 9 + #[command(name = "teal")] 10 + #[command(about = "Teal management utilities")] 11 + #[command(version = "0.1.0")] 12 + struct Cli { 13 + #[command(subcommand)] 14 + command: Commands, 15 + } 16 + 17 + #[derive(Subcommand)] 18 + enum Commands { 19 + /// Generate a new K256 key pair 20 + GenKey { 21 + /// Key name/identifier 22 + #[arg(short, long, default_value = "repo")] 23 + name: String, 24 + 25 + /// Output directory (defaults to ~/.teal/keys) 26 + #[arg(short, long)] 27 + output: Option<PathBuf>, 28 + 29 + /// Overwrite existing keys 30 + #[arg(short, long)] 31 + force: bool, 32 + 33 + /// Output format: json, multibase, or files 34 + #[arg(long, default_value = "files")] 35 + format: String, 36 + }, 37 + 38 + /// Extract public key multibase from private key 39 + ExtractPubkey { 40 + /// Path to private key file 41 + #[arg(short, long)] 42 + private_key: PathBuf, 43 + 44 + /// Output format 45 + #[arg(short, long, default_value = "multibase")] 46 + format: String, 47 + }, 48 + 49 + /// List available keys 50 + List { 51 + /// Keys directory (defaults to ~/.teal/keys) 52 + #[arg(short, long)] 53 + directory: Option<PathBuf>, 54 + }, 55 + 56 + /// Rotate keys (generate new, backup old) 57 + Rotate { 58 + /// Key name to rotate 59 + #[arg(short, long)] 60 + name: String, 61 + 62 + /// Backup directory 63 + #[arg(short, long)] 64 + backup_dir: Option<PathBuf>, 65 + }, 66 + } 67 + 68 + fn get_default_keys_dir() -> PathBuf { 69 + dirs::home_dir() 70 + .unwrap_or_else(|| PathBuf::from(".")) 71 + .join(".teal") 72 + .join("keys") 73 + } 74 + 75 + #[tokio::main] 76 + async fn main() -> Result<()> { 77 + let cli = Cli::parse(); 78 + 79 + match cli.command { 80 + Commands::GenKey { 81 + name, 82 + output, 83 + force, 84 + format, 85 + } => { 86 + let keys_dir = output.unwrap_or_else(get_default_keys_dir); 87 + crypto::generate_key(name, keys_dir, force, format).await 88 + } 89 + Commands::ExtractPubkey { 90 + private_key, 91 + format, 92 + } => crypto::extract_pubkey(private_key, format).await, 93 + Commands::List { directory } => { 94 + let keys_dir = directory.unwrap_or_else(get_default_keys_dir); 95 + crypto::list_keys(keys_dir).await 96 + } 97 + Commands::Rotate { name, backup_dir } => { 98 + let keys_dir = get_default_keys_dir(); 99 + crypto::rotate_key(keys_dir, name, backup_dir).await 100 + } 101 + } 102 + }
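Usage note (editor's addition, not part of the diff): with clap's derive defaults the enum variants above map to kebab-case subcommands, so this binary would be invoked roughly as `teal gen-key --name repo`, `teal extract-pubkey --private-key ~/.teal/keys/repo.key --format json`, `teal list`, and `teal rotate --name repo`, with flags following the #[arg] attributes shown; exact invocations depend on how the binary is built and named in Cargo.toml.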
+13 -1
turbo.json
··· 23 }, 24 "lex:gen-server": { 25 "dependsOn": [], 26 - "outputs": ["./src/types/**"] 27 }, 28 "lex:gen": { 29 "dependsOn": [], ··· 43 }, 44 "db:migrate": { 45 "cache": false 46 } 47 } 48 }
··· 23 }, 24 "lex:gen-server": { 25 "dependsOn": [], 26 + "outputs": ["./src/**"] 27 }, 28 "lex:gen": { 29 "dependsOn": [], ··· 43 }, 44 "db:migrate": { 45 "cache": false 46 + }, 47 + "@teal/amethyst#build": { 48 + "dependsOn": ["@teal/lexicons#lex:gen-server"], 49 + "outputs": ["./build/**"] 50 + }, 51 + "@teal/amethyst#build:web": { 52 + "dependsOn": ["@teal/lexicons#lex:gen-server"], 53 + "outputs": ["./build/**"] 54 + }, 55 + "@teal/amethyst#build:ios": { 56 + "dependsOn": ["@teal/lexicons#lex:gen-server"], 57 + "outputs": ["./build/**"] 58 } 59 } 60 }