+51  .dockerignore  (new file)

# Rust build artifacts
target/
**/target/
services/target/
apps/*/target/

# Node.js dependencies and build artifacts
node_modules/
**/node_modules/
.turbo/
**/.turbo/
build/
dist/
.next/

# Development and cache files
.gitignore
**/.DS_Store
*.log
*.tmp
*.temp

# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~

# Environment and config files
.env
.env.local
.env.*.local

# Database files
*.db
*.sqlite
*.sqlite3

# Test coverage
coverage/
**/coverage/

# Temporary files
tmp/
temp/

# SQLx offline query cache
# Include workspace-level cache for monorepo builds
# Uncomment the line below if you want to force online compilation
# .sqlx/

+22  .env.development  (new file)

# Test Database Environment Configuration
# This file provides database credentials for testing discriminant improvements

# Database Configuration
DB_USER=postgres
DB_PASSWORD=testpass123
DB_NAME=teal_test

# Docker Database URL (used by services in compose)
DOCKER_DB_URL=postgres://postgres:testpass123@postgres:5432/teal_test

# Local Database URL (used by migration tools and local testing)
DATABASE_URL=postgres://postgres:testpass123@localhost:5433/teal_test

# Redis Configuration (if needed)
REDIS_URL=redis://garnet:6379

# AT Protocol Configuration (placeholder for testing)
AT_PROTOCOL_JWT_SECRET=test-jwt-secret-for-development-only

# Client Configuration
CLIENT_ADDRESS=localhost
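
`DOCKER_DB_URL` and `DATABASE_URL` above point at the same Postgres instance by two routes: the compose-internal hostname `postgres` versus the port published on the host (5433). A quick way to confirm the host-side route works (a sketch, assuming the compose stack is up and `psql` is installed locally):

```bash
# Connect through the host-published port used by migration tools and local tests
psql "postgres://postgres:testpass123@localhost:5433/teal_test" -c "SELECT version();"
```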

+10 -7  .env.template

···
  NODE_ENV=development
  PORT=3000
  HOST=0.0.0.0
- PUBLIC_URL=A publicly accessible url for aqua
+ PUBLIC_URL= # A publicly accessible url for aqua
  DB_USER=postgres
  DB_PASSWORD=supersecurepassword123987
  DB_NAME=teal
  DATABASE_URL="postgresql://${DB_USER}:${DB_PASSWORD}@localhost:5432/${DB_NAME}"
  DOCKER_DB_URL="postgresql://${DB_USER}:${DB_PASSWORD}@host.docker.internal:5432/${DB_NAME}"
- #This is not currently being used fully so can just use this default pubkey for now
+ # `cargo run --bin teal gen-key` to generate a new pubkey
  DID_WEB_PUBKEY=zQ3sheEnMKhEK87PSu4P2mjAevViqHcjKmgxBWsDQPjLRM9wP
- CLIENT_ADDRESS=A publicly accessible host for amethyst like amethyst.teal.fm
+ CLIENT_ADDRESS= # A publicly accessible host for amethyst like amethyst.teal.fm
- PUBLIC_DID_WEB=did:web:{aqua's PUBLIC_URL goes here after did:web:}
+ PUBLIC_DID_WEB= # did:web:{aqua's PUBLIC_URL goes here after did:web:}

- #amethyst
+ # amethyst
- EXPO_PUBLIC_DID_WEB=same as PUBLIC_DID_WEB
+ EXPO_PUBLIC_DID_WEB= # same as PUBLIC_DID_WEB
- EXPO_PUBLIC_BASE_URL=same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm
+ EXPO_PUBLIC_BASE_URL= # same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm

+ SQLX_OFFLINE=true
+ SQLX_OFFLINE_DIR="./.sqlx"
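
For a concrete reading of the placeholders above, here is a filled-in sketch; `aqua.example.com` and `amethyst.example.com` are hypothetical hosts, not project defaults:

```bash
# Hypothetical values showing how the fields relate to one another
PUBLIC_URL=aqua.example.com                        # aqua's publicly accessible host
PUBLIC_DID_WEB=did:web:aqua.example.com            # "did:web:" + PUBLIC_URL
CLIENT_ADDRESS=amethyst.example.com                # amethyst's publicly accessible host
EXPO_PUBLIC_DID_WEB=did:web:aqua.example.com       # same as PUBLIC_DID_WEB
EXPO_PUBLIC_BASE_URL=https://amethyst.example.com  # CLIENT_ADDRESS with an http(s) scheme

# Generate a fresh DID_WEB_PUBKEY (command taken from the template comment)
cargo run --bin teal gen-key
```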

+201  .github/WORKFLOWS.md  (new file)

# GitHub Actions Workflows Documentation

This document describes the CI/CD workflows configured for the Teal project.

## Overview

The project uses GitHub Actions for continuous integration, deployment, and security scanning. The workflows are designed to handle a polyglot codebase with Rust services, Node.js packages, and a React Native application.

## Workflows

### CI (`ci.yml`)

**Triggers:** Push/PR to `main` or `develop` branches

**Purpose:** Primary continuous integration workflow that runs tests, linting, and type checking.

**Jobs:**
- **rust-check**: Formats, lints (clippy), and tests all Rust code in both `services/` and `apps/`
- **node-check**: Type checking, linting, building, and testing Node.js packages
- **lexicon-check**: Validates lexicon files and ensures generated code is up to date

**Key Features:**
- Caches Rust and Node.js dependencies for faster builds
- Runs in parallel for optimal performance
- Fails fast if any check fails

### Aqua (`aqua.yml`)

**Triggers:** Push/PR to `main` with changes to `apps/aqua/**`

**Purpose:** Builds and pushes the Aqua Rust application Docker image.

**Features:**
- Multi-platform builds (linux/amd64, linux/arm64)
- Pushes to GitHub Container Registry (ghcr.io)
- Only pushes on main branch (not PRs)
- Uses GitHub Actions cache for Docker layers

### Cadet (`cadet.yml`)

**Triggers:** Push/PR to `main` with changes to `services/cadet/**`

**Purpose:** Builds and pushes the Cadet Rust service Docker image.

**Features:**
- Multi-platform builds (linux/amd64, linux/arm64)
- Pushes to GitHub Container Registry (ghcr.io)
- Only pushes on main branch (not PRs)
- Uses GitHub Actions cache for Docker layers

### Amethyst (`amethyst.yml`)

**Triggers:** Push/PR to `main` with changes to `apps/amethyst/**`

**Purpose:** Builds the React Native/Expo application for different platforms.

**Jobs:**
- **build-web**: Builds web version and uploads artifacts
- **build-ios**: Builds iOS version (only on main branch pushes, requires macOS runner)
- **lint-and-test**: Type checking and testing

**Features:**
- Generates lexicons before building
- Platform-specific builds
- Artifact uploads for build assets

### Services (`services.yml`)

**Triggers:** Push/PR to `main` with changes to `services/**`

**Purpose:** Dynamically detects and builds all services with Dockerfiles.

**Jobs:**
- **detect-services**: Scans for services with Dockerfiles
- **build-service**: Matrix build for each detected service
- **test-services**: Runs tests for all services

**Features:**
- Dynamic service detection
- Skips special directories (target, migrations, types, .sqlx)
- Per-service Docker caching
- Multi-platform builds

### Release (`release.yml`)

**Triggers:**
- Push to tags matching `v*`
- Manual workflow dispatch

**Purpose:** Creates GitHub releases and builds production Docker images.

**Jobs:**
- **create-release**: Creates GitHub release with changelog
- **build-and-release-aqua**: Builds and tags Aqua for release
- **build-and-release-cadet**: Builds and tags Cadet for release
- **release-other-services**: Builds other services (rocketman, satellite)
- **build-and-release-amethyst**: Builds Amethyst and uploads to release

**Features:**
- Automatic changelog extraction
- Production Docker tags (latest + version)
- Release artifact uploads
- Support for pre-releases (tags with `-`)

### Security (`security.yml`)

**Triggers:**
- Push/PR to `main` or `develop`
- Daily at 2 AM UTC
- Manual dispatch

**Purpose:** Comprehensive security scanning and vulnerability detection.

**Jobs:**
- **rust-security-audit**: Uses `cargo audit` for Rust dependencies
- **node-security-audit**: Uses `pnpm audit` for Node.js dependencies
- **codeql-analysis**: GitHub's semantic code analysis
- **docker-security-scan**: Trivy vulnerability scanning for Docker images
- **secrets-scan**: TruffleHog for secrets detection

**Features:**
- Fails on high/critical vulnerabilities
- SARIF upload for security tab integration
- Historical scanning with git history

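Both dependency audits can be reproduced locally before pushing; a minimal sketch, assuming `cargo-audit` has been installed with `cargo install cargo-audit`:

```bash
# Rust dependencies — same tool the rust-security-audit job runs
cd services && cargo audit

# Node.js dependencies — same check as node-security-audit
pnpm audit --audit-level high
```
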
## Configuration Files

### Dependabot (`dependabot.yml`)

Automated dependency updates for:
- **npm**: Weekly updates for Node.js dependencies
- **cargo**: Weekly updates for Rust dependencies (services + apps)
- **github-actions**: Weekly updates for workflow actions
- **docker**: Weekly updates for Docker base images

**Schedule:** Monday-Tuesday mornings, staggered to avoid conflicts

## Container Registry

All Docker images are pushed to GitHub Container Registry:
- `ghcr.io/[owner]/[repo]/aqua`
- `ghcr.io/[owner]/[repo]/cadet`
- `ghcr.io/[owner]/[repo]/[service-name]`

**Tags:**
- `latest`: Latest build from main branch
- `sha-[commit]`: Specific commit builds
- `v[version]`: Release builds
- `pr-[number]`: Pull request builds (for testing)

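Pulling a published image therefore looks like this, with `OWNER/REPO` standing in for the actual repository path and the tags taken from the list above:

```bash
# Latest image built from main
docker pull ghcr.io/OWNER/REPO/aqua:latest

# A specific commit build or a release build
docker pull ghcr.io/OWNER/REPO/cadet:sha-1a2b3c4
docker pull ghcr.io/OWNER/REPO/cadet:v1.0.0
```
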
## Secrets and Permissions

**Required secrets:**
- `GITHUB_TOKEN`: Automatically provided (for registry access and releases)

**Permissions used:**
- `contents: read`: Read repository contents
- `packages: write`: Push to GitHub Container Registry
- `security-events: write`: Upload security scan results
- `actions: read`: Access workflow information

## Best Practices

1. **Path-based triggers**: Workflows only run when relevant files change
2. **Caching**: Aggressive caching for Rust, Node.js, and Docker layers
3. **Multi-platform**: Docker images built for amd64 and arm64
4. **Security-first**: Regular vulnerability scanning and secrets detection
5. **Fail-fast**: Early termination on critical issues
6. **Artifact preservation**: Build outputs stored for debugging/deployment

## Usage Examples

### Manual Release

```bash
# Tag and push for automatic release
git tag v1.0.0
git push origin v1.0.0

# Or use workflow dispatch in GitHub UI
```

### Local Development

```bash
# Run the same checks locally
pnpm rust:fmt
pnpm rust:clippy
pnpm typecheck
pnpm test
```

### Debugging Failed Builds

1. Check the Actions tab for detailed logs
2. Download artifacts from successful builds
3. Use the same commands locally with cached dependencies

## Maintenance

- **Weekly**: Review Dependabot PRs
- **Monthly**: Update action versions if not auto-updated
- **Quarterly**: Review and update security scanning tools
- **As needed**: Add new services to release workflow matrix

+77  .github/actions/setup/action.yml  (new file)

name: "Setup Teal Environment"
description: "Sets up the common environment for Teal builds including Node.js, Rust, pnpm, and lexicons"

inputs:
  setup-rust:
    description: "Whether to setup Rust toolchain"
    required: false
    default: "false"
  rust-components:
    description: 'Rust components to install (e.g., "rustfmt,clippy")'
    required: false
    default: "rustfmt,clippy"
  setup-node:
    description: "Whether to setup Node.js and pnpm"
    required: false
    default: "true"
  node-version:
    description: "Node.js version to use"
    required: false
    default: "20"
  lexicons-only-rust:
    description: "Generate only Rust lexicons"
    required: false
    default: "false"
  cache-key-suffix:
    description: "Additional suffix for cache keys"
    required: false
    default: ""

runs:
  using: "composite"
  steps:
    - name: Setup lexicons
      shell: bash
      run: ./scripts/setup-lexicons.sh

    - name: Install pnpm
      if: inputs.setup-node == 'true'
      uses: pnpm/action-setup@v4

    - name: Setup Node.js
      if: inputs.setup-node == 'true'
      uses: actions/setup-node@v4
      with:
        node-version: ${{ inputs.node-version }}
        cache: "pnpm"

    - name: Install Node dependencies
      if: inputs.setup-node == 'true'
      shell: bash
      run: pnpm install --frozen-lockfile

    - name: Generate lexicons
      if: inputs.setup-node == 'true'
      shell: bash
      run: |
        cd tools/lexicon-cli && pnpm i && pnpm build && cd ..
        if [ "${{ inputs.lexicons-only-rust }}" = "true" ]; then
          pnpm lex:gen --rust-only
        else
          pnpm lex:gen
        fi

    - name: Install Rust toolchain
      if: inputs.setup-rust == 'true'
      uses: dtolnay/rust-toolchain@stable
      with:
        components: ${{ inputs.rust-components }}

    - name: Cache Rust dependencies
      if: inputs.setup-rust == 'true'
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: |
          services
          apps/aqua
        key: ${{ inputs.cache-key-suffix }}

+91  .github/workflows/amethyst.yml  (new file)

# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Build Amethyst

on:
  push:
    branches: [main]
    paths:
      - "apps/amethyst/**"
      - "packages/**"
      - "lexicons/**"
      - "package.json"
      - "pnpm-lock.yaml"
      - ".github/workflows/amethyst.yml"
  pull_request:
    branches: [main]
    paths:
      - "apps/amethyst/**"
      - "packages/**"
      - "lexicons/**"
      - "package.json"
      - "pnpm-lock.yaml"
      - ".github/workflows/amethyst.yml"

jobs:
  build:
    name: Build Amethyst
    runs-on: ubuntu-latest
    outputs:
      build-cache-key: ${{ steps.cache-key.outputs.key }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-node: "true"

      - name: Generate cache key
        id: cache-key
        run: echo "key=amethyst-build-${{ hashFiles('apps/amethyst/**', 'packages/**', 'lexicons/**') }}" >> $GITHUB_OUTPUT

      - name: Build lex tool
        run: cd tools/lexicon-cli && pnpm i && pnpm build

      - name: Build web
        run: pnpm turbo build:web --filter=@teal/amethyst

      - name: Type check
        run: pnpm turbo check-types --filter=@teal/amethyst

      - name: Run tests
        run: pnpm turbo test --filter=@teal/amethyst

      - name: Upload web build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: amethyst-web-build
          path: apps/amethyst/build/
          retention-days: 7

  build-ios:
    name: Build iOS
    runs-on: macos-latest
    needs: build
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-node: "true"

      - name: Build lex tool
        run: cd tools/lexicon-cli && pnpm i && pnpm build

      - name: Setup Expo CLI
        run: npm install -g @expo/cli

      - name: Build iOS
        run: pnpm turbo build:ios --filter=@teal/amethyst

      - name: Upload iOS build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: amethyst-ios-build
          path: apps/amethyst/build/
          retention-days: 7

+70  .github/workflows/aqua.yml  (new file)

# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Build and Push Aqua

on:
  push:
    branches:
      - main
    paths:
      - "apps/aqua/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/aqua.yml"
  pull_request:
    branches:
      - main
    paths:
      - "apps/aqua/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/aqua.yml"

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/aqua

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./apps/aqua/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max

+68  .github/workflows/cadet.yml  (new file)

# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Build and Push Cadet

on:
  push:
    branches: [main]
    paths:
      - "services/cadet/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/cadet.yml"
  pull_request:
    branches: [main]
    paths:
      - "services/cadet/**"
      - "Cargo.toml"
      - "Cargo.lock"
      - ".github/workflows/cadet.yml"

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/cadet

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./services/cadet/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max

+217  .github/workflows/ci.yml  (new file)

# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

env:
  CARGO_TERM_COLOR: always
  SQLX_OFFLINE: true
  SQLX_OFFLINE_DIR: "./.sqlx"

jobs:
  setup-and-build:
    name: Setup and Build All
    runs-on: ubuntu-latest
    outputs:
      rust-cache-key: ${{ steps.rust-cache.outputs.cache-hit }}
      node-cache-key: ${{ steps.node-cache.outputs.cache-hit }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-rust: "true"
          setup-node: "true"
          cache-key-suffix: "ci-build"

      - name: Setup SQLx offline files
        run: ./scripts/setup-sqlx-offline.sh

      - name: Build Node packages
        run: pnpm build

      - name: Build Rust services (x86_64)
        run: |
          cargo build --release --all-features

      - name: Build Rust apps (x86_64)
        run: |
          cd apps/aqua
          cargo build --release --all-features

      - name: Collect executables (x86_64)
        run: |
          mkdir -p artifacts/x86_64
          # Copy service executables
          if [ -d "services/target/release" ]; then
            find services/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
          fi
          # Copy app executables
          if [ -d "apps/aqua/target/release" ]; then
            find apps/aqua/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
          fi
          echo "x86_64 executables:"
          ls -la artifacts/x86_64/ || echo "No executables found"

      - name: Upload Node build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: node-builds
          path: |
            packages/*/dist/
            apps/amethyst/build/
          retention-days: 1

      - name: Upload Rust build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: rust-builds-x86_64
          path: |
            artifacts/x86_64/
          retention-days: 1

  rust-cross-compile:
    name: Cross-compile Rust
    runs-on: ubuntu-latest
    needs: setup-and-build
    strategy:
      matrix:
        target: [aarch64-unknown-linux-gnu]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-rust: "true"
          setup-node: "true"
          lexicons-only-rust: "true"
          cache-key-suffix: "cross-${{ matrix.target }}"

      - name: Setup SQLx offline files
        run: ./scripts/setup-sqlx-offline.sh

      - name: Install cross-compilation tools
        run: |
          cargo install cross --git https://github.com/cross-rs/cross
          rustup target add ${{ matrix.target }}
          # Set up environment for cross-compilation
          echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
          echo "CROSS_NO_WARNINGS=0" >> $GITHUB_ENV

      - name: Cross-compile services
        run: |
          cross build --release --all-features --target ${{ matrix.target }}

      - name: Collect cross-compiled executables
        run: |
          mkdir -p artifacts/${{ matrix.target }}
          # Copy service executables
          if [ -d "services/target/${{ matrix.target }}/release" ]; then
            find services/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
          fi
          # Copy app executables
          if [ -d "apps/aqua/target/${{ matrix.target }}/release" ]; then
            find apps/aqua/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
          fi
          echo "Cross-compiled executables for ${{ matrix.target }}:"
          ls -la artifacts/${{ matrix.target }}/ || echo "No executables found"

      - name: Upload cross-compiled artifacts
        uses: actions/upload-artifact@v4
        with:
          name: rust-builds-${{ matrix.target }}
          path: |
            artifacts/${{ matrix.target }}/
          retention-days: 1

  # disabled b/c it's triggered on autogenerated content
  # and can't find a way around it rn

  # rust-quality:
  #   name: Rust Quality Checks
  #   runs-on: ubuntu-latest
  #   needs: setup-and-build
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v4

  #     - name: Setup environment
  #       uses: ./.github/actions/setup
  #       with:
  #         setup-rust: "true"
  #         setup-node: "true"
  #         lexicons-only-rust: "true"
  #         cache-key-suffix: "ci-build"

  #     - name: Setup SQLx offline files
  #       run: ./scripts/setup-sqlx-offline.sh

  #     # - name: Check Rust formatting
  #     #   run: |
  #     #     cargo fmt --all -- --check

  #     - name: Run Clippy
  #       run: |
  #         cargo clippy --all-targets --all-features --workspace --exclude types -- -D warnings

  #     - name: Run Rust tests
  #       run: |
  #         cargo test --all-features

  # node-quality:
  #   name: Node.js Quality Checks
  #   runs-on: ubuntu-latest
  #   needs: setup-and-build
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v4

  #     - name: Setup environment
  #       uses: ./.github/actions/setup
  #       with:
  #         setup-node: "true"
  #         cache-key-suffix: "ci-build"

  #     - name: Download Node build artifacts
  #       uses: actions/download-artifact@v4
  #       with:
  #         name: node-builds
  #         path: .

  #     # - name: Type check
  #     #   run: pnpm typecheck

  #     - name: Lint and format check
  #       run: pnpm fix --check

  #     - name: Run tests
  #       run: pnpm test

  lexicon-validation:
    name: Lexicon Validation
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-node: "true"

      - name: Validate lexicons
        run: pnpm lex:validate

      - name: Check lexicon generation consistency
        run: |
          pnpm lex:gen
          git diff --exit-code || (echo "Lexicon files are out of sync. Run 'pnpm lex:gen' locally." && exit 1)
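
The lexicon-validation job above is easy to reproduce before opening a PR; a sketch of the same sequence the workflow runs:

```bash
# Mirrors the "Validate lexicons" and "Check lexicon generation consistency" steps
pnpm lex:validate
pnpm lex:gen
git diff --exit-code || echo "Lexicon files are out of sync. Run 'pnpm lex:gen' and commit the result."
```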

+236  .github/workflows/release.yml  (new file)

# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

name: Release

on:
  push:
    tags:
      - "v*"
  workflow_dispatch:
    inputs:
      tag:
        description: "Release tag"
        required: true
        type: string

env:
  REGISTRY: ghcr.io
  CARGO_TERM_COLOR: always
  SQLX_OFFLINE: true

jobs:
  create-release:
    name: Create Release
    runs-on: ubuntu-latest
    outputs:
      release_id: ${{ steps.create_release.outputs.id }}
      upload_url: ${{ steps.create_release.outputs.upload_url }}
      tag: ${{ steps.tag.outputs.tag }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Get tag name
        id: tag
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
          else
            echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
          fi

      - name: Generate changelog
        id: changelog
        run: |
          if [ -f "CHANGELOG.md" ]; then
            # Extract changelog for this version
            awk '/^## \[${{ steps.tag.outputs.tag }}\]/{flag=1; next} /^## \[/{flag=0} flag' CHANGELOG.md > release_notes.md
          else
            echo "Release ${{ steps.tag.outputs.tag }}" > release_notes.md
          fi

      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.tag }}
          release_name: Release ${{ steps.tag.outputs.tag }}
          body_path: release_notes.md
          draft: false
          prerelease: ${{ contains(steps.tag.outputs.tag, '-') }}

  build-all:
    name: Build All Artifacts
    runs-on: ubuntu-latest
    needs: create-release
    outputs:
      rust-artifacts: ${{ steps.upload-rust.outputs.artifact-id }}
      node-artifacts: ${{ steps.upload-node.outputs.artifact-id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup environment
        uses: ./.github/actions/setup
        with:
          setup-rust: "true"
          setup-node: "true"
          cache-key-suffix: "release-${{ needs.create-release.outputs.tag }}"

      - name: Install cross-compilation tools
        run: |
          cargo install cross
          rustup target add aarch64-unknown-linux-gnu

      - name: Build Node.js artifacts
        run: |
          pnpm build
          cd apps/amethyst && pnpm build

      - name: Build Rust services (x86_64)
        run: |
          cd services
          cargo build --release --all-features

      - name: Build Rust services (aarch64)
        run: |
          cd services
          cross build --release --all-features --target aarch64-unknown-linux-gnu

      - name: Build Rust apps (x86_64)
        run: |
          cd apps/aqua
          cargo build --release --all-features

      - name: Build Rust apps (aarch64)
        run: |
          cd apps/aqua
          cross build --release --all-features --target aarch64-unknown-linux-gnu

      - name: Create Amethyst build archive
        run: |
          cd apps/amethyst
          tar -czf amethyst-${{ needs.create-release.outputs.tag }}.tar.gz build/

      - name: Upload Rust build artifacts
        id: upload-rust
        uses: actions/upload-artifact@v4
        with:
          name: rust-release-builds
          path: |
            target/release/
            target/aarch64-unknown-linux-gnu/release/
            apps/aqua/target/release/
            apps/aqua/target/aarch64-unknown-linux-gnu/release/
          retention-days: 7

      - name: Upload Node build artifacts
        id: upload-node
        uses: actions/upload-artifact@v4
        with:
          name: node-release-builds
          path: |
            packages/*/dist/
            apps/amethyst/build/
            apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
          retention-days: 7

      - name: Upload Amethyst build to release
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create-release.outputs.upload_url }}
          asset_path: ./apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
          asset_name: amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
          asset_content_type: application/gzip

  release-services:
    name: Release Services
    runs-on: ubuntu-latest
    needs: [create-release, build-all]
    permissions:
      contents: read
      packages: write
    strategy:
      matrix:
        service:
          - name: aqua
            dockerfile: apps/aqua/Dockerfile
            context: .
          - name: cadet
            dockerfile: services/cadet/Dockerfile
            context: .
          - name: rocketman
            dockerfile: services/rocketman/Dockerfile
            context: .
          - name: satellite
            dockerfile: services/satellite/Dockerfile
            context: .
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Check if service has Dockerfile
        id: check
        run: |
          if [ -f "${{ matrix.service.dockerfile }}" ]; then
            echo "has_dockerfile=true" >> $GITHUB_OUTPUT
            echo "Service ${{ matrix.service.name }} has Dockerfile"
          else
            echo "has_dockerfile=false" >> $GITHUB_OUTPUT
            echo "Service ${{ matrix.service.name }} does not have Dockerfile, skipping"
          fi

      - name: Setup environment
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: ./.github/actions/setup
        with:
          setup-node: "true"
          lexicons-only-rust: "true"

      - name: Download build artifacts
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: actions/download-artifact@v4
        with:
          name: rust-release-builds
          path: .

      - name: Log in to Container Registry
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        if: steps.check.outputs.has_dockerfile == 'true'
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ github.repository }}/${{ matrix.service.name }}
          tags: |
            type=raw,value=latest
            type=raw,value=${{ needs.create-release.outputs.tag }}

      - name: Set up Docker Buildx
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        if: steps.check.outputs.has_dockerfile == 'true'
        uses: docker/build-push-action@v5
        with:
          context: ${{ matrix.service.context }}
          file: ${{ matrix.service.dockerfile }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha,scope=${{ matrix.service.name }}
          cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}
          build-args: |
            BUILDKIT_INLINE_CACHE=1

+4 -17  .gitignore

···

  # generated lexicons
  # js lexicons
- */**/lexicons
  # rust lexicons (types :)))
- */**/types

  # vendor directory for submodules
  !vendor/
···
  vendor/**/dist/
  vendor/**/node_modules/

- # lexicons directory structure
- !lexicons/
- # Track our custom lexicons
- !lexicons/fm.teal.alpha/
- !lexicons/fm.teal.alpha/**/*.json
- # Track the symlinks to atproto lexicons
- !lexicons/app
- !lexicons/chat
- !lexicons/com
- !lexicons/tools
- # But ignore any generated files within lexicons
- lexicons/**/*.js
- lexicons/**/*.d.ts
- lexicons/**/dist/
- lexicons/**/node_modules/

+126  .pre-commit-config.yaml  (new file)

# Pre-commit configuration for Teal project
# Install with: pip install pre-commit && pre-commit install
# Run manually with: pre-commit run --all-files

repos:
  # General file checks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-json
      - id: check-toml
      - id: check-merge-conflict
      - id: check-added-large-files
        args: ["--maxkb=500"]
      - id: mixed-line-ending
        args: ["--fix=lf"]

  # TypeScript/JavaScript formatting and linting
  - repo: local
    hooks:
      - id: prettier
        name: Prettier
        entry: pnpm prettier --write
        language: system
        files: \.(ts|tsx|js|jsx|json|md|yaml|yml)$
        pass_filenames: true

      - id: biome-check
        name: Biome Check
        entry: pnpm biome check --apply
        language: system
        files: \.(ts|tsx|js|jsx)$
        pass_filenames: false

      # TypeScript check temporarily disabled due to vendor compilation issues
      # - id: typescript-check
      #   name: TypeScript Check
      #   entry: pnpm typecheck
      #   language: system
      #   files: \.(ts|tsx)$
      #   pass_filenames: false

  # Rust formatting and linting
  - repo: local
    hooks:
      - id: cargo-fmt-services
        name: Cargo Format (Services Workspace)
        entry: bash -c 'cd services && cargo fmt'
        language: system
        files: services/.*\.rs$
        pass_filenames: false

      - id: cargo-clippy-services
        name: Cargo Clippy (Services Workspace)
        entry: bash -c 'cd services && cargo clippy -- -D warnings'
        language: system
        files: services/.*\.rs$
        pass_filenames: false

      - id: cargo-fmt-apps
        name: Cargo Format (Apps)
        entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo fmt && cd ../..; fi; done'
        language: system
        files: apps/.*\.rs$
        pass_filenames: false

      - id: cargo-clippy-apps
        name: Cargo Clippy (Apps)
        entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo clippy -- -D warnings && cd ../..; fi; done'
        language: system
        files: apps/.*\.rs$
        pass_filenames: false

  # Lexicon validation and generation
  - repo: local
    hooks:
      - id: lexicon-validate
        name: Validate Lexicons
        entry: pnpm lex:validate
        language: system
        files: lexicons/.*\.json$
        pass_filenames: false

      - id: lexicon-generate
        name: Generate Lexicons (files ignored by .gitignore)
        entry: pnpm lex:gen-server
        language: system
        files: lexicons/.*\.json$
        pass_filenames: false
        always_run: false

  # Optional: Additional checks
  - repo: local
    hooks:
      - id: no-console-log
        name: Check for console.log
        entry: bash -c 'if grep -r "console\.log" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" .; then echo "Found console.log statements. Please remove them."; exit 1; fi'
        language: system
        files: \.(ts|tsx|js|jsx)$
        pass_filenames: false

      - id: check-todos
        name: Check for TODO/FIXME
        entry: bash -c 'if grep -r -i "TODO\|FIXME" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" --include="*.rs" .; then echo "Found TODO/FIXME comments. Consider addressing them."; fi'
        language: system
        files: \.(ts|tsx|js|jsx|rs)$
        pass_filenames: false
        verbose: true

# Global settings
default_language_version:
  node: system
  python: python3

# Skip certain hooks for specific file patterns
exclude: |
  (?x)^(
    vendor/.*|
    node_modules/.*|
    target/.*|
    .git/.*|
    .*\.lock$
  )$
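
The hooks above can be installed exactly as the header comments describe; individual hook ids are also runnable on their own, which helps when iterating on a single workspace:

```bash
# One-time setup (from the comments at the top of the config)
pip install pre-commit
pre-commit install

# Run everything, or just one hook defined above
pre-commit run --all-files
pre-commit run cargo-clippy-services --all-files
```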

+46  .sqlx/query-00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75.json  (new file)

{
  "db_name": "PostgreSQL",
  "query": "\n SELECT DISTINCT\n ae1.name as synthetic_name,\n ae2.name as target_name,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score,\n COUNT(ptae1.play_uri) as synthetic_plays,\n COUNT(ptae2.play_uri) as target_plays\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id\n LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score\n ORDER BY similarity_score DESC\n LIMIT 10\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "synthetic_name", "type_info": "Text" },
      { "ordinal": 1, "name": "target_name", "type_info": "Text" },
      { "ordinal": 2, "name": "similarity_score", "type_info": "Float4" },
      { "ordinal": 3, "name": "synthetic_plays", "type_info": "Int8" },
      { "ordinal": 4, "name": "target_plays", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Float4"] },
    "nullable": [false, false, null, null, null]
  },
  "hash": "00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75"
}
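
The `.sqlx/query-*.json` files in this and the following sections are SQLx's offline query cache: one metadata file per checked query, keyed by its hash, which is what lets the `SQLX_OFFLINE=true` builds in CI type-check SQL without a live database. When a query changes, the cache is regenerated with sqlx-cli; a sketch, assuming `sqlx-cli` is installed and `DATABASE_URL` points at a migrated database (for example the test database from `.env.development`):

```bash
# Install the CLI once
cargo install sqlx-cli --no-default-features --features rustls,postgres

# Regenerate the offline cache into .sqlx/ for every crate in the workspace
export DATABASE_URL=postgres://postgres:testpass123@localhost:5433/teal_test
cargo sqlx prepare --workspace
```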
+12
.sqlx/query-0d7c3ef80c20dac6efd0fe3c430d7f41b1c90368ff99ce8a09f66bca63864d1e.json
+12
.sqlx/query-0d7c3ef80c20dac6efd0fe3c430d7f41b1c90368ff99ce8a09f66bca63864d1e.json
···
+35
.sqlx/query-0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273.json
+35
.sqlx/query-0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE p.did = $1\n AND pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $2\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "play_count",
19
+
"type_info": "Int8"
20
+
}
21
+
],
22
+
"parameters": {
23
+
"Left": [
24
+
"Text",
25
+
"Int8"
26
+
]
27
+
},
28
+
"nullable": [
29
+
false,
30
+
true,
31
+
null
32
+
]
33
+
},
34
+
"hash": "0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273"
35
+
}
+14
.sqlx/query-0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec.json
+14
.sqlx/query-0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM artists_extended WHERE id = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec"
14
+
}
+112
.sqlx/query-0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1.json
+112
.sqlx/query-0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE did = ANY($1)\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "uri",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "did",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "rkey",
19
+
"type_info": "Text"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "cid",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "isrc",
29
+
"type_info": "Text"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "duration",
34
+
"type_info": "Int4"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "track_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "played_time",
44
+
"type_info": "Timestamptz"
45
+
},
46
+
{
47
+
"ordinal": 8,
48
+
"name": "processed_time",
49
+
"type_info": "Timestamptz"
50
+
},
51
+
{
52
+
"ordinal": 9,
53
+
"name": "release_mbid",
54
+
"type_info": "Uuid"
55
+
},
56
+
{
57
+
"ordinal": 10,
58
+
"name": "release_name",
59
+
"type_info": "Text"
60
+
},
61
+
{
62
+
"ordinal": 11,
63
+
"name": "recording_mbid",
64
+
"type_info": "Uuid"
65
+
},
66
+
{
67
+
"ordinal": 12,
68
+
"name": "submission_client_agent",
69
+
"type_info": "Text"
70
+
},
71
+
{
72
+
"ordinal": 13,
73
+
"name": "music_service_base_domain",
74
+
"type_info": "Text"
75
+
},
76
+
{
77
+
"ordinal": 14,
78
+
"name": "origin_url",
79
+
"type_info": "Text"
80
+
},
81
+
{
82
+
"ordinal": 15,
83
+
"name": "artists",
84
+
"type_info": "Json"
85
+
}
86
+
],
87
+
"parameters": {
88
+
"Left": [
89
+
"TextArray"
90
+
]
91
+
},
92
+
"nullable": [
93
+
false,
94
+
false,
95
+
false,
96
+
false,
97
+
true,
98
+
true,
99
+
false,
100
+
true,
101
+
true,
102
+
true,
103
+
true,
104
+
true,
105
+
true,
106
+
true,
107
+
true,
108
+
null
109
+
]
110
+
},
111
+
"hash": "0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1"
112
+
}
+22
.sqlx/query-193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7.json
+22
.sqlx/query-193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT extract_discriminant($1)",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "extract_discriminant",
9
+
"type_info": "Text"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Text"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7"
22
+
}
+14
.sqlx/query-1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c.json
+14
.sqlx/query-1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM releases WHERE mbid = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c"
14
+
}
+14
.sqlx/query-1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363.json
+14
.sqlx/query-1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM play_to_artists WHERE play_uri = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363"
14
+
}
+22
.sqlx/query-28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e.json
+22
.sqlx/query-28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT extract_edition_discriminant($1)",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "extract_edition_discriminant",
9
+
"type_info": "Text"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Text"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e"
22
+
}
+52
.sqlx/query-2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149.json
+52
.sqlx/query-2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n r1.mbid as release1_mbid,\n r1.name as release1_name,\n r2.mbid as release2_mbid,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "release1_mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "release1_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "release2_mbid",
19
+
"type_info": "Uuid"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "release2_name",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "similarity_score",
29
+
"type_info": "Float4"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "shared_artists",
34
+
"type_info": "Int8"
35
+
}
36
+
],
37
+
"parameters": {
38
+
"Left": [
39
+
"Float4"
40
+
]
41
+
},
42
+
"nullable": [
43
+
false,
44
+
false,
45
+
false,
46
+
false,
47
+
null,
48
+
null
49
+
]
50
+
},
51
+
"hash": "2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149"
52
+
}
+14
.sqlx/query-2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a.json
+14
.sqlx/query-2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM play_to_artists_extended WHERE artist_id = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a"
14
+
}
+12
.sqlx/query-3d84a9e1ed05846bc931eea9b90fd88cae8b636968af4bd2f9b1a9927d15379d.json
+12
.sqlx/query-3d84a9e1ed05846bc931eea9b90fd88cae8b636968af4bd2f9b1a9927d15379d.json
+22
.sqlx/query-413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0.json
+22
.sqlx/query-413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "count",
9
+
"type_info": "Int8"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0"
22
+
}
+14
.sqlx/query-5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1.json
+14
.sqlx/query-5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n DELETE FROM profiles WHERE did = $1\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1"
14
+
}
+112
.sqlx/query-651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0.json
+112
.sqlx/query-651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE uri = $1\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "uri",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "did",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "rkey",
19
+
"type_info": "Text"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "cid",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "isrc",
29
+
"type_info": "Text"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "duration",
34
+
"type_info": "Int4"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "track_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "played_time",
44
+
"type_info": "Timestamptz"
45
+
},
46
+
{
47
+
"ordinal": 8,
48
+
"name": "processed_time",
49
+
"type_info": "Timestamptz"
50
+
},
51
+
{
52
+
"ordinal": 9,
53
+
"name": "release_mbid",
54
+
"type_info": "Uuid"
55
+
},
56
+
{
57
+
"ordinal": 10,
58
+
"name": "release_name",
59
+
"type_info": "Text"
60
+
},
61
+
{
62
+
"ordinal": 11,
63
+
"name": "recording_mbid",
64
+
"type_info": "Uuid"
65
+
},
66
+
{
67
+
"ordinal": 12,
68
+
"name": "submission_client_agent",
69
+
"type_info": "Text"
70
+
},
71
+
{
72
+
"ordinal": 13,
73
+
"name": "music_service_base_domain",
74
+
"type_info": "Text"
75
+
},
76
+
{
77
+
"ordinal": 14,
78
+
"name": "origin_url",
79
+
"type_info": "Text"
80
+
},
81
+
{
82
+
"ordinal": 15,
83
+
"name": "artists",
84
+
"type_info": "Json"
85
+
}
86
+
],
87
+
"parameters": {
88
+
"Left": [
89
+
"Text"
90
+
]
91
+
},
92
+
"nullable": [
93
+
false,
94
+
false,
95
+
false,
96
+
false,
97
+
true,
98
+
true,
99
+
false,
100
+
true,
101
+
true,
102
+
true,
103
+
true,
104
+
true,
105
+
true,
106
+
true,
107
+
true,
108
+
null
109
+
]
110
+
},
111
+
"hash": "651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0"
112
+
}
+16
.sqlx/query-6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48.json
+16
.sqlx/query-6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n UPDATE play_to_artists_extended\n SET artist_id = $1, artist_name = $2\n WHERE artist_id = $3\n AND NOT EXISTS (\n SELECT 1 FROM play_to_artists_extended existing\n WHERE existing.play_uri = play_to_artists_extended.play_uri\n AND existing.artist_id = $1\n )\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4",
9
+
"Text",
10
+
"Int4"
11
+
]
12
+
},
13
+
"nullable": []
14
+
},
15
+
"hash": "6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48"
16
+
}
+52
.sqlx/query-6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56.json
+52
.sqlx/query-6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n ae1.id as synthetic_id,\n ae1.name as synthetic_name,\n ae2.id as target_id,\n ae2.name as target_name,\n ae2.mbid as target_mbid,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n ORDER BY similarity_score DESC\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "synthetic_id",
9
+
"type_info": "Int4"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "synthetic_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "target_id",
19
+
"type_info": "Int4"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "target_name",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "target_mbid",
29
+
"type_info": "Uuid"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "similarity_score",
34
+
"type_info": "Float4"
35
+
}
36
+
],
37
+
"parameters": {
38
+
"Left": [
39
+
"Float4"
40
+
]
41
+
},
42
+
"nullable": [
43
+
false,
44
+
false,
45
+
false,
46
+
false,
47
+
true,
48
+
null
49
+
]
50
+
},
51
+
"hash": "6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56"
52
+
}
+23
.sqlx/query-76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0.json
+23
.sqlx/query-76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "id",
9
+
"type_info": "Int4"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid",
15
+
"Text"
16
+
]
17
+
},
18
+
"nullable": [
19
+
false
20
+
]
21
+
},
22
+
"hash": "76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0"
23
+
}
+29
.sqlx/query-7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85.json
+29
.sqlx/query-7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85.json
···
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO plays (\n uri, cid, did, rkey, isrc, duration, track_name, played_time,\n processed_time, release_mbid, release_name, recording_mbid,\n submission_client_agent, music_service_base_domain, artist_names_raw,\n track_discriminant, release_discriminant\n ) VALUES (\n $1, $2, $3, $4, $5, $6, $7, $8,\n NOW(), $9, $10, $11, $12, $13, $14, $15, $16\n ) ON CONFLICT(uri) DO UPDATE SET\n isrc = EXCLUDED.isrc,\n duration = EXCLUDED.duration,\n track_name = EXCLUDED.track_name,\n played_time = EXCLUDED.played_time,\n processed_time = EXCLUDED.processed_time,\n release_mbid = EXCLUDED.release_mbid,\n release_name = EXCLUDED.release_name,\n recording_mbid = EXCLUDED.recording_mbid,\n submission_client_agent = EXCLUDED.submission_client_agent,\n music_service_base_domain = EXCLUDED.music_service_base_domain,\n artist_names_raw = EXCLUDED.artist_names_raw,\n track_discriminant = EXCLUDED.track_discriminant,\n release_discriminant = EXCLUDED.release_discriminant;\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text",
9
+
"Text",
10
+
"Text",
11
+
"Text",
12
+
"Text",
13
+
"Int4",
14
+
"Text",
15
+
"Timestamptz",
16
+
"Uuid",
17
+
"Text",
18
+
"Uuid",
19
+
"Text",
20
+
"Text",
21
+
"Jsonb",
22
+
"Text",
23
+
"Text"
24
+
]
25
+
},
26
+
"nullable": []
27
+
},
28
+
"hash": "7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85"
29
+
}
+16
.sqlx/query-7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid",
9
+
"Text",
10
+
"Uuid"
11
+
]
12
+
},
13
+
"nullable": []
14
+
},
15
+
"hash": "7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10"
16
+
}
+18
.sqlx/query-8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO statii (uri, did, rkey, cid, record)\n VALUES ($1, $2, $3, $4, $5)\n ON CONFLICT (uri) DO UPDATE SET\n cid = EXCLUDED.cid,\n record = EXCLUDED.record,\n indexed_at = NOW();\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text",
9
+
"Text",
10
+
"Text",
11
+
"Text",
12
+
"Jsonb"
13
+
]
14
+
},
15
+
"nullable": []
16
+
},
17
+
"hash": "8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946"
18
+
}
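Status records are upserted keyed on their URI with the raw record kept as JSONB, so replaying the same record is harmless. A minimal sketch, assuming the record has already been decoded into a `serde_json::Value` and SQLx's `json` feature is on (the function name is illustrative):

```rust
use sqlx::PgPool;

/// Write (or overwrite) a status record; re-ingesting the same URI just
/// refreshes cid/record and bumps indexed_at.
pub async fn upsert_status(
    pool: &PgPool,
    uri: &str,
    did: &str,
    rkey: &str,
    cid: &str,
    record: serde_json::Value,
) -> Result<(), sqlx::Error> {
    sqlx::query!(
        r#"
        INSERT INTO statii (uri, did, rkey, cid, record)
        VALUES ($1, $2, $3, $4, $5)
        ON CONFLICT (uri) DO UPDATE SET
            cid = EXCLUDED.cid,
            record = EXCLUDED.record,
            indexed_at = NOW()
        "#,
        uri,
        did,
        rkey,
        cid,
        record
    )
    .execute(pool)
    .await?;
    Ok(())
}
```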
+34
.sqlx/query-97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $1\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "play_count",
19
+
"type_info": "Int8"
20
+
}
21
+
],
22
+
"parameters": {
23
+
"Left": [
24
+
"Int8"
25
+
]
26
+
},
27
+
"nullable": [
28
+
true,
29
+
true,
30
+
null
31
+
]
32
+
},
33
+
"hash": "97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59"
34
+
}
+12
.sqlx/query-9af33e4329198dee7814519573b63858eaf69f08ad2959d96ffee5c8387af0ba.json
···
+16
.sqlx/query-9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES\n ($1, $2, $3)\n ON CONFLICT (play_uri, artist_id) DO NOTHING;\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text",
9
+
"Int4",
10
+
"Text"
11
+
]
12
+
},
13
+
"nullable": []
14
+
},
15
+
"hash": "9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3"
16
+
}
+24
.sqlx/query-9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)\n RETURNING mbid;\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid",
15
+
"Text",
16
+
"Text"
17
+
]
18
+
},
19
+
"nullable": [
20
+
false
21
+
]
22
+
},
23
+
"hash": "9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81"
24
+
}
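The `COALESCE(EXCLUDED.discriminant, recordings.discriminant)` clause is the interesting part: a later import that carries no discriminant will not erase one that is already stored. A sketch with the discriminant optional at the call site (function name illustrative):

```rust
use sqlx::PgPool;
use uuid::Uuid;

/// Upsert a recording; passing `None` for `discriminant` keeps whatever
/// discriminant is already on the row, per the COALESCE in the SQL above.
pub async fn upsert_recording(
    pool: &PgPool,
    mbid: Uuid,
    name: &str,
    discriminant: Option<&str>,
) -> Result<Uuid, sqlx::Error> {
    sqlx::query_scalar!(
        r#"
        INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)
        ON CONFLICT (mbid) DO UPDATE SET
            name = EXCLUDED.name,
            discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)
        RETURNING mbid
        "#,
        mbid,
        name,
        discriminant
    )
    .fetch_one(pool)
    .await
}
```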
+14
.sqlx/query-9d4e872755f90087f64f116d8fee340218e09b40ab8f94b5d9d17b9c39bf3d4f.json
···
+22
.sqlx/query-ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT generate_synthetic_mbid($1)",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "generate_synthetic_mbid",
9
+
"type_info": "Uuid"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Text"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd"
22
+
}
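`generate_synthetic_mbid` is a SQL function (presumably defined in the project's migrations; it is not a Postgres built-in), so the cache entry describes a single UUID column that SQLx cannot prove non-NULL, hence `nullable: [null]`. A sketch of calling it from Rust:

```rust
use sqlx::PgPool;
use uuid::Uuid;

/// Ask Postgres to mint a synthetic MBID for an artist that has no
/// MusicBrainz entry. The Option comes from SQLx not being able to prove
/// the function result is never NULL.
pub async fn synthetic_mbid(pool: &PgPool, name: &str) -> Result<Uuid, sqlx::Error> {
    let mbid: Option<Uuid> = sqlx::query_scalar!("SELECT generate_synthetic_mbid($1)", name)
        .fetch_one(pool)
        .await?;
    // Treat an unexpected NULL as "nothing came back".
    mbid.ok_or(sqlx::Error::RowNotFound)
}
```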
+35
.sqlx/query-af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.did = $1\n AND p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $2\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "play_count",
19
+
"type_info": "Int8"
20
+
}
21
+
],
22
+
"parameters": {
23
+
"Left": [
24
+
"Text",
25
+
"Int8"
26
+
]
27
+
},
28
+
"nullable": [
29
+
true,
30
+
true,
31
+
null
32
+
]
33
+
},
34
+
"hash": "af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3"
35
+
}
+46
.sqlx/query-b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n r1.name as recording1_name,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "recording1_name",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "recording2_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "similarity_score",
19
+
"type_info": "Float4"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "shared_artists",
24
+
"type_info": "Int8"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "artist_names",
29
+
"type_info": "Text"
30
+
}
31
+
],
32
+
"parameters": {
33
+
"Left": [
34
+
"Float4"
35
+
]
36
+
},
37
+
"nullable": [
38
+
false,
39
+
false,
40
+
null,
41
+
null,
42
+
null
43
+
]
44
+
},
45
+
"hash": "b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453"
46
+
}
+15
.sqlx/query-b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text",
9
+
"Int4"
10
+
]
11
+
},
12
+
"nullable": []
13
+
},
14
+
"hash": "b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56"
15
+
}
+65
.sqlx/query-b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT\n p.avatar,\n p.banner,\n p.created_at,\n p.description,\n p.description_facets,\n p.did,\n p.display_name,\n s.record as status\n FROM profiles p\n LEFT JOIN statii s ON p.did = s.did AND s.rkey = 'self'\n WHERE (p.did = ANY($1))\n OR (p.handle = ANY($2))",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "avatar",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "banner",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "created_at",
19
+
"type_info": "Timestamptz"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "description",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "description_facets",
29
+
"type_info": "Jsonb"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "did",
34
+
"type_info": "Text"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "display_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "status",
44
+
"type_info": "Jsonb"
45
+
}
46
+
],
47
+
"parameters": {
48
+
"Left": [
49
+
"TextArray",
50
+
"TextArray"
51
+
]
52
+
},
53
+
"nullable": [
54
+
true,
55
+
true,
56
+
true,
57
+
true,
58
+
true,
59
+
false,
60
+
true,
61
+
true
62
+
]
63
+
},
64
+
"hash": "b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837"
65
+
}
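The `= ANY($1)` / `= ANY($2)` predicates take Postgres text arrays, which lets one round trip hydrate an arbitrary batch of profiles by DID and/or handle; on the Rust side the natural binding is a slice of `String`s. A sketch with a trimmed column list (names illustrative):

```rust
use sqlx::PgPool;

/// Fetch profile rows (plus their 'self' status record) for any of the
/// given DIDs or handles in a single query. Either list may be empty.
pub async fn hydrate_profiles(
    pool: &PgPool,
    dids: &[String],
    handles: &[String],
) -> Result<(), sqlx::Error> {
    let rows = sqlx::query!(
        r#"
        SELECT p.did, p.display_name, s.record as status
        FROM profiles p
        LEFT JOIN statii s ON p.did = s.did AND s.rkey = 'self'
        WHERE (p.did = ANY($1)) OR (p.handle = ANY($2))
        "#,
        dids,
        handles
    )
    .fetch_all(pool)
    .await?;

    for row in rows {
        // display_name and status are nullable columns, hence Options.
        println!("{}: {:?}", row.did, row.display_name);
    }
    Ok(())
}
```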
+34
.sqlx/query-b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $1\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "play_count",
19
+
"type_info": "Int8"
20
+
}
21
+
],
22
+
"parameters": {
23
+
"Left": [
24
+
"Int8"
25
+
]
26
+
},
27
+
"nullable": [
28
+
false,
29
+
true,
30
+
null
31
+
]
32
+
},
33
+
"hash": "b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e"
34
+
}
+21
.sqlx/query-b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO profiles (did, handle, display_name, description, description_facets, avatar, banner, created_at)\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8)\n ON CONFLICT (did) DO UPDATE SET\n display_name = EXCLUDED.display_name,\n description = EXCLUDED.description,\n description_facets = EXCLUDED.description_facets,\n avatar = EXCLUDED.avatar,\n banner = EXCLUDED.banner,\n created_at = EXCLUDED.created_at;\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text",
9
+
"Text",
10
+
"Text",
11
+
"Text",
12
+
"Jsonb",
13
+
"Text",
14
+
"Text",
15
+
"Timestamptz"
16
+
]
17
+
},
18
+
"nullable": []
19
+
},
20
+
"hash": "b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded"
21
+
}
+22
.sqlx/query-bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "count",
9
+
"type_info": "Int8"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224"
22
+
}
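`COUNT(*)` is always reported by SQLx as a nullable `Int8` (the `nullable: [null]` above), so call sites end up unwrapping an `Option<i64>`. A minimal sketch (function name illustrative):

```rust
use sqlx::PgPool;
use uuid::Uuid;

/// Number of plays attached to a release. COUNT(*) surfaces as Option<i64>
/// under the query_scalar! macro, so fall back to 0 explicitly.
pub async fn plays_for_release(pool: &PgPool, release_mbid: Uuid) -> Result<i64, sqlx::Error> {
    let count: Option<i64> =
        sqlx::query_scalar!("SELECT COUNT(*) FROM plays WHERE release_mbid = $1", release_mbid)
            .fetch_one(pool)
            .await?;
    Ok(count.unwrap_or(0))
}
```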
+12
.sqlx/query-bf9c6d3bf0f9594ae1c02dc85c9887b747aaa5f0c3e67d9381c3867c4f67ae6d.json
···
+46
.sqlx/query-cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n r1.name as release1_name,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "release1_name",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "release2_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "similarity_score",
19
+
"type_info": "Float4"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "shared_artists",
24
+
"type_info": "Int8"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "artist_names",
29
+
"type_info": "Text"
30
+
}
31
+
],
32
+
"parameters": {
33
+
"Left": [
34
+
"Float4"
35
+
]
36
+
},
37
+
"nullable": [
38
+
false,
39
+
false,
40
+
null,
41
+
null,
42
+
null
43
+
]
44
+
},
45
+
"hash": "cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c"
46
+
}
+15
.sqlx/query-cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid",
9
+
"Uuid"
10
+
]
11
+
},
12
+
"nullable": []
13
+
},
14
+
"hash": "cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2"
15
+
}
+14
.sqlx/query-d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM recordings WHERE mbid = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45"
14
+
}
+14
.sqlx/query-d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n DELETE FROM statii WHERE uri = $1\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961"
14
+
}
+112
.sqlx/query-f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays p\n LEFT JOIN play_to_artists as pta ON p.uri = pta.play_uri\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time DESC\n LIMIT $1\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "uri",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "did",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "rkey",
19
+
"type_info": "Text"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "cid",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "isrc",
29
+
"type_info": "Text"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "duration",
34
+
"type_info": "Int4"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "track_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "played_time",
44
+
"type_info": "Timestamptz"
45
+
},
46
+
{
47
+
"ordinal": 8,
48
+
"name": "processed_time",
49
+
"type_info": "Timestamptz"
50
+
},
51
+
{
52
+
"ordinal": 9,
53
+
"name": "release_mbid",
54
+
"type_info": "Uuid"
55
+
},
56
+
{
57
+
"ordinal": 10,
58
+
"name": "release_name",
59
+
"type_info": "Text"
60
+
},
61
+
{
62
+
"ordinal": 11,
63
+
"name": "recording_mbid",
64
+
"type_info": "Uuid"
65
+
},
66
+
{
67
+
"ordinal": 12,
68
+
"name": "submission_client_agent",
69
+
"type_info": "Text"
70
+
},
71
+
{
72
+
"ordinal": 13,
73
+
"name": "music_service_base_domain",
74
+
"type_info": "Text"
75
+
},
76
+
{
77
+
"ordinal": 14,
78
+
"name": "origin_url",
79
+
"type_info": "Text"
80
+
},
81
+
{
82
+
"ordinal": 15,
83
+
"name": "artists",
84
+
"type_info": "Json"
85
+
}
86
+
],
87
+
"parameters": {
88
+
"Left": [
89
+
"Int8"
90
+
]
91
+
},
92
+
"nullable": [
93
+
false,
94
+
false,
95
+
false,
96
+
false,
97
+
true,
98
+
true,
99
+
false,
100
+
true,
101
+
true,
102
+
true,
103
+
true,
104
+
true,
105
+
true,
106
+
true,
107
+
true,
108
+
null
109
+
]
110
+
},
111
+
"hash": "f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0"
112
+
}
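The recent-plays query above folds each play's artists into a JSON array via `json_build_object`, and the `COALESCE(..., '[]')` guarantees an array even for plays with no linked artists. On the Rust side the `artists` column arrives as a `serde_json::Value` (nullable in SQLx's view because of the aggregate); one hedged way to turn it into typed data, assuming `serde`, `serde_json`, and `uuid` with its `serde` feature (struct and helper names are illustrative):

```rust
use serde::Deserialize;
use uuid::Uuid;

/// Mirror of the objects built by json_build_object('artist_mbid', ..., 'artist_name', ...).
#[derive(Debug, Deserialize)]
pub struct PlayArtist {
    pub artist_mbid: Option<Uuid>,
    pub artist_name: String,
}

/// Turn the aggregated `artists` column of one row into typed structs.
/// The COALESCE in the SQL means a present value is always a JSON array.
pub fn parse_play_artists(
    artists: Option<serde_json::Value>,
) -> Result<Vec<PlayArtist>, serde_json::Error> {
    match artists {
        Some(value) => serde_json::from_value(value),
        None => Ok(Vec::new()),
    }
}
```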
+23
.sqlx/query-f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "id",
9
+
"type_info": "Int4"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid",
15
+
"Text"
16
+
]
17
+
},
18
+
"nullable": [
19
+
false
20
+
]
21
+
},
22
+
"hash": "f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734"
23
+
}
+24
.sqlx/query-f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant)\n RETURNING mbid;\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid",
15
+
"Text",
16
+
"Text"
17
+
]
18
+
},
19
+
"nullable": [
20
+
false
21
+
]
22
+
},
23
+
"hash": "f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672"
24
+
}
+28
.sqlx/query-fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n ae.id,\n ae.name\n FROM artists_extended ae\n WHERE ae.mbid_type = 'musicbrainz'\n AND (\n LOWER(TRIM(ae.name)) = $1\n OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%'\n OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%'\n OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6\n )\n ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC\n LIMIT 10\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "id",
9
+
"type_info": "Int4"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "name",
14
+
"type_info": "Text"
15
+
}
16
+
],
17
+
"parameters": {
18
+
"Left": [
19
+
"Text"
20
+
]
21
+
},
22
+
"nullable": [
23
+
false,
24
+
false
25
+
]
26
+
},
27
+
"hash": "fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7"
28
+
}
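Every branch of this lookup (exact match, substring in either direction, then trigram similarity above 0.6) compares against `LOWER(TRIM(ae.name))`, so it only behaves as intended if the caller has already trimmed and lower-cased the search term bound to `$1`. A tiny sketch of that normalization step (helper name illustrative):

```rust
/// Normalise a raw artist name the same way the SQL normalises stored
/// names (TRIM + LOWER), so the exact/substring branches can actually match.
pub fn normalize_artist_name(raw: &str) -> String {
    raw.trim().to_lowercase()
}

#[cfg(test)]
mod tests {
    use super::normalize_artist_name;

    #[test]
    fn trims_and_lowercases() {
        assert_eq!(normalize_artist_name("  Some Artist "), "some artist");
    }
}
```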
+52
.sqlx/query-ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0.json
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n r1.mbid as recording1_mbid,\n r1.name as recording1_name,\n r2.mbid as recording2_mbid,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "recording1_mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "recording1_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "recording2_mbid",
19
+
"type_info": "Uuid"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "recording2_name",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "similarity_score",
29
+
"type_info": "Float4"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "shared_artists",
34
+
"type_info": "Int8"
35
+
}
36
+
],
37
+
"parameters": {
38
+
"Left": [
39
+
"Float4"
40
+
]
41
+
},
42
+
"nullable": [
43
+
false,
44
+
false,
45
+
false,
46
+
false,
47
+
null,
48
+
null
49
+
]
50
+
},
51
+
"hash": "ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0"
52
+
}
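Taken with the earlier `UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2` and `DELETE FROM recordings WHERE mbid = $1` statements, this query sketches a de-duplication pass: find near-duplicate recordings that share an artist and a compatible discriminant, repoint their plays at the surviving row, then drop the orphan. A hedged sketch of the merge step; wrapping the two statements in one transaction is my addition, not something the cache files show:

```rust
use sqlx::PgPool;
use uuid::Uuid;

/// Repoint every play from `duplicate` to `keep`, then drop the duplicate
/// recording row, atomically.
pub async fn merge_recordings(
    pool: &PgPool,
    keep: Uuid,
    duplicate: Uuid,
) -> Result<(), sqlx::Error> {
    let mut tx = pool.begin().await?;

    sqlx::query!(
        "UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2",
        keep,
        duplicate
    )
    .execute(&mut *tx)
    .await?;

    sqlx::query!("DELETE FROM recordings WHERE mbid = $1", duplicate)
        .execute(&mut *tx)
        .await?;

    tx.commit().await?;
    Ok(())
}
```

After adding or changing any of these queries, `cargo sqlx prepare` has to be re-run so the `.sqlx/` entries (and their hashes) stay in sync with the code.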
+616
-241
Cargo.lock
···
121
dependencies = [
122
"anyhow",
123
"async-trait",
124
"atrium-api",
125
"axum",
126
-
"base64",
127
"chrono",
128
"clap",
129
"dotenvy",
···
165
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
166
167
[[package]]
168
name = "async-lock"
169
version = "3.4.0"
170
source = "registry+https://github.com/rust-lang/crates.io-index"
···
187
]
188
189
[[package]]
190
name = "atoi"
191
version = "2.0.0"
192
source = "registry+https://github.com/rust-lang/crates.io-index"
···
210
"atrium-common",
211
"atrium-xrpc",
212
"chrono",
213
-
"http",
214
"ipld-core",
215
"langtag",
216
"regex",
···
243
source = "registry+https://github.com/rust-lang/crates.io-index"
244
checksum = "0216ad50ce34e9ff982e171c3659e65dedaa2ed5ac2994524debdc9a9647ffa8"
245
dependencies = [
246
-
"http",
247
"serde",
248
"serde_html_form",
249
"serde_json",
···
259
260
[[package]]
261
name = "aws-lc-rs"
262
-
version = "1.13.2"
263
source = "registry+https://github.com/rust-lang/crates.io-index"
264
-
checksum = "08b5d4e069cbc868041a64bd68dc8cb39a0d79585cd6c5a24caa8c2d622121be"
265
dependencies = [
266
"aws-lc-sys",
267
"zeroize",
···
291
"bytes",
292
"form_urlencoded",
293
"futures-util",
294
-
"http",
295
"http-body",
296
"http-body-util",
297
"hyper",
···
324
dependencies = [
325
"bytes",
326
"futures-core",
327
-
"http",
328
"http-body",
329
"http-body-util",
330
"mime",
···
348
]
349
350
[[package]]
351
name = "backtrace"
352
version = "0.3.75"
353
source = "registry+https://github.com/rust-lang/crates.io-index"
···
369
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
370
371
[[package]]
372
name = "base64"
373
version = "0.22.1"
374
source = "registry+https://github.com/rust-lang/crates.io-index"
···
397
"proc-macro2",
398
"quote",
399
"regex",
400
-
"rustc-hash",
401
"shlex",
402
"syn 2.0.104",
403
"which",
···
504
version = "1.10.1"
505
source = "registry+https://github.com/rust-lang/crates.io-index"
506
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
507
508
[[package]]
509
name = "cadet"
···
511
dependencies = [
512
"anyhow",
513
"async-trait",
514
"atrium-api",
515
-
"base64",
516
"chrono",
517
"cid 0.11.1",
518
"dotenvy",
519
"flume",
520
"iroh-car",
521
"libipld",
522
"metrics 0.23.1",
···
528
"reqwest",
529
"rocketman",
530
"serde",
531
"serde_json",
532
"sqlx",
533
"time",
534
"tokio",
535
-
"tokio-tungstenite",
536
"tracing",
537
"tracing-subscriber",
538
"types",
···
583
584
[[package]]
585
name = "cc"
586
-
version = "1.2.30"
587
source = "registry+https://github.com/rust-lang/crates.io-index"
588
-
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
589
dependencies = [
590
"jobserver",
591
"libc",
···
608
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
609
610
[[package]]
611
name = "chrono"
612
version = "0.4.41"
613
source = "registry+https://github.com/rust-lang/crates.io-index"
···
662
663
[[package]]
664
name = "clap"
665
-
version = "4.5.41"
666
source = "registry+https://github.com/rust-lang/crates.io-index"
667
-
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
668
dependencies = [
669
"clap_builder",
670
"clap_derive",
···
672
673
[[package]]
674
name = "clap_builder"
675
-
version = "4.5.41"
676
source = "registry+https://github.com/rust-lang/crates.io-index"
677
-
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
678
dependencies = [
679
"anstream",
680
"anstyle",
···
716
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
717
718
[[package]]
719
name = "combine"
720
version = "4.6.7"
721
source = "registry+https://github.com/rust-lang/crates.io-index"
···
810
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
811
812
[[package]]
813
name = "crossbeam-channel"
814
version = "0.5.15"
815
source = "registry+https://github.com/rust-lang/crates.io-index"
···
841
version = "0.8.21"
842
source = "registry+https://github.com/rust-lang/crates.io-index"
843
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
844
845
[[package]]
846
name = "crypto-common"
···
1026
]
1027
1028
[[package]]
1029
name = "displaydoc"
1030
version = "0.2.5"
1031
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1049
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
1050
1051
[[package]]
1052
name = "either"
1053
version = "1.15.0"
1054
source = "registry+https://github.com/rust-lang/crates.io-index"
1055
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
1056
dependencies = [
1057
"serde",
1058
]
1059
1060
[[package]]
···
1121
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
1122
1123
[[package]]
1124
name = "flume"
1125
version = "0.11.1"
1126
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1145
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
1146
1147
[[package]]
1148
-
name = "foreign-types"
1149
-
version = "0.3.2"
1150
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1151
-
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
1152
-
dependencies = [
1153
-
"foreign-types-shared",
1154
-
]
1155
-
1156
-
[[package]]
1157
-
name = "foreign-types-shared"
1158
-
version = "0.1.1"
1159
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1160
-
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
1161
-
1162
-
[[package]]
1163
name = "form_urlencoded"
1164
version = "1.2.1"
1165
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1296
dependencies = [
1297
"typenum",
1298
"version_check",
1299
]
1300
1301
[[package]]
···
1318
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
1319
dependencies = [
1320
"cfg-if",
1321
"libc",
1322
"r-efi",
1323
"wasi 0.14.2+wasi-0.2.4",
1324
]
1325
1326
[[package]]
···
1336
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
1337
1338
[[package]]
1339
name = "h2"
1340
version = "0.4.11"
1341
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1346
"fnv",
1347
"futures-core",
1348
"futures-sink",
1349
-
"http",
1350
"indexmap",
1351
"slab",
1352
"tokio",
···
1421
1422
[[package]]
1423
name = "http"
1424
version = "1.3.1"
1425
source = "registry+https://github.com/rust-lang/crates.io-index"
1426
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
···
1437
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
1438
dependencies = [
1439
"bytes",
1440
-
"http",
1441
]
1442
1443
[[package]]
···
1448
dependencies = [
1449
"bytes",
1450
"futures-core",
1451
-
"http",
1452
"http-body",
1453
"pin-project-lite",
1454
]
···
1475
"futures-channel",
1476
"futures-util",
1477
"h2",
1478
-
"http",
1479
"http-body",
1480
"httparse",
1481
"httpdate",
···
1492
source = "registry+https://github.com/rust-lang/crates.io-index"
1493
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
1494
dependencies = [
1495
-
"http",
1496
"hyper",
1497
"hyper-util",
1498
-
"rustls",
1499
-
"rustls-native-certs",
1500
"rustls-pki-types",
1501
"tokio",
1502
-
"tokio-rustls",
1503
"tower-service",
1504
-
]
1505
-
1506
-
[[package]]
1507
-
name = "hyper-tls"
1508
-
version = "0.6.0"
1509
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1510
-
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
1511
-
dependencies = [
1512
-
"bytes",
1513
-
"http-body-util",
1514
-
"hyper",
1515
-
"hyper-util",
1516
-
"native-tls",
1517
-
"tokio",
1518
-
"tokio-native-tls",
1519
-
"tower-service",
1520
]
1521
1522
[[package]]
1523
name = "hyper-util"
1524
-
version = "0.1.15"
1525
source = "registry+https://github.com/rust-lang/crates.io-index"
1526
-
checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
1527
dependencies = [
1528
-
"base64",
1529
"bytes",
1530
"futures-channel",
1531
"futures-core",
1532
"futures-util",
1533
-
"http",
1534
"http-body",
1535
"hyper",
1536
"ipnet",
1537
"libc",
1538
"percent-encoding",
1539
"pin-project-lite",
1540
-
"socket2 0.5.10",
1541
-
"system-configuration",
1542
"tokio",
1543
"tower-service",
1544
"tracing",
1545
-
"windows-registry",
1546
]
1547
1548
[[package]]
···
1694
1695
[[package]]
1696
name = "io-uring"
1697
-
version = "0.7.8"
1698
source = "registry+https://github.com/rust-lang/crates.io-index"
1699
-
checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
1700
dependencies = [
1701
"bitflags 2.9.1",
1702
"cfg-if",
···
1732
1733
[[package]]
1734
name = "iroh-car"
1735
-
version = "0.4.0"
1736
source = "registry+https://github.com/rust-lang/crates.io-index"
1737
-
checksum = "475a6f0ebd64c87ea011021c67f10b57930f6c286e0163807066bfb83553b1b6"
1738
dependencies = [
1739
"anyhow",
1740
-
"cid 0.10.1",
1741
"futures",
1742
-
"libipld",
1743
"thiserror 1.0.69",
1744
"tokio",
1745
"unsigned-varint 0.7.2",
···
1784
dependencies = [
1785
"once_cell",
1786
"wasm-bindgen",
1787
]
1788
1789
[[package]]
···
1921
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
1922
dependencies = [
1923
"cfg-if",
1924
-
"windows-targets 0.53.2",
1925
]
1926
1927
[[package]]
···
1931
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
1932
1933
[[package]]
1934
name = "libsqlite3-sys"
1935
version = "0.30.1"
1936
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1997
]
1998
1999
[[package]]
2000
name = "matchers"
2001
version = "0.1.0"
2002
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2053
source = "registry+https://github.com/rust-lang/crates.io-index"
2054
checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034"
2055
dependencies = [
2056
-
"base64",
2057
"http-body-util",
2058
"hyper",
2059
"hyper-rustls",
···
2079
"hashbrown 0.15.4",
2080
"metrics 0.24.2",
2081
"quanta",
2082
-
"rand 0.9.1",
2083
"rand_xoshiro",
2084
"sketches-ddsketch",
2085
]
···
2147
"bytes",
2148
"encoding_rs",
2149
"futures-util",
2150
-
"http",
2151
"httparse",
2152
"memchr",
2153
"mime",
···
2262
]
2263
2264
[[package]]
2265
-
name = "native-tls"
2266
-
version = "0.2.14"
2267
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2268
-
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
2269
-
dependencies = [
2270
-
"libc",
2271
-
"log",
2272
-
"openssl",
2273
-
"openssl-probe",
2274
-
"openssl-sys",
2275
-
"schannel",
2276
-
"security-framework 2.11.1",
2277
-
"security-framework-sys",
2278
-
"tempfile",
2279
-
]
2280
-
2281
-
[[package]]
2282
name = "nom"
2283
version = "7.1.3"
2284
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2305
dependencies = [
2306
"overload",
2307
"winapi",
2308
]
2309
2310
[[package]]
···
2400
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
2401
2402
[[package]]
2403
-
name = "openssl"
2404
-
version = "0.10.73"
2405
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2406
-
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
2407
-
dependencies = [
2408
-
"bitflags 2.9.1",
2409
-
"cfg-if",
2410
-
"foreign-types",
2411
-
"libc",
2412
-
"once_cell",
2413
-
"openssl-macros",
2414
-
"openssl-sys",
2415
-
]
2416
-
2417
-
[[package]]
2418
-
name = "openssl-macros"
2419
-
version = "0.1.1"
2420
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2421
-
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
2422
-
dependencies = [
2423
-
"proc-macro2",
2424
-
"quote",
2425
-
"syn 2.0.104",
2426
-
]
2427
-
2428
-
[[package]]
2429
name = "openssl-probe"
2430
version = "0.1.6"
2431
source = "registry+https://github.com/rust-lang/crates.io-index"
2432
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
2433
2434
[[package]]
2435
-
name = "openssl-sys"
2436
-
version = "0.9.109"
2437
source = "registry+https://github.com/rust-lang/crates.io-index"
2438
-
checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
2439
-
dependencies = [
2440
-
"cc",
2441
-
"libc",
2442
-
"pkg-config",
2443
-
"vcpkg",
2444
-
]
2445
2446
[[package]]
2447
name = "overload"
···
2494
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
2495
2496
[[package]]
2497
-
name = "pin-project"
2498
-
version = "1.1.10"
2499
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2500
-
checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
2501
-
dependencies = [
2502
-
"pin-project-internal",
2503
-
]
2504
-
2505
-
[[package]]
2506
-
name = "pin-project-internal"
2507
-
version = "1.1.10"
2508
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2509
-
checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
2510
-
dependencies = [
2511
-
"proc-macro2",
2512
-
"quote",
2513
-
"syn 2.0.104",
2514
-
]
2515
-
2516
-
[[package]]
2517
name = "pin-project-lite"
2518
version = "0.2.16"
2519
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2584
2585
[[package]]
2586
name = "prettyplease"
2587
-
version = "0.2.35"
2588
source = "registry+https://github.com/rust-lang/crates.io-index"
2589
-
checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a"
2590
dependencies = [
2591
"proc-macro2",
2592
"syn 2.0.104",
···
2669
]
2670
2671
[[package]]
2672
name = "quote"
2673
version = "1.0.40"
2674
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2696
2697
[[package]]
2698
name = "rand"
2699
-
version = "0.9.1"
2700
source = "registry+https://github.com/rust-lang/crates.io-index"
2701
-
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
2702
dependencies = [
2703
"rand_chacha 0.9.0",
2704
"rand_core 0.9.3",
···
2762
2763
[[package]]
2764
name = "redis"
2765
-
version = "0.24.0"
2766
source = "registry+https://github.com/rust-lang/crates.io-index"
2767
-
checksum = "c580d9cbbe1d1b479e8d67cf9daf6a62c957e6846048408b80b43ac3f6af84cd"
2768
dependencies = [
2769
"arc-swap",
2770
-
"async-trait",
2771
"bytes",
2772
"combine",
2773
-
"futures",
2774
"futures-util",
2775
"itoa",
2776
"percent-encoding",
2777
"pin-project-lite",
2778
"ryu",
2779
"sha1_smol",
2780
-
"socket2 0.4.10",
2781
"tokio",
2782
-
"tokio-retry",
2783
"tokio-util",
2784
"url",
2785
]
2786
2787
[[package]]
2788
name = "redox_syscall"
2789
-
version = "0.5.13"
2790
source = "registry+https://github.com/rust-lang/crates.io-index"
2791
-
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
2792
dependencies = [
2793
"bitflags 2.9.1",
2794
]
2795
2796
[[package]]
2797
name = "regex"
2798
version = "1.11.1"
2799
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2843
source = "registry+https://github.com/rust-lang/crates.io-index"
2844
checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531"
2845
dependencies = [
2846
-
"base64",
2847
"bytes",
2848
-
"encoding_rs",
2849
"futures-core",
2850
-
"h2",
2851
-
"http",
2852
"http-body",
2853
"http-body-util",
2854
"hyper",
2855
"hyper-rustls",
2856
-
"hyper-tls",
2857
"hyper-util",
2858
"js-sys",
2859
"log",
2860
-
"mime",
2861
-
"native-tls",
2862
"percent-encoding",
2863
"pin-project-lite",
2864
"rustls-pki-types",
2865
"serde",
2866
"serde_json",
2867
"serde_urlencoded",
2868
"sync_wrapper",
2869
"tokio",
2870
-
"tokio-native-tls",
2871
"tower",
2872
"tower-http",
2873
"tower-service",
2874
"url",
2875
"wasm-bindgen",
2876
"wasm-bindgen-futures",
2877
"web-sys",
2878
]
2879
2880
[[package]]
···
2903
[[package]]
2904
name = "rocketman"
2905
version = "0.2.3"
2906
dependencies = [
2907
"anyhow",
2908
"async-trait",
···
2910
"derive_builder",
2911
"flume",
2912
"futures-util",
2913
-
"metrics 0.23.1",
2914
"rand 0.8.5",
2915
"serde",
2916
"serde_json",
2917
"tokio",
2918
-
"tokio-tungstenite",
2919
"tracing",
2920
"tracing-subscriber",
2921
"url",
···
2944
2945
[[package]]
2946
name = "rustc-demangle"
2947
-
version = "0.1.25"
2948
source = "registry+https://github.com/rust-lang/crates.io-index"
2949
-
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
2950
2951
[[package]]
2952
name = "rustc-hash"
2953
version = "1.1.0"
2954
source = "registry+https://github.com/rust-lang/crates.io-index"
2955
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
2956
2957
[[package]]
2958
name = "rustc_version"
···
2991
2992
[[package]]
2993
name = "rustls"
2994
-
version = "0.23.29"
2995
source = "registry+https://github.com/rust-lang/crates.io-index"
2996
-
checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
2997
dependencies = [
2998
"aws-lc-rs",
2999
"once_cell",
3000
"rustls-pki-types",
3001
-
"rustls-webpki",
3002
"subtle",
3003
"zeroize",
3004
]
3005
3006
[[package]]
3007
name = "rustls-native-certs"
3008
version = "0.8.1"
3009
source = "registry+https://github.com/rust-lang/crates.io-index"
3010
checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
···
3016
]
3017
3018
[[package]]
3019
name = "rustls-pki-types"
3020
version = "1.12.0"
3021
source = "registry+https://github.com/rust-lang/crates.io-index"
3022
checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
3023
dependencies = [
3024
"zeroize",
3025
]
3026
3027
[[package]]
···
3070
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
3071
3072
[[package]]
3073
name = "security-framework"
3074
version = "2.11.1"
3075
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3169
]
3170
3171
[[package]]
3172
name = "serde_json"
3173
-
version = "1.0.141"
3174
source = "registry+https://github.com/rust-lang/crates.io-index"
3175
-
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
3176
dependencies = [
3177
"itoa",
3178
"memchr",
···
3256
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3257
3258
[[package]]
3259
name = "signature"
3260
version = "2.2.0"
3261
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3288
3289
[[package]]
3290
name = "socket2"
3291
-
version = "0.4.10"
3292
source = "registry+https://github.com/rust-lang/crates.io-index"
3293
-
checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
3294
dependencies = [
3295
"libc",
3296
-
"winapi",
3297
]
3298
3299
[[package]]
3300
name = "socket2"
3301
-
version = "0.5.10"
3302
source = "registry+https://github.com/rust-lang/crates.io-index"
3303
-
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
3304
dependencies = [
3305
"libc",
3306
-
"windows-sys 0.52.0",
3307
]
3308
3309
[[package]]
···
3344
source = "registry+https://github.com/rust-lang/crates.io-index"
3345
checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
3346
dependencies = [
3347
-
"base64",
3348
"bytes",
3349
"crc",
3350
"crossbeam-queue",
···
3361
"memchr",
3362
"once_cell",
3363
"percent-encoding",
3364
"serde",
3365
"serde_json",
3366
"sha2",
···
3372
"tracing",
3373
"url",
3374
"uuid",
3375
]
3376
3377
[[package]]
···
3419
checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
3420
dependencies = [
3421
"atoi",
3422
-
"base64",
3423
"bitflags 2.9.1",
3424
"byteorder",
3425
"bytes",
···
3463
checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
3464
dependencies = [
3465
"atoi",
3466
-
"base64",
3467
"bitflags 2.9.1",
3468
"byteorder",
3469
"crc",
···
3641
]
3642
3643
[[package]]
3644
-
name = "system-configuration"
3645
-
version = "0.6.1"
3646
source = "registry+https://github.com/rust-lang/crates.io-index"
3647
-
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
3648
-
dependencies = [
3649
-
"bitflags 2.9.1",
3650
-
"core-foundation 0.9.4",
3651
-
"system-configuration-sys",
3652
-
]
3653
3654
[[package]]
3655
-
name = "system-configuration-sys"
3656
-
version = "0.6.0"
3657
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3658
-
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
3659
dependencies = [
3660
-
"core-foundation-sys",
3661
-
"libc",
3662
]
3663
-
3664
-
[[package]]
3665
-
name = "tagptr"
3666
-
version = "0.2.0"
3667
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3668
-
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
3669
3670
[[package]]
3671
name = "tempfile"
···
3789
3790
[[package]]
3791
name = "tokio"
3792
-
version = "1.46.1"
3793
source = "registry+https://github.com/rust-lang/crates.io-index"
3794
-
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
3795
dependencies = [
3796
"backtrace",
3797
"bytes",
3798
"io-uring",
3799
"libc",
3800
"mio",
3801
"pin-project-lite",
3802
"slab",
3803
-
"socket2 0.5.10",
3804
"tokio-macros",
3805
-
"windows-sys 0.52.0",
3806
]
3807
3808
[[package]]
···
3817
]
3818
3819
[[package]]
3820
-
name = "tokio-native-tls"
3821
-
version = "0.3.1"
3822
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3823
-
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
3824
-
dependencies = [
3825
-
"native-tls",
3826
-
"tokio",
3827
-
]
3828
-
3829
-
[[package]]
3830
-
name = "tokio-retry"
3831
-
version = "0.3.0"
3832
source = "registry+https://github.com/rust-lang/crates.io-index"
3833
-
checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
3834
dependencies = [
3835
-
"pin-project",
3836
-
"rand 0.8.5",
3837
"tokio",
3838
]
3839
···
3843
source = "registry+https://github.com/rust-lang/crates.io-index"
3844
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
3845
dependencies = [
3846
-
"rustls",
3847
"tokio",
3848
]
3849
···
3860
3861
[[package]]
3862
name = "tokio-tungstenite"
3863
version = "0.24.0"
3864
source = "registry+https://github.com/rust-lang/crates.io-index"
3865
checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
3866
dependencies = [
3867
"futures-util",
3868
"log",
3869
"tokio",
3870
-
"tungstenite",
3871
]
3872
3873
[[package]]
···
3934
"bitflags 2.9.1",
3935
"bytes",
3936
"futures-util",
3937
-
"http",
3938
"http-body",
3939
"iri-string",
3940
"pin-project-lite",
···
4036
4037
[[package]]
4038
name = "tungstenite"
4039
version = "0.24.0"
4040
source = "registry+https://github.com/rust-lang/crates.io-index"
4041
checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
···
4043
"byteorder",
4044
"bytes",
4045
"data-encoding",
4046
-
"http",
4047
"httparse",
4048
"log",
4049
"rand 0.8.5",
4050
"sha1",
4051
"thiserror 1.0.69",
4052
"utf-8",
···
4065
"atrium-api",
4066
"atrium-xrpc",
4067
"chrono",
4068
-
"http",
4069
"ipld-core",
4070
"langtag",
4071
"regex",
···
4074
"serde_ipld_dagcbor",
4075
"serde_json",
4076
"thiserror 2.0.12",
4077
-
"uuid",
4078
]
4079
4080
[[package]]
···
4331
]
4332
4333
[[package]]
4334
name = "web-sys"
4335
version = "0.3.77"
4336
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4348
dependencies = [
4349
"js-sys",
4350
"wasm-bindgen",
4351
]
4352
4353
[[package]]
···
4523
]
4524
4525
[[package]]
4526
-
name = "windows-registry"
4527
-
version = "0.5.3"
4528
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4529
-
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
4530
-
dependencies = [
4531
-
"windows-link",
4532
-
"windows-result 0.3.4",
4533
-
"windows-strings",
4534
-
]
4535
-
4536
-
[[package]]
4537
name = "windows-result"
4538
version = "0.1.2"
4539
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4593
source = "registry+https://github.com/rust-lang/crates.io-index"
4594
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
4595
dependencies = [
4596
-
"windows-targets 0.53.2",
4597
]
4598
4599
[[package]]
···
4629
4630
[[package]]
4631
name = "windows-targets"
4632
-
version = "0.53.2"
4633
source = "registry+https://github.com/rust-lang/crates.io-index"
4634
-
checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
4635
dependencies = [
4636
"windows_aarch64_gnullvm 0.53.0",
4637
"windows_aarch64_msvc 0.53.0",
4638
"windows_i686_gnu 0.53.0",
···
121
dependencies = [
122
"anyhow",
123
"async-trait",
124
+
"atmst",
125
"atrium-api",
126
"axum",
127
+
"base64 0.22.1",
128
"chrono",
129
"clap",
130
"dotenvy",
···
166
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
167
168
[[package]]
169
+
name = "async-compression"
170
+
version = "0.4.27"
171
+
source = "registry+https://github.com/rust-lang/crates.io-index"
172
+
checksum = "ddb939d66e4ae03cee6091612804ba446b12878410cfa17f785f4dd67d4014e8"
173
+
dependencies = [
174
+
"flate2",
175
+
"futures-core",
176
+
"memchr",
177
+
"pin-project-lite",
178
+
"tokio",
179
+
]
180
+
181
+
[[package]]
182
name = "async-lock"
183
version = "3.4.0"
184
source = "registry+https://github.com/rust-lang/crates.io-index"
···
201
]
202
203
[[package]]
204
+
name = "atmst"
205
+
version = "0.0.1"
206
+
source = "registry+https://github.com/rust-lang/crates.io-index"
207
+
checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d"
208
+
dependencies = [
209
+
"async-trait",
210
+
"bytes",
211
+
"cid 0.11.1",
212
+
"dashmap",
213
+
"futures",
214
+
"ipld-core",
215
+
"iroh-car",
216
+
"log",
217
+
"multihash 0.19.3",
218
+
"serde",
219
+
"serde_ipld_dagcbor",
220
+
"serde_ipld_dagjson",
221
+
"sha2",
222
+
"thiserror 1.0.69",
223
+
"tokio",
224
+
]
225
+
226
+
[[package]]
227
name = "atoi"
228
version = "2.0.0"
229
source = "registry+https://github.com/rust-lang/crates.io-index"
···
247
"atrium-common",
248
"atrium-xrpc",
249
"chrono",
250
+
"http 1.3.1",
251
"ipld-core",
252
"langtag",
253
"regex",
···
280
source = "registry+https://github.com/rust-lang/crates.io-index"
281
checksum = "0216ad50ce34e9ff982e171c3659e65dedaa2ed5ac2994524debdc9a9647ffa8"
282
dependencies = [
283
+
"http 1.3.1",
284
"serde",
285
"serde_html_form",
286
"serde_json",
···
296
297
[[package]]
298
name = "aws-lc-rs"
299
+
version = "1.13.3"
300
source = "registry+https://github.com/rust-lang/crates.io-index"
301
+
checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba"
302
dependencies = [
303
"aws-lc-sys",
304
"zeroize",
···
328
"bytes",
329
"form_urlencoded",
330
"futures-util",
331
+
"http 1.3.1",
332
"http-body",
333
"http-body-util",
334
"hyper",
···
361
dependencies = [
362
"bytes",
363
"futures-core",
364
+
"http 1.3.1",
365
"http-body",
366
"http-body-util",
367
"mime",
···
385
]
386
387
[[package]]
388
+
name = "backon"
389
+
version = "1.5.2"
390
+
source = "registry+https://github.com/rust-lang/crates.io-index"
391
+
checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d"
392
+
dependencies = [
393
+
"fastrand",
394
+
]
395
+
396
+
[[package]]
397
name = "backtrace"
398
version = "0.3.75"
399
source = "registry+https://github.com/rust-lang/crates.io-index"
···
415
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
416
417
[[package]]
418
+
name = "base16ct"
419
+
version = "0.2.0"
420
+
source = "registry+https://github.com/rust-lang/crates.io-index"
421
+
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
422
+
423
+
[[package]]
424
+
name = "base64"
425
+
version = "0.21.7"
426
+
source = "registry+https://github.com/rust-lang/crates.io-index"
427
+
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
428
+
429
+
[[package]]
430
name = "base64"
431
version = "0.22.1"
432
source = "registry+https://github.com/rust-lang/crates.io-index"
···
455
"proc-macro2",
456
"quote",
457
"regex",
458
+
"rustc-hash 1.1.0",
459
"shlex",
460
"syn 2.0.104",
461
"which",
···
562
version = "1.10.1"
563
source = "registry+https://github.com/rust-lang/crates.io-index"
564
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
565
+
dependencies = [
566
+
"serde",
567
+
]
568
569
[[package]]
570
name = "cadet"
···
572
dependencies = [
573
"anyhow",
574
"async-trait",
575
+
"atmst",
576
"atrium-api",
577
+
"base64 0.22.1",
578
"chrono",
579
"cid 0.11.1",
580
"dotenvy",
581
"flume",
582
+
"futures",
583
"iroh-car",
584
"libipld",
585
"metrics 0.23.1",
···
591
"reqwest",
592
"rocketman",
593
"serde",
594
+
"serde_ipld_dagcbor",
595
"serde_json",
596
"sqlx",
597
"time",
598
"tokio",
599
+
"tokio-tungstenite 0.24.0",
600
"tracing",
601
"tracing-subscriber",
602
"types",
···
647
648
[[package]]
649
name = "cc"
650
+
version = "1.2.31"
651
source = "registry+https://github.com/rust-lang/crates.io-index"
652
+
checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2"
653
dependencies = [
654
"jobserver",
655
"libc",
···
672
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
673
674
[[package]]
675
+
name = "cfg_aliases"
676
+
version = "0.2.1"
677
+
source = "registry+https://github.com/rust-lang/crates.io-index"
678
+
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
679
+
680
+
[[package]]
681
name = "chrono"
682
version = "0.4.41"
683
source = "registry+https://github.com/rust-lang/crates.io-index"
···
732
733
[[package]]
734
name = "clap"
735
+
version = "4.5.42"
736
source = "registry+https://github.com/rust-lang/crates.io-index"
737
+
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
738
dependencies = [
739
"clap_builder",
740
"clap_derive",
···
742
743
[[package]]
744
name = "clap_builder"
745
+
version = "4.5.42"
746
source = "registry+https://github.com/rust-lang/crates.io-index"
747
+
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
748
dependencies = [
749
"anstream",
750
"anstyle",
···
786
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
787
788
[[package]]
789
+
name = "colored"
790
+
version = "2.2.0"
791
+
source = "registry+https://github.com/rust-lang/crates.io-index"
792
+
checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
793
+
dependencies = [
794
+
"lazy_static",
795
+
"windows-sys 0.59.0",
796
+
]
797
+
798
+
[[package]]
799
name = "combine"
800
version = "4.6.7"
801
source = "registry+https://github.com/rust-lang/crates.io-index"
···
890
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
891
892
[[package]]
893
+
name = "crc32fast"
894
+
version = "1.5.0"
895
+
source = "registry+https://github.com/rust-lang/crates.io-index"
896
+
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
897
+
dependencies = [
898
+
"cfg-if",
899
+
]
900
+
901
+
[[package]]
902
name = "crossbeam-channel"
903
version = "0.5.15"
904
source = "registry+https://github.com/rust-lang/crates.io-index"
···
930
version = "0.8.21"
931
source = "registry+https://github.com/rust-lang/crates.io-index"
932
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
933
+
934
+
[[package]]
935
+
name = "crypto-bigint"
936
+
version = "0.5.5"
937
+
source = "registry+https://github.com/rust-lang/crates.io-index"
938
+
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
939
+
dependencies = [
940
+
"generic-array",
941
+
"rand_core 0.6.4",
942
+
"subtle",
943
+
"zeroize",
944
+
]
945
946
[[package]]
947
name = "crypto-common"
···
1127
]
1128
1129
[[package]]
1130
+
name = "dirs"
1131
+
version = "5.0.1"
1132
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1133
+
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
1134
+
dependencies = [
1135
+
"dirs-sys",
1136
+
]
1137
+
1138
+
[[package]]
1139
+
name = "dirs-sys"
1140
+
version = "0.4.1"
1141
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1142
+
checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
1143
+
dependencies = [
1144
+
"libc",
1145
+
"option-ext",
1146
+
"redox_users",
1147
+
"windows-sys 0.48.0",
1148
+
]
1149
+
1150
+
[[package]]
1151
name = "displaydoc"
1152
version = "0.2.5"
1153
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1171
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
1172
1173
[[package]]
1174
+
name = "ecdsa"
1175
+
version = "0.16.9"
1176
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1177
+
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
1178
+
dependencies = [
1179
+
"der",
1180
+
"digest",
1181
+
"elliptic-curve",
1182
+
"rfc6979",
1183
+
"signature",
1184
+
"spki",
1185
+
]
1186
+
1187
+
[[package]]
1188
name = "either"
1189
version = "1.15.0"
1190
source = "registry+https://github.com/rust-lang/crates.io-index"
1191
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
1192
dependencies = [
1193
"serde",
1194
+
]
1195
+
1196
+
[[package]]
1197
+
name = "elliptic-curve"
1198
+
version = "0.13.8"
1199
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1200
+
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
1201
+
dependencies = [
1202
+
"base16ct",
1203
+
"crypto-bigint",
1204
+
"digest",
1205
+
"ff",
1206
+
"generic-array",
1207
+
"group",
1208
+
"pkcs8",
1209
+
"rand_core 0.6.4",
1210
+
"sec1",
1211
+
"subtle",
1212
+
"zeroize",
1213
]
1214
1215
[[package]]
···
1276
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
1277
1278
[[package]]
1279
+
name = "ff"
1280
+
version = "0.13.1"
1281
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1282
+
checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
1283
+
dependencies = [
1284
+
"rand_core 0.6.4",
1285
+
"subtle",
1286
+
]
1287
+
1288
+
[[package]]
1289
+
name = "flate2"
1290
+
version = "1.1.2"
1291
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1292
+
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
1293
+
dependencies = [
1294
+
"crc32fast",
1295
+
"miniz_oxide",
1296
+
]
1297
+
1298
+
[[package]]
1299
name = "flume"
1300
version = "0.11.1"
1301
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1320
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
1321
1322
[[package]]
1323
name = "form_urlencoded"
1324
version = "1.2.1"
1325
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1456
dependencies = [
1457
"typenum",
1458
"version_check",
1459
+
"zeroize",
1460
]
1461
1462
[[package]]
···
1479
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
1480
dependencies = [
1481
"cfg-if",
1482
+
"js-sys",
1483
"libc",
1484
"r-efi",
1485
"wasi 0.14.2+wasi-0.2.4",
1486
+
"wasm-bindgen",
1487
]
1488
1489
[[package]]
···
1499
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
1500
1501
[[package]]
1502
+
name = "group"
1503
+
version = "0.13.0"
1504
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1505
+
checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
1506
+
dependencies = [
1507
+
"ff",
1508
+
"rand_core 0.6.4",
1509
+
"subtle",
1510
+
]
1511
+
1512
+
[[package]]
1513
name = "h2"
1514
version = "0.4.11"
1515
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1520
"fnv",
1521
"futures-core",
1522
"futures-sink",
1523
+
"http 1.3.1",
1524
"indexmap",
1525
"slab",
1526
"tokio",
···
1595
1596
[[package]]
1597
name = "http"
1598
+
version = "0.2.12"
1599
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1600
+
checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
1601
+
dependencies = [
1602
+
"bytes",
1603
+
"fnv",
1604
+
"itoa",
1605
+
]
1606
+
1607
+
[[package]]
1608
+
name = "http"
1609
version = "1.3.1"
1610
source = "registry+https://github.com/rust-lang/crates.io-index"
1611
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
···
1622
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
1623
dependencies = [
1624
"bytes",
1625
+
"http 1.3.1",
1626
]
1627
1628
[[package]]
···
1633
dependencies = [
1634
"bytes",
1635
"futures-core",
1636
+
"http 1.3.1",
1637
"http-body",
1638
"pin-project-lite",
1639
]
···
1660
"futures-channel",
1661
"futures-util",
1662
"h2",
1663
+
"http 1.3.1",
1664
"http-body",
1665
"httparse",
1666
"httpdate",
···
1677
source = "registry+https://github.com/rust-lang/crates.io-index"
1678
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
1679
dependencies = [
1680
+
"http 1.3.1",
1681
"hyper",
1682
"hyper-util",
1683
+
"rustls 0.23.31",
1684
+
"rustls-native-certs 0.8.1",
1685
"rustls-pki-types",
1686
"tokio",
1687
+
"tokio-rustls 0.26.2",
1688
"tower-service",
1689
+
"webpki-roots 1.0.2",
1690
]
1691
1692
[[package]]
1693
name = "hyper-util"
1694
+
version = "0.1.16"
1695
source = "registry+https://github.com/rust-lang/crates.io-index"
1696
+
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
1697
dependencies = [
1698
+
"base64 0.22.1",
1699
"bytes",
1700
"futures-channel",
1701
"futures-core",
1702
"futures-util",
1703
+
"http 1.3.1",
1704
"http-body",
1705
"hyper",
1706
"ipnet",
1707
"libc",
1708
"percent-encoding",
1709
"pin-project-lite",
1710
+
"socket2 0.6.0",
1711
"tokio",
1712
"tower-service",
1713
"tracing",
1714
]
1715
1716
[[package]]
···
1862
1863
[[package]]
1864
name = "io-uring"
1865
+
version = "0.7.9"
1866
source = "registry+https://github.com/rust-lang/crates.io-index"
1867
+
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
1868
dependencies = [
1869
"bitflags 2.9.1",
1870
"cfg-if",
···
1900
1901
[[package]]
1902
name = "iroh-car"
1903
+
version = "0.5.1"
1904
source = "registry+https://github.com/rust-lang/crates.io-index"
1905
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
1906
dependencies = [
1907
"anyhow",
1908
+
"cid 0.11.1",
1909
"futures",
1910
+
"serde",
1911
+
"serde_ipld_dagcbor",
1912
"thiserror 1.0.69",
1913
"tokio",
1914
"unsigned-varint 0.7.2",
···
1953
dependencies = [
1954
"once_cell",
1955
"wasm-bindgen",
1956
+
]
1957
+
1958
+
[[package]]
1959
+
name = "k256"
1960
+
version = "0.13.4"
1961
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1962
+
checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
1963
+
dependencies = [
1964
+
"cfg-if",
1965
+
"ecdsa",
1966
+
"elliptic-curve",
1967
+
"once_cell",
1968
+
"sha2",
1969
+
"signature",
1970
]
1971
1972
[[package]]
···
2104
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
2105
dependencies = [
2106
"cfg-if",
2107
+
"windows-targets 0.53.3",
2108
]
2109
2110
[[package]]
···
2114
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
2115
2116
[[package]]
2117
+
name = "libredox"
2118
+
version = "0.1.9"
2119
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2120
+
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
2121
+
dependencies = [
2122
+
"bitflags 2.9.1",
2123
+
"libc",
2124
+
]
2125
+
2126
+
[[package]]
2127
name = "libsqlite3-sys"
2128
version = "0.30.1"
2129
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2190
]
2191
2192
[[package]]
2193
+
name = "lru-slab"
2194
+
version = "0.1.2"
2195
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2196
+
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
2197
+
2198
+
[[package]]
2199
name = "matchers"
2200
version = "0.1.0"
2201
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2252
source = "registry+https://github.com/rust-lang/crates.io-index"
2253
checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034"
2254
dependencies = [
2255
+
"base64 0.22.1",
2256
"http-body-util",
2257
"hyper",
2258
"hyper-rustls",
···
2278
"hashbrown 0.15.4",
2279
"metrics 0.24.2",
2280
"quanta",
2281
+
"rand 0.9.2",
2282
"rand_xoshiro",
2283
"sketches-ddsketch",
2284
]
···
2346
"bytes",
2347
"encoding_rs",
2348
"futures-util",
2349
+
"http 1.3.1",
2350
"httparse",
2351
"memchr",
2352
"mime",
···
2461
]
2462
2463
[[package]]
2464
name = "nom"
2465
version = "7.1.3"
2466
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2487
dependencies = [
2488
"overload",
2489
"winapi",
2490
+
]
2491
+
2492
+
[[package]]
2493
+
name = "num-bigint"
2494
+
version = "0.4.6"
2495
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2496
+
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
2497
+
dependencies = [
2498
+
"num-integer",
2499
+
"num-traits",
2500
]
2501
2502
[[package]]
···
2592
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
2593
2594
[[package]]
2595
name = "openssl-probe"
2596
version = "0.1.6"
2597
source = "registry+https://github.com/rust-lang/crates.io-index"
2598
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
2599
2600
[[package]]
2601
+
name = "option-ext"
2602
+
version = "0.2.0"
2603
source = "registry+https://github.com/rust-lang/crates.io-index"
2604
+
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
2605
2606
[[package]]
2607
name = "overload"
···
2654
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
2655
2656
[[package]]
2657
name = "pin-project-lite"
2658
version = "0.2.16"
2659
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2724
2725
[[package]]
2726
name = "prettyplease"
2727
+
version = "0.2.36"
2728
source = "registry+https://github.com/rust-lang/crates.io-index"
2729
+
checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2"
2730
dependencies = [
2731
"proc-macro2",
2732
"syn 2.0.104",
···
2809
]
2810
2811
[[package]]
2812
+
name = "quinn"
2813
+
version = "0.11.8"
2814
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2815
+
checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8"
2816
+
dependencies = [
2817
+
"bytes",
2818
+
"cfg_aliases",
2819
+
"pin-project-lite",
2820
+
"quinn-proto",
2821
+
"quinn-udp",
2822
+
"rustc-hash 2.1.1",
2823
+
"rustls 0.23.31",
2824
+
"socket2 0.5.10",
2825
+
"thiserror 2.0.12",
2826
+
"tokio",
2827
+
"tracing",
2828
+
"web-time",
2829
+
]
2830
+
2831
+
[[package]]
2832
+
name = "quinn-proto"
2833
+
version = "0.11.12"
2834
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2835
+
checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e"
2836
+
dependencies = [
2837
+
"bytes",
2838
+
"getrandom 0.3.3",
2839
+
"lru-slab",
2840
+
"rand 0.9.2",
2841
+
"ring",
2842
+
"rustc-hash 2.1.1",
2843
+
"rustls 0.23.31",
2844
+
"rustls-pki-types",
2845
+
"slab",
2846
+
"thiserror 2.0.12",
2847
+
"tinyvec",
2848
+
"tracing",
2849
+
"web-time",
2850
+
]
2851
+
2852
+
[[package]]
2853
+
name = "quinn-udp"
2854
+
version = "0.5.13"
2855
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2856
+
checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970"
2857
+
dependencies = [
2858
+
"cfg_aliases",
2859
+
"libc",
2860
+
"once_cell",
2861
+
"socket2 0.5.10",
2862
+
"tracing",
2863
+
"windows-sys 0.59.0",
2864
+
]
2865
+
2866
+
[[package]]
2867
name = "quote"
2868
version = "1.0.40"
2869
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2891
2892
[[package]]
2893
name = "rand"
2894
+
version = "0.9.2"
2895
source = "registry+https://github.com/rust-lang/crates.io-index"
2896
+
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
2897
dependencies = [
2898
"rand_chacha 0.9.0",
2899
"rand_core 0.9.3",
···
2957
2958
[[package]]
2959
name = "redis"
2960
+
version = "0.32.4"
2961
source = "registry+https://github.com/rust-lang/crates.io-index"
2962
+
checksum = "e1f66bf4cac9733a23bcdf1e0e01effbaaad208567beba68be8f67e5f4af3ee1"
2963
dependencies = [
2964
"arc-swap",
2965
+
"backon",
2966
"bytes",
2967
+
"cfg-if",
2968
"combine",
2969
+
"futures-channel",
2970
"futures-util",
2971
"itoa",
2972
+
"num-bigint",
2973
"percent-encoding",
2974
"pin-project-lite",
2975
"ryu",
2976
"sha1_smol",
2977
+
"socket2 0.6.0",
2978
"tokio",
2979
"tokio-util",
2980
"url",
2981
]
2982
2983
[[package]]
2984
name = "redox_syscall"
2985
+
version = "0.5.17"
2986
source = "registry+https://github.com/rust-lang/crates.io-index"
2987
+
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
2988
dependencies = [
2989
"bitflags 2.9.1",
2990
]
2991
2992
[[package]]
2993
+
name = "redox_users"
2994
+
version = "0.4.6"
2995
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2996
+
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
2997
+
dependencies = [
2998
+
"getrandom 0.2.16",
2999
+
"libredox",
3000
+
"thiserror 1.0.69",
3001
+
]
3002
+
3003
+
[[package]]
3004
name = "regex"
3005
version = "1.11.1"
3006
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3050
source = "registry+https://github.com/rust-lang/crates.io-index"
3051
checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531"
3052
dependencies = [
3053
+
"async-compression",
3054
+
"base64 0.22.1",
3055
"bytes",
3056
"futures-core",
3057
+
"futures-util",
3058
+
"http 1.3.1",
3059
"http-body",
3060
"http-body-util",
3061
"hyper",
3062
"hyper-rustls",
3063
"hyper-util",
3064
"js-sys",
3065
"log",
3066
"percent-encoding",
3067
"pin-project-lite",
3068
+
"quinn",
3069
+
"rustls 0.23.31",
3070
"rustls-pki-types",
3071
"serde",
3072
"serde_json",
3073
"serde_urlencoded",
3074
"sync_wrapper",
3075
"tokio",
3076
+
"tokio-rustls 0.26.2",
3077
+
"tokio-util",
3078
"tower",
3079
"tower-http",
3080
"tower-service",
3081
"url",
3082
"wasm-bindgen",
3083
"wasm-bindgen-futures",
3084
+
"wasm-streams",
3085
"web-sys",
3086
+
"webpki-roots 1.0.2",
3087
+
]
3088
+
3089
+
[[package]]
3090
+
name = "rfc6979"
3091
+
version = "0.4.0"
3092
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3093
+
checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
3094
+
dependencies = [
3095
+
"hmac",
3096
+
"subtle",
3097
]
3098
3099
[[package]]
···
3122
[[package]]
3123
name = "rocketman"
3124
version = "0.2.3"
3125
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3126
+
checksum = "9928fe43979c19ff1f46f7920c30b76dfcead7a4d571c9836c4d02da8587f844"
3127
dependencies = [
3128
"anyhow",
3129
"async-trait",
···
3131
"derive_builder",
3132
"flume",
3133
"futures-util",
3134
+
"metrics 0.24.2",
3135
"rand 0.8.5",
3136
"serde",
3137
"serde_json",
3138
"tokio",
3139
+
"tokio-tungstenite 0.20.1",
3140
"tracing",
3141
"tracing-subscriber",
3142
"url",
···
3165
3166
[[package]]
3167
name = "rustc-demangle"
3168
+
version = "0.1.26"
3169
source = "registry+https://github.com/rust-lang/crates.io-index"
3170
+
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
3171
3172
[[package]]
3173
name = "rustc-hash"
3174
version = "1.1.0"
3175
source = "registry+https://github.com/rust-lang/crates.io-index"
3176
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
3177
+
3178
+
[[package]]
3179
+
name = "rustc-hash"
3180
+
version = "2.1.1"
3181
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3182
+
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
3183
3184
[[package]]
3185
name = "rustc_version"
···
3218
3219
[[package]]
3220
name = "rustls"
3221
+
version = "0.21.12"
3222
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3223
+
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
3224
+
dependencies = [
3225
+
"log",
3226
+
"ring",
3227
+
"rustls-webpki 0.101.7",
3228
+
"sct",
3229
+
]
3230
+
3231
+
[[package]]
3232
+
name = "rustls"
3233
+
version = "0.23.31"
3234
source = "registry+https://github.com/rust-lang/crates.io-index"
3235
+
checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc"
3236
dependencies = [
3237
"aws-lc-rs",
3238
"once_cell",
3239
+
"ring",
3240
"rustls-pki-types",
3241
+
"rustls-webpki 0.103.4",
3242
"subtle",
3243
"zeroize",
3244
]
3245
3246
[[package]]
3247
name = "rustls-native-certs"
3248
+
version = "0.6.3"
3249
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3250
+
checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00"
3251
+
dependencies = [
3252
+
"openssl-probe",
3253
+
"rustls-pemfile",
3254
+
"schannel",
3255
+
"security-framework 2.11.1",
3256
+
]
3257
+
3258
+
[[package]]
3259
+
name = "rustls-native-certs"
3260
version = "0.8.1"
3261
source = "registry+https://github.com/rust-lang/crates.io-index"
3262
checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
···
3268
]
3269
3270
[[package]]
3271
+
name = "rustls-pemfile"
3272
+
version = "1.0.4"
3273
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3274
+
checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
3275
+
dependencies = [
3276
+
"base64 0.21.7",
3277
+
]
3278
+
3279
+
[[package]]
3280
name = "rustls-pki-types"
3281
version = "1.12.0"
3282
source = "registry+https://github.com/rust-lang/crates.io-index"
3283
checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
3284
dependencies = [
3285
+
"web-time",
3286
"zeroize",
3287
+
]
3288
+
3289
+
[[package]]
3290
+
name = "rustls-webpki"
3291
+
version = "0.101.7"
3292
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3293
+
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
3294
+
dependencies = [
3295
+
"ring",
3296
+
"untrusted",
3297
]
3298
3299
[[package]]
···
3342
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
3343
3344
[[package]]
3345
+
name = "sct"
3346
+
version = "0.7.1"
3347
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3348
+
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
3349
+
dependencies = [
3350
+
"ring",
3351
+
"untrusted",
3352
+
]
3353
+
3354
+
[[package]]
3355
+
name = "sec1"
3356
+
version = "0.7.3"
3357
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3358
+
checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
3359
+
dependencies = [
3360
+
"base16ct",
3361
+
"der",
3362
+
"generic-array",
3363
+
"pkcs8",
3364
+
"subtle",
3365
+
"zeroize",
3366
+
]
3367
+
3368
+
[[package]]
3369
name = "security-framework"
3370
version = "2.11.1"
3371
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3465
]
3466
3467
[[package]]
3468
+
name = "serde_ipld_dagjson"
3469
+
version = "0.2.0"
3470
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3471
+
checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36"
3472
+
dependencies = [
3473
+
"ipld-core",
3474
+
"serde",
3475
+
"serde_json",
3476
+
]
3477
+
3478
+
[[package]]
3479
name = "serde_json"
3480
+
version = "1.0.142"
3481
source = "registry+https://github.com/rust-lang/crates.io-index"
3482
+
checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7"
3483
dependencies = [
3484
"itoa",
3485
"memchr",
···
3563
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3564
3565
[[package]]
3566
+
name = "signal-hook-registry"
3567
+
version = "1.4.5"
3568
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3569
+
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
3570
+
dependencies = [
3571
+
"libc",
3572
+
]
3573
+
3574
+
[[package]]
3575
name = "signature"
3576
version = "2.2.0"
3577
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3604
3605
[[package]]
3606
name = "socket2"
3607
+
version = "0.5.10"
3608
source = "registry+https://github.com/rust-lang/crates.io-index"
3609
+
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
3610
dependencies = [
3611
"libc",
3612
+
"windows-sys 0.52.0",
3613
]
3614
3615
[[package]]
3616
name = "socket2"
3617
+
version = "0.6.0"
3618
source = "registry+https://github.com/rust-lang/crates.io-index"
3619
+
checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
3620
dependencies = [
3621
"libc",
3622
+
"windows-sys 0.59.0",
3623
]
3624
3625
[[package]]
···
3660
source = "registry+https://github.com/rust-lang/crates.io-index"
3661
checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
3662
dependencies = [
3663
+
"base64 0.22.1",
3664
"bytes",
3665
"crc",
3666
"crossbeam-queue",
···
3677
"memchr",
3678
"once_cell",
3679
"percent-encoding",
3680
+
"rustls 0.23.31",
3681
"serde",
3682
"serde_json",
3683
"sha2",
···
3689
"tracing",
3690
"url",
3691
"uuid",
3692
+
"webpki-roots 0.26.11",
3693
]
3694
3695
[[package]]
···
3737
checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
3738
dependencies = [
3739
"atoi",
3740
+
"base64 0.22.1",
3741
"bitflags 2.9.1",
3742
"byteorder",
3743
"bytes",
···
3781
checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
3782
dependencies = [
3783
"atoi",
3784
+
"base64 0.22.1",
3785
"bitflags 2.9.1",
3786
"byteorder",
3787
"crc",
···
3959
]
3960
3961
[[package]]
3962
+
name = "tagptr"
3963
+
version = "0.2.0"
3964
source = "registry+https://github.com/rust-lang/crates.io-index"
3965
+
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
3966
3967
[[package]]
3968
+
name = "teal-cli"
3969
+
version = "0.1.0"
3970
dependencies = [
3971
+
"anyhow",
3972
+
"chrono",
3973
+
"clap",
3974
+
"colored",
3975
+
"dirs",
3976
+
"hex",
3977
+
"k256",
3978
+
"multibase",
3979
+
"rand 0.8.5",
3980
+
"serde",
3981
+
"serde_json",
3982
+
"tempfile",
3983
+
"tokio",
3984
]
3985
3986
[[package]]
3987
name = "tempfile"
···
4105
4106
[[package]]
4107
name = "tokio"
4108
+
version = "1.47.1"
4109
source = "registry+https://github.com/rust-lang/crates.io-index"
4110
+
checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
4111
dependencies = [
4112
"backtrace",
4113
"bytes",
4114
"io-uring",
4115
"libc",
4116
"mio",
4117
+
"parking_lot",
4118
"pin-project-lite",
4119
+
"signal-hook-registry",
4120
"slab",
4121
+
"socket2 0.6.0",
4122
"tokio-macros",
4123
+
"windows-sys 0.59.0",
4124
]
4125
4126
[[package]]
···
4135
]
4136
4137
[[package]]
4138
+
name = "tokio-rustls"
4139
+
version = "0.24.1"
4140
source = "registry+https://github.com/rust-lang/crates.io-index"
4141
+
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
4142
dependencies = [
4143
+
"rustls 0.21.12",
4144
"tokio",
4145
]
4146
···
4150
source = "registry+https://github.com/rust-lang/crates.io-index"
4151
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
4152
dependencies = [
4153
+
"rustls 0.23.31",
4154
"tokio",
4155
]
4156
···
4167
4168
[[package]]
4169
name = "tokio-tungstenite"
4170
+
version = "0.20.1"
4171
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4172
+
checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c"
4173
+
dependencies = [
4174
+
"futures-util",
4175
+
"log",
4176
+
"rustls 0.21.12",
4177
+
"rustls-native-certs 0.6.3",
4178
+
"tokio",
4179
+
"tokio-rustls 0.24.1",
4180
+
"tungstenite 0.20.1",
4181
+
"webpki-roots 0.25.4",
4182
+
]
4183
+
4184
+
[[package]]
4185
+
name = "tokio-tungstenite"
4186
version = "0.24.0"
4187
source = "registry+https://github.com/rust-lang/crates.io-index"
4188
checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
4189
dependencies = [
4190
"futures-util",
4191
"log",
4192
+
"rustls 0.23.31",
4193
+
"rustls-pki-types",
4194
"tokio",
4195
+
"tokio-rustls 0.26.2",
4196
+
"tungstenite 0.24.0",
4197
+
"webpki-roots 0.26.11",
4198
]
4199
4200
[[package]]
···
4261
"bitflags 2.9.1",
4262
"bytes",
4263
"futures-util",
4264
+
"http 1.3.1",
4265
"http-body",
4266
"iri-string",
4267
"pin-project-lite",
···
4363
4364
[[package]]
4365
name = "tungstenite"
4366
+
version = "0.20.1"
4367
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4368
+
checksum = "9e3dac10fd62eaf6617d3a904ae222845979aec67c615d1c842b4002c7666fb9"
4369
+
dependencies = [
4370
+
"byteorder",
4371
+
"bytes",
4372
+
"data-encoding",
4373
+
"http 0.2.12",
4374
+
"httparse",
4375
+
"log",
4376
+
"rand 0.8.5",
4377
+
"rustls 0.21.12",
4378
+
"sha1",
4379
+
"thiserror 1.0.69",
4380
+
"url",
4381
+
"utf-8",
4382
+
]
4383
+
4384
+
[[package]]
4385
+
name = "tungstenite"
4386
version = "0.24.0"
4387
source = "registry+https://github.com/rust-lang/crates.io-index"
4388
checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
···
4390
"byteorder",
4391
"bytes",
4392
"data-encoding",
4393
+
"http 1.3.1",
4394
"httparse",
4395
"log",
4396
"rand 0.8.5",
4397
+
"rustls 0.23.31",
4398
+
"rustls-pki-types",
4399
"sha1",
4400
"thiserror 1.0.69",
4401
"utf-8",
···
4414
"atrium-api",
4415
"atrium-xrpc",
4416
"chrono",
4417
+
"http 1.3.1",
4418
"ipld-core",
4419
"langtag",
4420
"regex",
···
4423
"serde_ipld_dagcbor",
4424
"serde_json",
4425
"thiserror 2.0.12",
4426
]
4427
4428
[[package]]
···
4679
]
4680
4681
[[package]]
4682
+
name = "wasm-streams"
4683
+
version = "0.4.2"
4684
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4685
+
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
4686
+
dependencies = [
4687
+
"futures-util",
4688
+
"js-sys",
4689
+
"wasm-bindgen",
4690
+
"wasm-bindgen-futures",
4691
+
"web-sys",
4692
+
]
4693
+
4694
+
[[package]]
4695
name = "web-sys"
4696
version = "0.3.77"
4697
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4709
dependencies = [
4710
"js-sys",
4711
"wasm-bindgen",
4712
+
]
4713
+
4714
+
[[package]]
4715
+
name = "webpki-roots"
4716
+
version = "0.25.4"
4717
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4718
+
checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
4719
+
4720
+
[[package]]
4721
+
name = "webpki-roots"
4722
+
version = "0.26.11"
4723
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4724
+
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
4725
+
dependencies = [
4726
+
"webpki-roots 1.0.2",
4727
+
]
4728
+
4729
+
[[package]]
4730
+
name = "webpki-roots"
4731
+
version = "1.0.2"
4732
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4733
+
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
4734
+
dependencies = [
4735
+
"rustls-pki-types",
4736
]
4737
4738
[[package]]
···
4908
]
4909
4910
[[package]]
4911
name = "windows-result"
4912
version = "0.1.2"
4913
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4967
source = "registry+https://github.com/rust-lang/crates.io-index"
4968
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
4969
dependencies = [
4970
+
"windows-targets 0.53.3",
4971
]
4972
4973
[[package]]
···
5003
5004
[[package]]
5005
name = "windows-targets"
5006
+
version = "0.53.3"
5007
source = "registry+https://github.com/rust-lang/crates.io-index"
5008
+
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
5009
dependencies = [
5010
+
"windows-link",
5011
"windows_aarch64_gnullvm 0.53.0",
5012
"windows_aarch64_msvc 0.53.0",
5013
"windows_i686_gnu 0.53.0",
+30
-8
Cargo.toml
+30
-8
Cargo.toml
···
1
[workspace]
2
-
members = ["apps/aqua", "services/cadet", "services/rocketman"]
3
resolver = "2"
4
5
[workspace.dependencies]
6
# Shared dependencies
7
-
tokio = { version = "1.0", features = ["rt-multi-thread", "macros"] }
8
axum = { version = "0.8", features = ["macros"] }
9
tower-http = { version = "0.6", features = ["cors"] }
10
-
sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "uuid"] }
11
serde = { version = "1.0", features = ["derive"] }
12
anyhow = "1.0"
13
serde_json = "1.0"
14
tracing = "0.1"
15
tracing-subscriber = "0.3"
16
metrics = "0.23"
17
-
reqwest = { version = "0.12", features = ["json"] }
18
url = "2.5"
19
rand = "0.8"
20
flume = "0.11"
21
async-trait = "0.1"
22
time = "0.3"
23
dotenvy = "0.15"
24
-
tokio-tungstenite = "0.24"
25
atrium-api = "0.25"
26
chrono = "0.4"
27
uuid = { version = "1.0", features = ["v4", "serde"] }
28
types = { path = "services/types" }
29
-
rocketman = { path = "services/rocketman" }
30
31
# CAR and IPLD dependencies
32
-
iroh-car = "0.4"
33
libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] }
34
cid = "0.11"
35
base64 = "0.22"
36
37
# Redis for job queues and caching
38
-
redis = { version = "0.24", features = ["tokio-comp", "connection-manager"] }
···
1
[workspace]
2
+
members = ["apps/aqua", "services/cadet", "tools/teal-cli"]
3
+
default-members = ["services/types"]
4
resolver = "2"
5
6
[workspace.dependencies]
7
# Shared dependencies
8
+
tokio = { version = "1.0", features = [
9
+
"rt-multi-thread",
10
+
"macros",
11
+
"time",
12
+
"net",
13
+
"sync",
14
+
] }
15
axum = { version = "0.8", features = ["macros"] }
16
tower-http = { version = "0.6", features = ["cors"] }
17
+
sqlx = { version = "0.8", features = [
18
+
"runtime-tokio",
19
+
"postgres",
20
+
"uuid",
21
+
"tls-rustls",
22
+
] }
23
serde = { version = "1.0", features = ["derive"] }
24
anyhow = "1.0"
25
serde_json = "1.0"
26
tracing = "0.1"
27
tracing-subscriber = "0.3"
28
metrics = "0.23"
29
+
reqwest = { version = "0.12", default-features = false, features = [
30
+
"json",
31
+
"rustls-tls",
32
+
"stream",
33
+
"gzip",
34
+
] }
35
url = "2.5"
36
rand = "0.8"
37
flume = "0.11"
38
async-trait = "0.1"
39
time = "0.3"
40
dotenvy = "0.15"
41
+
tokio-tungstenite = { version = "*", default-features = false, features = [
42
+
"rustls-tls-webpki-roots",
43
+
"connect",
44
+
"handshake",
45
+
] }
46
atrium-api = "0.25"
47
chrono = "0.4"
48
uuid = { version = "1.0", features = ["v4", "serde"] }
49
types = { path = "services/types" }
50
+
rocketman = "0.2.3"
51
52
# CAR and IPLD dependencies
53
+
iroh-car = "0.5"
54
libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] }
55
cid = "0.11"
56
base64 = "0.22"
57
+
atmst = "0.0.1"
58
59
# Redis for job queues and caching
60
+
redis = { version = "0.32", features = ["tokio-comp", "connection-manager"] }
+18
Cross.toml
+18
Cross.toml
···
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = [
14
+
"CARGO_HOME",
15
+
"CARGO_TARGET_DIR",
16
+
"SQLX_OFFLINE",
17
+
"PKG_CONFIG_ALLOW_CROSS",
18
+
]
+14
-9
README.md
+14
-9
README.md
···
29
# Install all dependencies (Node.js and Rust)
30
pnpm install
31
32
# Set up environment configuration
33
cp apps/aqua/.env.example apps/aqua/.env
34
···
90
- **Format**: `YYYYMMDDHHMMSS_description.sql` (timestamped SQL files)
91
- **Type**: Forward-only SQL migrations managed by SQLx
92
93
-
#### Database Schema
94
-
95
-
The database includes tables for:
96
-
- **Music data**: `artists`, `releases`, `recordings`, `plays`
97
-
- **User data**: `profiles`, `statii` (status records), `featured_items`
98
-
- **CAR imports**: `car_import_requests`, `car_blocks`, `car_extracted_records`
99
-
- **Analytics**: Materialized views for play counts and top charts
100
-
101
## Development
102
103
To start the development server run:
···
106
turbo dev --filter=@teal/aqua
107
```
108
109
-
Open http://localhost:3000/ with your browser to see the home page. You will need to login with Bluesky to test the posting functionality of the app. Note: if the redirect back to the app after you login isn't working correctly, you may need to replace the `127.0.0.1` with `localhost`.
110
111
### Running the full stack in docker for development
112
···
153
154
# Show lexicon change impact
155
pnpm lex:diff
156
```
157
158
See [`tools/lexicon-cli/README.md`](tools/lexicon-cli/README.md) for detailed documentation.
···
29
# Install all dependencies (Node.js and Rust)
30
pnpm install
31
32
+
# Clone submodules
33
+
git submodule update --init --recursive
34
+
35
# Set up environment configuration
36
cp apps/aqua/.env.example apps/aqua/.env
37
···
93
- **Format**: `YYYYMMDDHHMMSS_description.sql` (timestamped SQL files)
94
- **Type**: Forward-only SQL migrations managed by SQLx
95
96
## Development
97
98
To start the development server run:
···
101
turbo dev --filter=@teal/aqua
102
```
103
104
+
Open http://localhost:3000/ with your browser to see the home page. Note: if the redirect back to the app after you log in isn't working correctly, you may need to replace `127.0.0.1` with `localhost`, or set up a publicly accessible endpoint for the app to post to (see below).
105
106
### Running the full stack in docker for development
107
···
148
149
# Show lexicon change impact
150
pnpm lex:diff
151
+
```
152
+
153
+
## Updating Vendored Lexicons
154
+
To update vendored lexicons (anything that's not under `fm.teal`), follow these steps:
155
+
```bash
156
+
cd vendor/atproto
157
+
git pull origin main
158
+
cd ../..
159
+
git add vendor/atproto
160
+
git commit -m "Update atproto lexicons to latest"
161
```
162
163
See [`tools/lexicon-cli/README.md`](tools/lexicon-cli/README.md) for detailed documentation.
+24
-14
apps/amethyst/Dockerfile
+24
-14
apps/amethyst/Dockerfile
···
18
COPY packages/lexicons/ ./packages/lexicons/
19
COPY packages/tsconfig/ ./packages/tsconfig/
20
21
# Copy the aqua app
22
COPY apps/amethyst/ ./apps/amethyst/
23
24
# Copy .env
25
COPY ../../.env ./apps/amethyst/.env
26
27
-
# Build the aqua app
28
WORKDIR /app/apps/amethyst
29
-
RUN pnpm install
30
RUN pnpm run build:web
31
32
#create the client-json
33
RUN echo '{ \
34
-
"redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \
35
-
"response_types": ["code"], \
36
-
"grant_types": ["authorization_code", "refresh_token"], \
37
-
"scope": "atproto transition:generic", \
38
-
"token_endpoint_auth_method": "none", \
39
-
"application_type": "web", \
40
-
"client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \
41
-
"client_name": "teal", \
42
-
"client_uri": "https://'"${CLIENT_ADDRESS}"'", \
43
-
"dpop_bound_access_tokens": true \
44
-
}' > /app/client-metadata.json
45
46
47
FROM caddy:2.1.0-alpine AS caddy
···
50
EXPOSE 443/udp
51
COPY /apps/amethyst/Caddyfile /etc/caddy/Caddyfile
52
COPY --from=builder /app/apps/amethyst/build /srv
53
-
COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
···
18
COPY packages/lexicons/ ./packages/lexicons/
19
COPY packages/tsconfig/ ./packages/tsconfig/
20
21
+
# Copy lexicon source data
22
+
COPY lexicons/ ./lexicons/
23
+
24
# Copy the amethyst app
25
COPY apps/amethyst/ ./apps/amethyst/
26
27
# Copy .env
28
COPY ../../.env ./apps/amethyst/.env
29
30
+
# Build the lexicon CLI
31
+
RUN cd tools/lexicon-cli && pnpm build
32
+
33
+
# Generate lexicons before building amethyst
34
+
RUN pnpm lex:gen-server
35
+
36
+
RUN pnpm install
37
+
38
+
# Build the amethyst app
39
WORKDIR /app/apps/amethyst
40
RUN pnpm run build:web
41
42
# Create the client metadata JSON
43
RUN echo '{ \
44
+
"redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \
45
+
"response_types": ["code"], \
46
+
"grant_types": ["authorization_code", "refresh_token"], \
47
+
"scope": "atproto transition:generic", \
48
+
"token_endpoint_auth_method": "none", \
49
+
"application_type": "web", \
50
+
"client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \
51
+
"client_name": "teal", \
52
+
"client_uri": "https://'"${CLIENT_ADDRESS}"'", \
53
+
"dpop_bound_access_tokens": true \
54
+
}' > /app/client-metadata.json
55
56
57
FROM caddy:2.1.0-alpine AS caddy
···
60
EXPOSE 443/udp
61
COPY /apps/amethyst/Caddyfile /etc/caddy/Caddyfile
62
COPY --from=builder /app/apps/amethyst/build /srv
63
+
COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
+2
-2
apps/aqua/Cargo.toml
+2
-2
apps/aqua/Cargo.toml
···
19
tracing-subscriber.workspace = true
20
sqlx = { workspace = true, features = ["time"] }
21
dotenvy.workspace = true
22
-
23
types.workspace = true
24
-
chrono = "0.4.41"
25
26
# CAR import functionality
27
iroh-car.workspace = true
···
29
reqwest.workspace = true
30
url.workspace = true
31
clap = { version = "4.0", features = ["derive"] }
32
33
# Redis for job queues
34
redis.workspace = true
···
19
tracing-subscriber.workspace = true
20
sqlx = { workspace = true, features = ["time"] }
21
dotenvy.workspace = true
22
types.workspace = true
23
+
chrono.workspace = true
24
25
# CAR import functionality
26
iroh-car.workspace = true
···
28
reqwest.workspace = true
29
url.workspace = true
30
clap = { version = "4.0", features = ["derive"] }
31
+
atmst.workspace = true
32
33
# Redis for job queues
34
redis.workspace = true
+20
apps/aqua/Cross.toml
+20
apps/aqua/Cross.toml
···
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"]
14
+
# Allow cross-compilation of native dependencies
15
+
PKG_CONFIG_ALLOW_CROSS = "1"
16
+
# Use static linking to reduce runtime dependencies
17
+
RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s"
18
+
# Cross-compiler toolchain for the aarch64 target
19
+
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
20
+
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
+90
apps/aqua/Dockerfile
+90
apps/aqua/Dockerfile
···
···
1
+
# Docker build args for cross-platform builds (must be at the top)
2
+
ARG TARGETPLATFORM
3
+
ARG BUILDPLATFORM
4
+
ARG TARGETARCH
5
+
ARG TARGETOS
6
+
7
+
FROM --platform=${BUILDPLATFORM} rust:latest AS buildah
8
+
9
+
# Create appuser
10
+
ENV USER=app
11
+
ENV UID=10001
12
+
13
+
RUN adduser \
14
+
--disabled-password \
15
+
--gecos "" \
16
+
--home "/nonexistent" \
17
+
--shell "/sbin/nologin" \
18
+
--no-create-home \
19
+
--uid "${UID}" \
20
+
"${USER}"
21
+
22
+
WORKDIR /buildah
23
+
24
+
# Re-declare ARGs after FROM (Docker requirement)
25
+
ARG TARGETPLATFORM
26
+
ARG BUILDPLATFORM
27
+
ARG TARGETARCH
28
+
ARG TARGETOS
29
+
30
+
# Debug platform detection before copying files
31
+
RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM"
32
+
33
+
COPY ./ .
34
+
35
+
# Set up lexicons
36
+
RUN ./scripts/setup-lexicons.sh
37
+
38
+
# Install Node.js and pnpm for lexicon generation
39
+
RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/*
40
+
RUN npm install -g pnpm
41
+
42
+
# Install dependencies and generate lexicons
43
+
RUN pnpm install
44
+
RUN cd tools/lexicon-cli && pnpm build
45
+
RUN pnpm lex:gen --rust-only
46
+
47
+
# Install cross-compilation toolchains
48
+
RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu
49
+
50
+
# Enable ARM64 architecture and install cross-compilation tools
51
+
RUN dpkg --add-architecture arm64 && \
52
+
apt-get update && \
53
+
apt-get install -y \
54
+
gcc-aarch64-linux-gnu \
55
+
libssl-dev:arm64 \
56
+
libssl-dev \
57
+
pkg-config \
58
+
&& rm -rf /var/lib/apt/lists/*
59
+
60
+
# Set up cross-compilation environment
61
+
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
62
+
ENV PKG_CONFIG_ALLOW_CROSS=1
63
+
ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig
64
+
ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr
65
+
ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu
66
+
ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl
67
+
68
+
69
+
# Debug platform detection and run build
70
+
RUN . ./target.sh && \
71
+
touch apps/aqua/src/main.rs && \
72
+
echo "Building for $TARGET_ARCH" && \
73
+
cargo build --release --target $RUST_TARGET --package aqua && \
74
+
cp target/$RUST_TARGET/release/aqua target/aqua
75
+
76
+
FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc
77
+
78
+
# Import from builder.
79
+
COPY --from=buildah /etc/passwd /etc/passwd
80
+
COPY --from=buildah /etc/group /etc/group
81
+
82
+
WORKDIR /app
83
+
84
+
# Copy our build
85
+
COPY --from=buildah /buildah/target/aqua ./
86
+
87
+
# Use an unprivileged user.
88
+
USER app:app
89
+
90
+
CMD ["/app/aqua"]
+62
apps/aqua/examples/did_demo.rs
+62
apps/aqua/examples/did_demo.rs
···
···
1
+
use serde_json::json;
2
+
3
+
/// Generate a DID document for did:web
4
+
fn generate_did_document(host: &str) -> serde_json::Value {
5
+
json!({
6
+
"@context": [
7
+
"https://www.w3.org/ns/did/v1",
8
+
"https://w3id.org/security/multikey/v1",
9
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
10
+
],
11
+
"id": format!("did:web:{}", host),
12
+
"alsoKnownAs": [
13
+
format!("at://{}", host)
14
+
],
15
+
"service": [
16
+
{
17
+
"id": "#bsky_fg",
18
+
"type": "BskyFeedGenerator",
19
+
"serviceEndpoint": format!("https://{}", host)
20
+
},
21
+
{
22
+
"id": "#atproto_pds",
23
+
"type": "AtprotoPersonalDataServer",
24
+
"serviceEndpoint": format!("https://{}", host)
25
+
}
26
+
],
27
+
"verificationMethod": [
28
+
{
29
+
"id": format!("did:web:{}#atproto", host),
30
+
"type": "Multikey",
31
+
"controller": format!("did:web:{}", host),
32
+
"publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK"
33
+
}
34
+
]
35
+
})
36
+
}
37
+
38
+
fn main() {
39
+
println!("DID Document Generation Demo");
40
+
println!("===========================\n");
41
+
42
+
let test_hosts = vec![
43
+
"localhost:3000",
44
+
"bsky.social",
45
+
"my-atproto-service.com",
46
+
"example.org:8080",
47
+
];
48
+
49
+
for host in test_hosts {
50
+
println!("DID Document for host: {}", host);
51
+
println!("URL: https://{}/.well-known/did.json", host);
52
+
println!("DID: did:web:{}", host);
53
+
println!();
54
+
55
+
let did_doc = generate_did_document(host);
56
+
println!("{}", serde_json::to_string_pretty(&did_doc).unwrap());
57
+
println!("\n{}\n", "=".repeat(80));
58
+
}
59
+
60
+
println!("The well-known endpoint /.well-known/did.json will serve this JSON structure");
61
+
println!("when accessed via HTTP GET request to your Aqua server.");
62
+
}
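Aside (not part of the diff): a hypothetical sanity check for the `generate_did_document` helper above, assuming it were added to the same example file. It only asserts that the `id` and `alsoKnownAs` fields are derived from the host argument; example tests can be run with `cargo test --examples`.

```rust
// Hypothetical test sketch for the example above; not part of the PR.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn did_document_is_derived_from_host() {
        let doc = generate_did_document("example.org");
        assert_eq!(doc["id"], "did:web:example.org");
        assert_eq!(doc["alsoKnownAs"][0], "at://example.org");
        assert_eq!(doc["verificationMethod"][0]["controller"], "did:web:example.org");
    }
}
```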
+275
-82
apps/aqua/src/api/mod.rs
+275
-82
apps/aqua/src/api/mod.rs
···
1
use axum::{Extension, Json, extract::Multipart, extract::Path, http::StatusCode};
2
use serde::{Deserialize, Serialize};
3
-
use tracing::{info, error};
4
-
use anyhow::Result;
5
-
use uuid;
6
-
7
-
use sys_info;
8
9
use crate::ctx::Context;
10
use crate::redis_client::RedisClient;
11
12
#[derive(Debug, Serialize, Deserialize)]
13
pub struct MetaOsInfo {
···
61
/// Get CAR import job status
62
pub async fn get_car_import_job_status(
63
Path(job_id): Path<String>,
64
-
) -> Result<Json<types::jobs::CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> {
65
-
use types::jobs::queue_keys;
66
-
67
info!("Getting status for job: {}", job_id);
68
-
69
// Parse job ID
70
let job_uuid = match uuid::Uuid::parse_str(&job_id) {
71
Ok(uuid) => uuid,
···
77
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
78
}
79
};
80
-
81
// Connect to Redis
82
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
83
let redis_client = match RedisClient::new(&redis_url) {
84
Ok(client) => client,
85
Err(e) => {
···
91
return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)));
92
}
93
};
94
-
95
// Get job status
96
-
match redis_client.get_job_status(&queue_keys::job_status_key(&job_uuid)).await {
97
-
Ok(Some(status_data)) => {
98
-
match serde_json::from_str::<types::jobs::CarImportJobStatus>(&status_data) {
99
-
Ok(status) => Ok(Json(status)),
100
-
Err(e) => {
101
-
error!("Failed to parse job status: {}", e);
102
-
let error_response = ErrorResponse {
103
-
error: "Failed to parse job status".to_string(),
104
-
details: Some(e.to_string()),
105
-
};
106
-
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)))
107
-
}
108
}
109
-
}
110
Ok(None) => {
111
let error_response = ErrorResponse {
112
error: "Job not found".to_string(),
···
165
mut multipart: Multipart,
166
) -> Result<Json<CarImportResponse>, StatusCode> {
167
info!("Received CAR file upload request");
168
-
169
let mut car_data: Option<Vec<u8>> = None;
170
let mut import_id: Option<String> = None;
171
let mut description: Option<String> = None;
172
-
173
// Process multipart form data
174
-
while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? {
175
let name = field.name().unwrap_or("").to_string();
176
-
177
match name.as_str() {
178
"car_file" => {
179
let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?;
···
192
}
193
}
194
}
195
-
196
let car_bytes = car_data.ok_or(StatusCode::BAD_REQUEST)?;
197
let final_import_id = import_id.unwrap_or_else(|| {
198
// Generate a unique import ID
199
format!("car-import-{}", chrono::Utc::now().timestamp())
200
});
201
-
202
// Validate CAR file format
203
match validate_car_file(&car_bytes).await {
204
Ok(_) => {
205
-
info!("CAR file validation successful for import {}", final_import_id);
206
}
207
Err(e) => {
208
error!("CAR file validation failed: {}", e);
209
return Err(StatusCode::BAD_REQUEST);
210
}
211
}
212
-
213
// Store CAR import request in database for processing
214
-
match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await {
215
Ok(_) => {
216
-
info!("CAR import request stored successfully: {}", final_import_id);
217
Ok(Json(CarImportResponse {
218
import_id: final_import_id,
219
status: "queued".to_string(),
···
232
axum::extract::Path(import_id): axum::extract::Path<String>,
233
) -> Result<Json<CarImportResponse>, StatusCode> {
234
match get_import_status(&ctx, &import_id).await {
235
-
Ok(Some(status)) => {
236
-
Ok(Json(CarImportResponse {
237
-
import_id,
238
-
status: status.status,
239
-
message: status.message,
240
-
}))
241
-
}
242
Ok(None) => Err(StatusCode::NOT_FOUND),
243
Err(e) => {
244
error!("Failed to get import status: {}", e);
···
248
}
249
250
async fn validate_car_file(car_data: &[u8]) -> Result<()> {
251
-
use std::io::Cursor;
252
use iroh_car::CarReader;
253
-
254
let cursor = Cursor::new(car_data);
255
let reader = CarReader::new(cursor).await?;
256
let header = reader.header();
257
-
258
// Basic validation - ensure we have at least one root CID
259
if header.roots().is_empty() {
260
return Err(anyhow::anyhow!("CAR file has no root CIDs"));
261
}
262
-
263
info!("CAR file validated: {} root CIDs", header.roots().len());
264
Ok(())
265
}
···
293
Extension(ctx): Extension<Context>,
294
Json(request): Json<FetchCarRequest>,
295
) -> Result<Json<FetchCarResponse>, (StatusCode, Json<ErrorResponse>)> {
296
-
info!("Received CAR fetch request for user: {}", request.user_identifier);
297
-
298
// Resolve user identifier to DID and PDS
299
let (user_did, pds_host) = match resolve_user_to_pds(&request.user_identifier).await {
300
Ok(result) => result,
···
302
error!("Failed to resolve user {}: {}", request.user_identifier, e);
303
let error_response = ErrorResponse {
304
error: "Failed to resolve user".to_string(),
305
-
details: if request.debug.unwrap_or(false) { Some(e.to_string()) } else { None },
306
};
307
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
308
}
309
};
310
-
311
-
info!("Resolved {} to DID {} on PDS {}", request.user_identifier, user_did, pds_host);
312
-
313
// Generate import ID
314
-
let import_id = format!("pds-fetch-{}-{}",
315
-
user_did.replace(":", "-"),
316
chrono::Utc::now().timestamp()
317
);
318
-
319
// Fetch CAR file from PDS
320
match fetch_car_from_pds(&pds_host, &user_did, request.since.as_deref()).await {
321
Ok(car_data) => {
322
-
info!("Successfully fetched CAR file for {} ({} bytes)", user_did, car_data.len());
323
-
324
// Store the fetched CAR file for processing
325
-
let description = Some(format!("Fetched from PDS {} for user {}", pds_host, request.user_identifier));
326
-
match store_car_import_request(&ctx, &import_id, &car_data, description.as_deref()).await {
327
Ok(_) => {
328
info!("CAR import request stored successfully: {}", import_id);
329
Ok(Json(FetchCarResponse {
···
371
372
/// Resolve a handle to a DID using com.atproto.identity.resolveHandle
373
async fn resolve_handle_to_did(handle: &str) -> Result<String> {
374
-
let url = format!("https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}", handle);
375
-
376
let response = reqwest::get(&url).await?;
377
if !response.status().is_success() {
378
-
return Err(anyhow::anyhow!("Failed to resolve handle {}: {}", handle, response.status()));
379
}
380
-
381
let json: serde_json::Value = response.json().await?;
382
-
let did = json["did"].as_str()
383
.ok_or_else(|| anyhow::anyhow!("No DID found in response for handle {}", handle))?;
384
-
385
Ok(did.to_string())
386
}
387
···
390
// For DID:plc, use the PLC directory
391
if did.starts_with("did:plc:") {
392
let url = format!("https://plc.directory/{}", did);
393
-
394
let response = reqwest::get(&url).await?;
395
if !response.status().is_success() {
396
-
return Err(anyhow::anyhow!("Failed to resolve DID {}: {}", did, response.status()));
397
}
398
-
399
let doc: serde_json::Value = response.json().await?;
400
-
401
// Find the PDS service endpoint
402
if let Some(services) = doc["service"].as_array() {
403
for service in services {
···
405
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
406
// Extract hostname from URL
407
let url = url::Url::parse(endpoint)?;
408
-
let host = url.host_str()
409
-
.ok_or_else(|| anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint))?;
410
return Ok(host.to_string());
411
}
412
}
413
}
414
}
415
-
416
-
Err(anyhow::anyhow!("No PDS service found in DID document for {}", did))
417
} else {
418
Err(anyhow::anyhow!("Unsupported DID method: {}", did))
419
}
···
421
422
/// Fetch CAR file from PDS using com.atproto.sync.getRepo
423
pub async fn fetch_car_from_pds(pds_host: &str, did: &str, since: Option<&str>) -> Result<Vec<u8>> {
424
-
let mut url = format!("https://{}/xrpc/com.atproto.sync.getRepo?did={}", pds_host, did);
425
-
426
if let Some(since_rev) = since {
427
url.push_str(&format!("&since={}", since_rev));
428
}
429
-
430
info!("Fetching CAR file from: {}", url);
431
-
432
let response = reqwest::get(&url).await?;
433
if !response.status().is_success() {
434
-
return Err(anyhow::anyhow!("Failed to fetch CAR from PDS {}: {}", pds_host, response.status()));
435
}
436
-
437
// Verify content type
438
-
let content_type = response.headers()
439
.get("content-type")
440
.and_then(|h| h.to_str().ok())
441
.unwrap_or("");
442
-
443
if !content_type.contains("application/vnd.ipld.car") {
444
return Err(anyhow::anyhow!("Unexpected content type: {}", content_type));
445
}
446
-
447
let car_data = response.bytes().await?;
448
Ok(car_data.to_vec())
449
}
···
1
+
use anyhow::Result;
2
use axum::{Extension, Json, extract::Multipart, extract::Path, http::StatusCode};
3
use serde::{Deserialize, Serialize};
4
+
use serde_json::{Value, json};
5
+
use tracing::{error, info};
6
7
use crate::ctx::Context;
8
use crate::redis_client::RedisClient;
9
+
use crate::types::CarImportJobStatus;
10
11
#[derive(Debug, Serialize, Deserialize)]
12
pub struct MetaOsInfo {
···
60
/// Get CAR import job status
61
pub async fn get_car_import_job_status(
62
Path(job_id): Path<String>,
63
+
) -> Result<Json<CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> {
64
+
use crate::types::queue_keys;
65
+
66
info!("Getting status for job: {}", job_id);
67
+
68
// Parse job ID
69
let job_uuid = match uuid::Uuid::parse_str(&job_id) {
70
Ok(uuid) => uuid,
···
76
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
77
}
78
};
79
+
80
// Connect to Redis
81
+
let redis_url =
82
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
83
let redis_client = match RedisClient::new(&redis_url) {
84
Ok(client) => client,
85
Err(e) => {
···
91
return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)));
92
}
93
};
94
+
95
// Get job status
96
+
match redis_client
97
+
.get_job_status(&queue_keys::job_status_key(&job_uuid))
98
+
.await
99
+
{
100
+
Ok(Some(status_data)) => match serde_json::from_str::<CarImportJobStatus>(&status_data) {
101
+
Ok(status) => Ok(Json(status)),
102
+
Err(e) => {
103
+
error!("Failed to parse job status: {}", e);
104
+
let error_response = ErrorResponse {
105
+
error: "Failed to parse job status".to_string(),
106
+
details: Some(e.to_string()),
107
+
};
108
+
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)))
109
}
110
+
},
111
Ok(None) => {
112
let error_response = ErrorResponse {
113
error: "Job not found".to_string(),
···
166
mut multipart: Multipart,
167
) -> Result<Json<CarImportResponse>, StatusCode> {
168
info!("Received CAR file upload request");
169
+
170
let mut car_data: Option<Vec<u8>> = None;
171
let mut import_id: Option<String> = None;
172
let mut description: Option<String> = None;
173
+
174
// Process multipart form data
175
+
while let Some(field) = multipart
176
+
.next_field()
177
+
.await
178
+
.map_err(|_| StatusCode::BAD_REQUEST)?
179
+
{
180
let name = field.name().unwrap_or("").to_string();
181
+
182
match name.as_str() {
183
"car_file" => {
184
let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?;
···
197
}
198
}
199
}
200
+
201
let car_bytes = car_data.ok_or(StatusCode::BAD_REQUEST)?;
202
let final_import_id = import_id.unwrap_or_else(|| {
203
// Generate a unique import ID
204
format!("car-import-{}", chrono::Utc::now().timestamp())
205
});
206
+
207
// Validate CAR file format
208
match validate_car_file(&car_bytes).await {
209
Ok(_) => {
210
+
info!(
211
+
"CAR file validation successful for import {}",
212
+
final_import_id
213
+
);
214
}
215
Err(e) => {
216
error!("CAR file validation failed: {}", e);
217
return Err(StatusCode::BAD_REQUEST);
218
}
219
}
220
+
221
// Store CAR import request in database for processing
222
+
match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await
223
+
{
224
Ok(_) => {
225
+
info!(
226
+
"CAR import request stored successfully: {}",
227
+
final_import_id
228
+
);
229
Ok(Json(CarImportResponse {
230
import_id: final_import_id,
231
status: "queued".to_string(),
···
244
axum::extract::Path(import_id): axum::extract::Path<String>,
245
) -> Result<Json<CarImportResponse>, StatusCode> {
246
match get_import_status(&ctx, &import_id).await {
247
+
Ok(Some(status)) => Ok(Json(CarImportResponse {
248
+
import_id,
249
+
status: status.status,
250
+
message: status.message,
251
+
})),
252
Ok(None) => Err(StatusCode::NOT_FOUND),
253
Err(e) => {
254
error!("Failed to get import status: {}", e);
···
258
}
259
260
async fn validate_car_file(car_data: &[u8]) -> Result<()> {
261
use iroh_car::CarReader;
262
+
use std::io::Cursor;
263
+
264
let cursor = Cursor::new(car_data);
265
let reader = CarReader::new(cursor).await?;
266
let header = reader.header();
267
+
268
// Basic validation - ensure we have at least one root CID
269
if header.roots().is_empty() {
270
return Err(anyhow::anyhow!("CAR file has no root CIDs"));
271
}
272
+
273
info!("CAR file validated: {} root CIDs", header.roots().len());
274
Ok(())
275
}
···
303
Extension(ctx): Extension<Context>,
304
Json(request): Json<FetchCarRequest>,
305
) -> Result<Json<FetchCarResponse>, (StatusCode, Json<ErrorResponse>)> {
306
+
info!(
307
+
"Received CAR fetch request for user: {}",
308
+
request.user_identifier
309
+
);
310
+
311
// Resolve user identifier to DID and PDS
312
let (user_did, pds_host) = match resolve_user_to_pds(&request.user_identifier).await {
313
Ok(result) => result,
···
315
error!("Failed to resolve user {}: {}", request.user_identifier, e);
316
let error_response = ErrorResponse {
317
error: "Failed to resolve user".to_string(),
318
+
details: if request.debug.unwrap_or(false) {
319
+
Some(e.to_string())
320
+
} else {
321
+
None
322
+
},
323
};
324
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
325
}
326
};
327
+
328
+
info!(
329
+
"Resolved {} to DID {} on PDS {}",
330
+
request.user_identifier, user_did, pds_host
331
+
);
332
+
333
// Generate import ID
334
+
let import_id = format!(
335
+
"pds-fetch-{}-{}",
336
+
user_did.replace(":", "-"),
337
chrono::Utc::now().timestamp()
338
);
339
+
340
// Fetch CAR file from PDS
341
match fetch_car_from_pds(&pds_host, &user_did, request.since.as_deref()).await {
342
Ok(car_data) => {
343
+
info!(
344
+
"Successfully fetched CAR file for {} ({} bytes)",
345
+
user_did,
346
+
car_data.len()
347
+
);
348
+
349
// Store the fetched CAR file for processing
350
+
let description = Some(format!(
351
+
"Fetched from PDS {} for user {}",
352
+
pds_host, request.user_identifier
353
+
));
354
+
match store_car_import_request(&ctx, &import_id, &car_data, description.as_deref())
355
+
.await
356
+
{
357
Ok(_) => {
358
info!("CAR import request stored successfully: {}", import_id);
359
Ok(Json(FetchCarResponse {
···
401
402
/// Resolve a handle to a DID using com.atproto.identity.resolveHandle
403
async fn resolve_handle_to_did(handle: &str) -> Result<String> {
404
+
let url = format!(
405
+
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
406
+
handle
407
+
);
408
+
409
let response = reqwest::get(&url).await?;
410
if !response.status().is_success() {
411
+
return Err(anyhow::anyhow!(
412
+
"Failed to resolve handle {}: {}",
413
+
handle,
414
+
response.status()
415
+
));
416
}
417
+
418
let json: serde_json::Value = response.json().await?;
419
+
let did = json["did"]
420
+
.as_str()
421
.ok_or_else(|| anyhow::anyhow!("No DID found in response for handle {}", handle))?;
422
+
423
Ok(did.to_string())
424
}
425
···
428
// For DID:plc, use the PLC directory
429
if did.starts_with("did:plc:") {
430
let url = format!("https://plc.directory/{}", did);
431
+
432
let response = reqwest::get(&url).await?;
433
if !response.status().is_success() {
434
+
return Err(anyhow::anyhow!(
435
+
"Failed to resolve DID {}: {}",
436
+
did,
437
+
response.status()
438
+
));
439
}
440
+
441
let doc: serde_json::Value = response.json().await?;
442
+
443
// Find the PDS service endpoint
444
if let Some(services) = doc["service"].as_array() {
445
for service in services {
···
447
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
448
// Extract hostname from URL
449
let url = url::Url::parse(endpoint)?;
450
+
let host = url.host_str().ok_or_else(|| {
451
+
anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint)
452
+
})?;
453
return Ok(host.to_string());
454
}
455
}
456
}
457
}
458
+
459
+
Err(anyhow::anyhow!(
460
+
"No PDS service found in DID document for {}",
461
+
did
462
+
))
463
} else {
464
Err(anyhow::anyhow!("Unsupported DID method: {}", did))
465
}
···
467
468
/// Fetch CAR file from PDS using com.atproto.sync.getRepo
469
pub async fn fetch_car_from_pds(pds_host: &str, did: &str, since: Option<&str>) -> Result<Vec<u8>> {
470
+
let mut url = format!(
471
+
"https://{}/xrpc/com.atproto.sync.getRepo?did={}",
472
+
pds_host, did
473
+
);
474
+
475
if let Some(since_rev) = since {
476
url.push_str(&format!("&since={}", since_rev));
477
}
478
+
479
info!("Fetching CAR file from: {}", url);
480
+
481
let response = reqwest::get(&url).await?;
482
if !response.status().is_success() {
483
+
return Err(anyhow::anyhow!(
484
+
"Failed to fetch CAR from PDS {}: {}",
485
+
pds_host,
486
+
response.status()
487
+
));
488
}
489
+
490
// Verify content type
491
+
let content_type = response
492
+
.headers()
493
.get("content-type")
494
.and_then(|h| h.to_str().ok())
495
.unwrap_or("");
496
+
497
if !content_type.contains("application/vnd.ipld.car") {
498
return Err(anyhow::anyhow!("Unexpected content type: {}", content_type));
499
}
500
+
501
let car_data = response.bytes().await?;
502
Ok(car_data.to_vec())
503
}
504
+
505
+
/// Generate a DID document for did:web
506
+
fn generate_did_document(host: &str, pubkey: &str) -> Value {
507
+
json!({
508
+
"@context": [
509
+
"https://www.w3.org/ns/did/v1",
510
+
"https://w3id.org/security/multikey/v1",
511
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
512
+
],
513
+
"id": format!("did:web:{}", host),
514
+
"alsoKnownAs": [
515
+
format!("at://{}", host)
516
+
],
517
+
"service": [
518
+
{
519
+
"id": "#bsky_fg",
520
+
"type": "BskyFeedGenerator",
521
+
"serviceEndpoint": format!("https://{}", host)
522
+
},
523
+
{
524
+
"id": "#atproto_pds",
525
+
"type": "AtprotoPersonalDataServer",
526
+
"serviceEndpoint": format!("https://{}", host)
527
+
}
528
+
],
529
+
"verificationMethod": [
530
+
{
531
+
"id": format!("did:web:{}#atproto", host),
532
+
"type": "Multikey",
533
+
"controller": format!("did:web:{}", host),
534
+
"publicKeyMultibase": pubkey
535
+
}
536
+
]
537
+
})
538
+
}
539
+
540
+
/// Handler for /.well-known/did.json endpoint
541
+
pub async fn get_did_document(
542
+
Extension(_ctx): Extension<Context>,
543
+
) -> impl axum::response::IntoResponse {
544
+
// Get the host from environment variable or use default
545
+
let host = std::env::var("APP_HOST")
546
+
.or_else(|_| std::env::var("HOST"))
547
+
.unwrap_or_else(|_| "localhost:3000".to_string());
548
+
549
+
// get pubkey from environment variable or use default
550
+
let pubkey = std::env::var("TEST_PUBKEY").unwrap_or_else(|_| {
551
+
"z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i".to_string()
552
+
});
553
+
554
+
let did_doc = generate_did_document(&host, &pubkey);
555
+
556
+
(
557
+
StatusCode::OK,
558
+
[("Content-Type", "application/json")],
559
+
Json(did_doc),
560
+
)
561
+
}
562
+
563
+
#[cfg(test)]
564
+
mod tests {
565
+
use super::*;
566
+
567
+
const TEST_PUBKEY: &str = "z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i";
568
+
569
+
#[test]
570
+
fn test_generate_did_document() {
571
+
let host = "example.com";
572
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
573
+
574
+
// Verify the structure of the generated DID document
575
+
assert_eq!(did_doc["id"], format!("did:web:{}", host));
576
+
assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host));
577
+
578
+
// Check services
579
+
let services = did_doc["service"].as_array().unwrap();
580
+
assert_eq!(services.len(), 2);
581
+
582
+
let bsky_fg = &services[0];
583
+
assert_eq!(bsky_fg["id"], "#bsky_fg");
584
+
assert_eq!(bsky_fg["type"], "BskyFeedGenerator");
585
+
assert_eq!(bsky_fg["serviceEndpoint"], format!("https://{}", host));
586
+
587
+
let atproto_pds = &services[1];
588
+
assert_eq!(atproto_pds["id"], "#atproto_pds");
589
+
assert_eq!(atproto_pds["type"], "AtprotoPersonalDataServer");
590
+
assert_eq!(atproto_pds["serviceEndpoint"], format!("https://{}", host));
591
+
592
+
// Check verification method
593
+
let verification_methods = did_doc["verificationMethod"].as_array().unwrap();
594
+
assert_eq!(verification_methods.len(), 1);
595
+
596
+
let vm = &verification_methods[0];
597
+
assert_eq!(vm["id"], format!("did:web:{}#atproto", host));
598
+
assert_eq!(vm["type"], "Multikey");
599
+
assert_eq!(vm["controller"], format!("did:web:{}", host));
600
+
assert!(vm["publicKeyMultibase"].as_str().unwrap().starts_with("z"));
601
+
}
602
+
603
+
#[test]
604
+
fn test_did_document_context() {
605
+
let host = "test.example.org";
606
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
607
+
608
+
let context = did_doc["@context"].as_array().unwrap();
609
+
assert_eq!(context.len(), 3);
610
+
assert_eq!(context[0], "https://www.w3.org/ns/did/v1");
611
+
assert_eq!(context[1], "https://w3id.org/security/multikey/v1");
612
+
assert_eq!(
613
+
context[2],
614
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
615
+
);
616
+
}
617
+
618
+
#[test]
619
+
fn test_different_hosts() {
620
+
// Test with different host formats
621
+
let hosts = vec![
622
+
"localhost:3000",
623
+
"bsky.social",
624
+
"example.org:8080",
625
+
"my-service.com",
626
+
];
627
+
628
+
for host in hosts {
629
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
630
+
631
+
// Verify basic structure for each host
632
+
assert_eq!(did_doc["id"], format!("did:web:{}", host));
633
+
assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host));
634
+
635
+
let services = did_doc["service"].as_array().unwrap();
636
+
assert_eq!(services.len(), 2);
637
+
638
+
let verification_methods = did_doc["verificationMethod"].as_array().unwrap();
639
+
assert_eq!(verification_methods.len(), 1);
640
+
}
641
+
}
642
+
}
+50
-25
apps/aqua/src/main.rs
···
1
-
use axum::{Router, extract::Extension, routing::{get, post}};
2
use std::net::SocketAddr;
3
use tower_http::cors::CorsLayer;
4
-
use clap::{Arg, Command};
5
use uuid::Uuid;
6
-
use chrono::Utc;
7
8
use ctx::RawContext;
9
use repos::DataSource;
10
use repos::pg::PgDataSource;
11
-
use redis_client::RedisClient;
12
13
mod api;
14
mod ctx;
15
mod db;
16
mod repos;
17
mod xrpc;
18
-
mod redis_client;
19
20
#[tokio::main]
21
async fn main() -> Result<(), String> {
···
32
.long("import-identity-car")
33
.value_name("HANDLE_OR_DID")
34
.help("Import CAR file for a specific identity (handle or DID)")
35
-
.action(clap::ArgAction::Set)
36
)
37
.get_matches();
38
39
let db = db::init_pool().await.expect("failed to init db");
40
let pgds = PgDataSource::new(db.clone()).boxed();
41
-
let ctx = RawContext::new(pgds).build();
42
43
// Check if we should import a CAR file instead of starting the server
44
if let Some(identity) = matches.get_one::<String>("import-identity-car") {
···
50
51
let app = Router::new()
52
.route("/meta_info", get(api::get_meta_info))
53
.route("/api/car/upload", post(api::upload_car_import))
54
.route("/api/car/fetch", post(api::fetch_car_from_user))
55
-
.route("/api/car/status/{import_id}", get(api::get_car_import_status))
56
-
.route("/api/car/job-status/{job_id}", get(api::get_car_import_job_status))
57
.nest("/xrpc/", xrpc::actor::actor_routes())
58
.nest("/xrpc/", xrpc::feed::feed_routes())
59
.nest("/xrpc/", xrpc::stats::stats_routes())
···
69
}
70
71
async fn import_identity_car(_ctx: &ctx::Context, identity: &str) -> Result<(), String> {
72
-
use tracing::{info, error};
73
-
use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys};
74
-
75
info!("Submitting CAR import job for identity: {}", identity);
76
-
77
// Connect to Redis
78
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
79
-
let redis_client = RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?;
80
-
81
// Create job
82
let job = CarImportJob {
83
request_id: Uuid::new_v4(),
···
86
created_at: Utc::now(),
87
description: Some(format!("CLI import request for {}", identity)),
88
};
89
-
90
// Serialize job for queue
91
-
let job_data = serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?;
92
-
93
// Initialize job status
94
let status = CarImportJobStatus {
95
status: JobStatus::Pending,
···
99
error_message: None,
100
progress: None,
101
};
102
-
let status_data = serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?;
103
-
104
// Submit to queue and set initial status
105
-
match redis_client.queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data).await {
106
Ok(_) => {
107
// Set initial status
108
-
if let Err(e) = redis_client.set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data).await {
109
error!("Failed to set job status: {}", e);
110
}
111
-
112
info!("โ
CAR import job queued successfully!");
113
info!("Job ID: {}", job.request_id);
114
info!("Identity: {}", identity);
115
-
info!("Monitor status with: curl http://localhost:3000/api/car/status/{}", job.request_id);
116
Ok(())
117
}
118
Err(e) => {
···
1
+
use axum::{
2
+
Router,
3
+
extract::Extension,
4
+
routing::{get, post},
5
+
};
6
+
use chrono::Utc;
7
+
use clap::{Arg, Command};
8
use std::net::SocketAddr;
9
use tower_http::cors::CorsLayer;
10
use uuid::Uuid;
11
12
use ctx::RawContext;
13
+
use redis_client::RedisClient;
14
use repos::DataSource;
15
use repos::pg::PgDataSource;
16
17
mod api;
18
mod ctx;
19
mod db;
20
+
mod redis_client;
21
mod repos;
22
+
mod types;
23
mod xrpc;
24
25
#[tokio::main]
26
async fn main() -> Result<(), String> {
···
37
.long("import-identity-car")
38
.value_name("HANDLE_OR_DID")
39
.help("Import CAR file for a specific identity (handle or DID)")
40
+
.action(clap::ArgAction::Set),
41
)
42
.get_matches();
43
44
let db = db::init_pool().await.expect("failed to init db");
45
let pgds = PgDataSource::new(db.clone()).boxed();
46
+
let ctx = RawContext::new(pgds).build(); // Arc<RawContext>
47
48
// Check if we should import a CAR file instead of starting the server
49
if let Some(identity) = matches.get_one::<String>("import-identity-car") {
···
55
56
let app = Router::new()
57
.route("/meta_info", get(api::get_meta_info))
58
+
.route("/.well-known/did.json", get(api::get_did_document))
59
.route("/api/car/upload", post(api::upload_car_import))
60
.route("/api/car/fetch", post(api::fetch_car_from_user))
61
+
.route(
62
+
"/api/car/status/{import_id}",
63
+
get(api::get_car_import_status),
64
+
)
65
+
.route(
66
+
"/api/car/job-status/{job_id}",
67
+
get(api::get_car_import_job_status),
68
+
)
69
.nest("/xrpc/", xrpc::actor::actor_routes())
70
.nest("/xrpc/", xrpc::feed::feed_routes())
71
.nest("/xrpc/", xrpc::stats::stats_routes())
···
81
}
82
83
async fn import_identity_car(_ctx: &ctx::Context, identity: &str) -> Result<(), String> {
84
+
use crate::types::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys};
85
+
use tracing::{error, info};
86
+
87
info!("Submitting CAR import job for identity: {}", identity);
88
+
89
// Connect to Redis
90
+
let redis_url =
91
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
92
+
let redis_client =
93
+
RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?;
94
+
95
// Create job
96
let job = CarImportJob {
97
request_id: Uuid::new_v4(),
···
100
created_at: Utc::now(),
101
description: Some(format!("CLI import request for {}", identity)),
102
};
103
+
104
// Serialize job for queue
105
+
let job_data =
106
+
serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?;
107
+
108
// Initialize job status
109
let status = CarImportJobStatus {
110
status: JobStatus::Pending,
···
114
error_message: None,
115
progress: None,
116
};
117
+
let status_data =
118
+
serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?;
119
+
120
// Submit to queue and set initial status
121
+
match redis_client
122
+
.queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data)
123
+
.await
124
+
{
125
Ok(_) => {
126
// Set initial status
127
+
if let Err(e) = redis_client
128
+
.set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data)
129
+
.await
130
+
{
131
error!("Failed to set job status: {}", e);
132
}
133
+
134
info!("โ
CAR import job queued successfully!");
135
info!("Job ID: {}", job.request_id);
136
info!("Identity: {}", identity);
137
+
info!(
138
+
"Monitor status with: curl http://localhost:3000/api/car/status/{}",
139
+
job.request_id
140
+
);
141
Ok(())
142
}
143
Err(e) => {
+1
-1
apps/aqua/src/redis_client.rs
+7
-5
apps/aqua/src/repos/actor_profile.rs
···
9
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>>;
10
async fn get_multiple_actor_profiles(
11
&self,
12
-
identities: &Vec<String>,
13
) -> anyhow::Result<Vec<ProfileViewData>>;
14
}
15
···
30
avatar: row.avatar,
31
banner: row.banner,
32
// chrono -> atrium time
33
-
created_at: row.created_at.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
34
description: row.description,
35
description_facets: row
36
.description_facets
37
.and_then(|v| serde_json::from_value(v).ok()),
38
did: row.did,
39
-
featured_item: None,
40
display_name: row.display_name,
41
status: row.status.and_then(|v| serde_json::from_value(v).ok()),
42
}
43
}
···
46
#[async_trait]
47
impl ActorProfileRepo for PgDataSource {
48
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>> {
49
-
self.get_multiple_actor_profiles(&vec![identity.to_string()])
50
.await
51
.map(|p| p.first().cloned())
52
}
53
async fn get_multiple_actor_profiles(
54
&self,
55
-
identities: &Vec<String>,
56
) -> anyhow::Result<Vec<ProfileViewData>> {
57
// split identities into dids (prefixed with "did:") and handles (not prefixed) in one iteration
58
let mut dids = Vec::new();
···
9
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>>;
10
async fn get_multiple_actor_profiles(
11
&self,
12
+
identities: &[String],
13
) -> anyhow::Result<Vec<ProfileViewData>>;
14
}
15
···
30
avatar: row.avatar,
31
banner: row.banner,
32
// chrono -> atrium time
33
+
created_at: row
34
+
.created_at
35
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
36
description: row.description,
37
description_facets: row
38
.description_facets
39
.and_then(|v| serde_json::from_value(v).ok()),
40
did: row.did,
41
display_name: row.display_name,
42
+
featured_item: None,
43
status: row.status.and_then(|v| serde_json::from_value(v).ok()),
44
}
45
}
···
48
#[async_trait]
49
impl ActorProfileRepo for PgDataSource {
50
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>> {
51
+
self.get_multiple_actor_profiles(&[identity.to_string()])
52
.await
53
.map(|p| p.first().cloned())
54
}
55
async fn get_multiple_actor_profiles(
56
&self,
57
+
identities: &[String],
58
) -> anyhow::Result<Vec<ProfileViewData>> {
59
// split identities into dids (prefixed with "did:") and handles (not prefixed) in one iteration
60
let mut dids = Vec::new();
+22
-18
apps/aqua/src/repos/feed_play.rs
···
8
async fn get_feed_play(&self, identity: &str) -> anyhow::Result<Option<PlayViewData>>;
9
async fn get_feed_plays_for_profile(
10
&self,
11
-
identities: &Vec<String>,
12
) -> anyhow::Result<Vec<PlayViewData>>;
13
}
14
···
49
};
50
51
Ok(Some(PlayViewData {
52
artists,
53
-
duration: row.duration.map(|d| d as i64),
54
isrc: row.isrc,
55
-
music_service_base_domain: row.music_service_base_domain,
56
origin_url: row.origin_url,
57
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
58
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
59
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
60
-
release_name: row.release_name,
61
submission_client_agent: row.submission_client_agent,
62
-
track_mb_id: Some(row.rkey.clone()),
63
-
track_name: row.track_name.clone(),
64
}))
65
}
66
67
async fn get_feed_plays_for_profile(
68
&self,
69
-
identities: &Vec<String>,
70
) -> anyhow::Result<Vec<PlayViewData>> {
71
let rows = sqlx::query!(
72
r#"
···
105
};
106
107
result.push(PlayViewData {
108
artists,
109
-
duration: row.duration.map(|d| d as i64),
110
isrc: row.isrc,
111
-
music_service_base_domain: row.music_service_base_domain,
112
origin_url: row.origin_url,
113
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
114
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
115
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
116
-
release_name: row.release_name,
117
submission_client_agent: row.submission_client_agent,
118
-
track_mb_id: Some(row.rkey.clone()),
119
-
track_name: row.track_name.clone(),
120
});
121
}
122
···
8
async fn get_feed_play(&self, identity: &str) -> anyhow::Result<Option<PlayViewData>>;
9
async fn get_feed_plays_for_profile(
10
&self,
11
+
identities: &[String],
12
) -> anyhow::Result<Vec<PlayViewData>>;
13
}
14
···
49
};
50
51
Ok(Some(PlayViewData {
52
+
track_name: row.track_name.clone(),
53
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
54
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
55
+
duration: row.duration.map(|d| d as i64),
56
artists,
57
+
release_name: row.release_name.clone(),
58
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
59
isrc: row.isrc,
60
origin_url: row.origin_url,
61
+
music_service_base_domain: row.music_service_base_domain,
62
submission_client_agent: row.submission_client_agent,
63
+
played_time: row
64
+
.played_time
65
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
66
}))
67
}
68
69
async fn get_feed_plays_for_profile(
70
&self,
71
+
identities: &[String],
72
) -> anyhow::Result<Vec<PlayViewData>> {
73
let rows = sqlx::query!(
74
r#"
···
107
};
108
109
result.push(PlayViewData {
110
+
track_name: row.track_name.clone(),
111
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
112
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
113
+
duration: row.duration.map(|d| d as i64),
114
artists,
115
+
release_name: row.release_name.clone(),
116
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
117
isrc: row.isrc,
118
origin_url: row.origin_url,
119
+
music_service_base_domain: row.music_service_base_domain,
120
submission_client_agent: row.submission_client_agent,
121
+
played_time: row
122
+
.played_time
123
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
124
});
125
}
126
+1
-2
apps/aqua/src/repos/mod.rs
+10
-9
apps/aqua/src/repos/stats.rs
···
85
if let (Some(mbid), Some(name)) = (row.mbid, row.name) {
86
result.push(ReleaseViewData {
87
mbid: mbid.to_string(),
88
-
89
name,
90
play_count: row.play_count.unwrap_or(0),
91
});
···
217
};
218
219
result.push(PlayViewData {
220
artists,
221
-
duration: row.duration.map(|d| d as i64),
222
isrc: row.isrc,
223
-
music_service_base_domain: row.music_service_base_domain,
224
origin_url: row.origin_url,
225
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
226
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
227
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
228
-
release_name: row.release_name,
229
submission_client_agent: row.submission_client_agent,
230
-
track_mb_id: Some(row.rkey.clone()),
231
-
track_name: row.track_name.clone(),
232
});
233
}
234
···
85
if let (Some(mbid), Some(name)) = (row.mbid, row.name) {
86
result.push(ReleaseViewData {
87
mbid: mbid.to_string(),
88
name,
89
play_count: row.play_count.unwrap_or(0),
90
});
···
216
};
217
218
result.push(PlayViewData {
219
+
track_name: row.track_name.clone(),
220
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
221
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
222
+
duration: row.duration.map(|d| d as i64),
223
artists,
224
+
release_name: row.release_name.clone(),
225
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
226
isrc: row.isrc,
227
origin_url: row.origin_url,
228
+
music_service_base_domain: row.music_service_base_domain,
229
submission_client_agent: row.submission_client_agent,
230
+
played_time: row
231
+
.played_time
232
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
233
});
234
}
235
+51
apps/aqua/src/types/jobs.rs
···
···
1
+
use chrono::{DateTime, Utc};
2
+
use serde::{Deserialize, Serialize};
3
+
use uuid::Uuid;
4
+
5
+
#[derive(Debug, Clone, Serialize, Deserialize)]
6
+
pub struct CarImportJob {
7
+
pub request_id: Uuid,
8
+
pub identity: String,
9
+
pub since: Option<DateTime<Utc>>,
10
+
pub created_at: DateTime<Utc>,
11
+
pub description: Option<String>,
12
+
}
13
+
14
+
#[derive(Debug, Clone, Serialize, Deserialize)]
15
+
pub struct CarImportJobStatus {
16
+
pub status: JobStatus,
17
+
pub created_at: DateTime<Utc>,
18
+
pub started_at: Option<DateTime<Utc>>,
19
+
pub completed_at: Option<DateTime<Utc>>,
20
+
pub error_message: Option<String>,
21
+
pub progress: Option<JobProgress>,
22
+
}
23
+
24
+
#[derive(Debug, Clone, Serialize, Deserialize)]
25
+
pub enum JobStatus {
26
+
Pending,
27
+
Processing,
28
+
Completed,
29
+
Failed,
30
+
Cancelled,
31
+
}
32
+
33
+
#[derive(Debug, Clone, Serialize, Deserialize)]
34
+
pub struct JobProgress {
35
+
step: String,
36
+
pub user_did: Option<String>,
37
+
pub pds_host: Option<String>,
38
+
pub car_size_bytes: Option<u64>,
39
+
pub blocks_processed: Option<u64>,
40
+
}
41
+
42
+
pub mod queue_keys {
43
+
use uuid::Uuid;
44
+
45
+
pub const CAR_IMPORT_JOBS: &str = "car_import_jobs";
46
+
pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status";
47
+
48
+
pub fn job_status_key(job_id: &Uuid) -> String {
49
+
format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id)
50
+
}
51
+
}
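For reference, a minimal sketch (not part of the diff) of how a worker might decode a queued `CarImportJob` and derive its status key from `queue_keys`; it assumes the types above are in scope, and the handle value is a placeholder.

```rust
use chrono::Utc;
use uuid::Uuid;

// Sketch only: mirrors how main.rs serializes a job before queueing it and
// how a worker could decode the payload again on the other side.
fn roundtrip_example() -> Result<(), serde_json::Error> {
    let job = CarImportJob {
        request_id: Uuid::new_v4(),
        identity: "alice.example.com".to_string(), // placeholder handle
        since: None,
        created_at: Utc::now(),
        description: Some("CLI import request for alice.example.com".to_string()),
    };

    // Payload pushed onto the `car_import_jobs` queue.
    let payload = serde_json::to_string(&job)?;

    // A worker popping the payload recovers the job and derives its status key,
    // e.g. "car_import_status:<uuid>".
    let decoded: CarImportJob = serde_json::from_str(&payload)?;
    let _status_key = queue_keys::job_status_key(&decoded.request_id);
    Ok(())
}
```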
+1
-1
apps/aqua/src/xrpc/actor.rs
+17
-11
apps/aqua/src/xrpc/stats.rs
···
1
use crate::ctx::Context;
2
use axum::{Extension, http::StatusCode, response::IntoResponse, routing::get};
3
use serde::{Deserialize, Serialize};
4
-
use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData};
5
use types::fm::teal::alpha::feed::defs::PlayViewData;
6
7
// mount stats routes
8
pub fn stats_routes() -> axum::Router {
9
axum::Router::new()
10
.route("/fm.teal.alpha.stats.getTopArtists", get(get_top_artists))
11
.route("/fm.teal.alpha.stats.getTopReleases", get(get_top_releases))
12
-
.route("/fm.teal.alpha.stats.getUserTopArtists", get(get_user_top_artists))
13
-
.route("/fm.teal.alpha.stats.getUserTopReleases", get(get_user_top_releases))
14
.route("/fm.teal.alpha.stats.getLatest", get(get_latest))
15
}
16
···
29
axum::extract::Query(query): axum::extract::Query<GetTopArtistsQuery>,
30
) -> Result<impl IntoResponse, (StatusCode, String)> {
31
let repo = &ctx.db;
32
-
33
match repo.get_top_artists(query.limit).await {
34
Ok(artists) => Ok(axum::Json(GetTopArtistsResponse { artists })),
35
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
51
axum::extract::Query(query): axum::extract::Query<GetTopReleasesQuery>,
52
) -> Result<impl IntoResponse, (StatusCode, String)> {
53
let repo = &ctx.db;
54
-
55
match repo.get_top_releases(query.limit).await {
56
Ok(releases) => Ok(axum::Json(GetTopReleasesResponse { releases })),
57
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
74
axum::extract::Query(query): axum::extract::Query<GetUserTopArtistsQuery>,
75
) -> Result<impl IntoResponse, (StatusCode, String)> {
76
let repo = &ctx.db;
77
-
78
if query.actor.is_empty() {
79
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
80
}
81
-
82
match repo.get_user_top_artists(&query.actor, query.limit).await {
83
Ok(artists) => Ok(axum::Json(GetUserTopArtistsResponse { artists })),
84
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
101
axum::extract::Query(query): axum::extract::Query<GetUserTopReleasesQuery>,
102
) -> Result<impl IntoResponse, (StatusCode, String)> {
103
let repo = &ctx.db;
104
-
105
if query.actor.is_empty() {
106
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
107
}
108
-
109
match repo.get_user_top_releases(&query.actor, query.limit).await {
110
Ok(releases) => Ok(axum::Json(GetUserTopReleasesResponse { releases })),
111
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
127
axum::extract::Query(query): axum::extract::Query<GetLatestQuery>,
128
) -> Result<impl IntoResponse, (StatusCode, String)> {
129
let repo = &ctx.db;
130
-
131
match repo.get_latest(query.limit).await {
132
Ok(plays) => Ok(axum::Json(GetLatestResponse { plays })),
133
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
134
}
135
-
}
···
1
use crate::ctx::Context;
2
use axum::{Extension, http::StatusCode, response::IntoResponse, routing::get};
3
use serde::{Deserialize, Serialize};
4
use types::fm::teal::alpha::feed::defs::PlayViewData;
5
+
use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData};
6
7
// mount stats routes
8
pub fn stats_routes() -> axum::Router {
9
axum::Router::new()
10
.route("/fm.teal.alpha.stats.getTopArtists", get(get_top_artists))
11
.route("/fm.teal.alpha.stats.getTopReleases", get(get_top_releases))
12
+
.route(
13
+
"/fm.teal.alpha.stats.getUserTopArtists",
14
+
get(get_user_top_artists),
15
+
)
16
+
.route(
17
+
"/fm.teal.alpha.stats.getUserTopReleases",
18
+
get(get_user_top_releases),
19
+
)
20
.route("/fm.teal.alpha.stats.getLatest", get(get_latest))
21
}
22
···
35
axum::extract::Query(query): axum::extract::Query<GetTopArtistsQuery>,
36
) -> Result<impl IntoResponse, (StatusCode, String)> {
37
let repo = &ctx.db;
38
+
39
match repo.get_top_artists(query.limit).await {
40
Ok(artists) => Ok(axum::Json(GetTopArtistsResponse { artists })),
41
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
57
axum::extract::Query(query): axum::extract::Query<GetTopReleasesQuery>,
58
) -> Result<impl IntoResponse, (StatusCode, String)> {
59
let repo = &ctx.db;
60
+
61
match repo.get_top_releases(query.limit).await {
62
Ok(releases) => Ok(axum::Json(GetTopReleasesResponse { releases })),
63
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
80
axum::extract::Query(query): axum::extract::Query<GetUserTopArtistsQuery>,
81
) -> Result<impl IntoResponse, (StatusCode, String)> {
82
let repo = &ctx.db;
83
+
84
if query.actor.is_empty() {
85
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
86
}
87
+
88
match repo.get_user_top_artists(&query.actor, query.limit).await {
89
Ok(artists) => Ok(axum::Json(GetUserTopArtistsResponse { artists })),
90
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
107
axum::extract::Query(query): axum::extract::Query<GetUserTopReleasesQuery>,
108
) -> Result<impl IntoResponse, (StatusCode, String)> {
109
let repo = &ctx.db;
110
+
111
if query.actor.is_empty() {
112
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
113
}
114
+
115
match repo.get_user_top_releases(&query.actor, query.limit).await {
116
Ok(releases) => Ok(axum::Json(GetUserTopReleasesResponse { releases })),
117
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
133
axum::extract::Query(query): axum::extract::Query<GetLatestQuery>,
134
) -> Result<impl IntoResponse, (StatusCode, String)> {
135
let repo = &ctx.db;
136
+
137
match repo.get_latest(query.limit).await {
138
Ok(plays) => Ok(axum::Json(GetLatestResponse { plays })),
139
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
140
}
141
+
}
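As a rough usage sketch (not part of the diff), the stats routes registered above can be exercised over HTTP once aqua is running; the base URL matches the localhost hint printed by the CLI, and the DID is a placeholder.

```rust
// Sketch only: assumes aqua is listening on localhost:3000.
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let base = "http://localhost:3000";

    // Global top artists, limited to 10 entries.
    let top = reqwest::get(format!(
        "{base}/xrpc/fm.teal.alpha.stats.getTopArtists?limit=10"
    ))
    .await?
    .text()
    .await?;
    println!("{top}");

    // Per-user variant: `actor` is required, otherwise the handler returns 400.
    let user_top = reqwest::get(format!(
        "{base}/xrpc/fm.teal.alpha.stats.getUserTopArtists?actor=did:plc:example&limit=5"
    ))
    .await?
    .text()
    .await?;
    println!("{user_top}");
    Ok(())
}
```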
+82
-5
lexicons/README.md
···
17
18
### Initial Setup
19
20
-
If you're cloning this repository for the first time, you'll need to initialize the submodules:
21
22
```bash
23
git submodule update --init --recursive
24
```
25
26
### Updating ATProto Lexicons
27
28
-
To update to the latest ATProto lexicons:
29
30
```bash
31
cd vendor/atproto
32
git pull origin main
33
cd ../..
···
35
git commit -m "Update atproto lexicons to latest"
36
```
37
38
### Adding Custom Lexicons
39
40
Custom lexicons should be added to the `fm.teal.alpha/` directory following the ATProto lexicon schema format. These files are tracked directly in our repository and not affected by submodule updates.
41
42
-
## Generated Files
43
-
44
-
This directory may contain generated files (`.js`, `.d.ts`, etc.) that are created by lexicon compilation tools. These are ignored by git as specified in the `.gitignore` file.
···
17
18
### Initial Setup
19
20
+
If you're cloning this repository for the first time, you'll need to initialize the submodules and create the symbolic links:
21
22
```bash
23
+
# Initialize submodules
24
git submodule update --init --recursive
25
+
26
+
# Create symbolic links to atproto lexicons
27
+
cd lexicons
28
+
ln -s ../vendor/atproto/lexicons/app app
29
+
ln -s ../vendor/atproto/lexicons/chat chat
30
+
ln -s ../vendor/atproto/lexicons/com com
31
+
ln -s ../vendor/atproto/lexicons/tools tools
32
+
cd ..
33
+
```
34
+
35
+
Or use the provided setup script:
36
+
37
+
```bash
38
+
./scripts/setup-lexicons.sh
39
```
40
41
### Updating ATProto Lexicons
42
43
+
To update to the latest ATProto lexicons, use the provided update script:
44
45
```bash
46
+
./scripts/update-lexicons.sh
47
+
```
48
+
49
+
This will:
50
+
1. Fetch the latest changes from the atproto repository
51
+
2. Show you what changed
52
+
3. Stage the submodule update for commit
53
+
54
+
Then commit the changes:
55
+
```bash
56
+
git commit -m "Update atproto lexicons to latest"
57
+
```
58
+
59
+
**Manual approach:**
60
+
```bash
61
cd vendor/atproto
62
git pull origin main
63
cd ../..
···
65
git commit -m "Update atproto lexicons to latest"
66
```
67
68
+
### Available Scripts
69
+
70
+
Two convenience scripts are available:
71
+
72
+
**Setup Script** - Handles the initial setup:
73
+
74
+
```bash
75
+
#!/bin/bash
76
+
# scripts/setup-lexicons.sh
77
+
78
+
echo "Setting up lexicons..."
79
+
80
+
# Initialize submodules
81
+
git submodule update --init --recursive
82
+
83
+
# Create symbolic links if they don't exist
84
+
cd lexicons
85
+
if [ ! -L app ]; then
86
+
ln -s ../vendor/atproto/lexicons/app app
87
+
echo "Created symlink: lexicons/app"
88
+
fi
89
+
if [ ! -L chat ]; then
90
+
ln -s ../vendor/atproto/lexicons/chat chat
91
+
echo "Created symlink: lexicons/chat"
92
+
fi
93
+
if [ ! -L com ]; then
94
+
ln -s ../vendor/atproto/lexicons/com com
95
+
echo "Created symlink: lexicons/com"
96
+
fi
97
+
if [ ! -L tools ]; then
98
+
ln -s ../vendor/atproto/lexicons/tools tools
99
+
echo "Created symlink: lexicons/tools"
100
+
fi
101
+
cd ..
102
+
103
+
echo "Lexicons setup complete!"
104
+
```
105
+
106
+
**Update Script** - Updates the ATProto lexicons:
107
+
108
+
```bash
109
+
# Run from the repository root.
110
+
# The script wraps the manual approach shown above:
111
+
112
+
# it fetches the latest atproto changes, shows what changed,
113
+
# and stages the submodule update for commit.
114
+
./scripts/update-lexicons.sh
115
+
```
116
+
117
### Adding Custom Lexicons
118
119
Custom lexicons should be added to the `fm.teal.alpha/` directory following the ATProto lexicon schema format. These files are tracked directly in our repository and not affected by submodule updates.
120
121
+
**Note**: The symbolic links (`app`, `chat`, `com`, `tools`) are not tracked in git and will be created during setup. They are ignored in `.gitignore` to avoid conflicts.
+21
-1
lexicons/fm.teal.alpha/actor/defs.json
···
36
},
37
"status": {
38
"type": "ref",
39
+
"ref": "#statusView"
40
},
41
"createdAt": { "type": "string", "format": "datetime" }
42
}
···
57
"avatar": {
58
"type": "string",
59
"description": "IPLD of the avatar"
60
+
}
61
+
}
62
+
},
63
+
"statusView": {
64
+
"type": "object",
65
+
"description": "A declaration of the status of the actor.",
66
+
"properties": {
67
+
"time": {
68
+
"type": "string",
69
+
"format": "datetime",
70
+
"description": "The unix timestamp of when the item was recorded"
71
+
},
72
+
"expiry": {
73
+
"type": "string",
74
+
"format": "datetime",
75
+
"description": "The unix timestamp of the expiry time of the item. If unavailable, default to 10 minutes past the start time."
76
+
},
77
+
"item": {
78
+
"type": "ref",
79
+
"ref": "fm.teal.alpha.feed.defs#playView"
80
}
81
}
82
}
+12
-1
lexicons/fm.teal.alpha/feed/play.json
···
19
},
20
"trackMbId": {
21
"type": "string",
22
"description": "The Musicbrainz ID of the track"
23
},
24
"recordingMbId": {
···
86
"type": "string",
87
"format": "datetime",
88
"description": "The unix timestamp of when the track was played"
89
+
},
90
+
"trackDiscriminant": {
91
+
"type": "string",
92
+
"maxLength": 128,
93
+
"maxGraphemes": 1280,
94
+
"description": "Distinguishing information for track variants (e.g. 'Acoustic Version', 'Live at Wembley', 'Radio Edit', 'Demo'). Used to differentiate between different versions of the same base track while maintaining grouping capabilities."
95
+
},
96
+
"releaseDiscriminant": {
97
+
"type": "string",
98
+
"maxLength": 128,
99
+
"maxGraphemes": 1280,
100
+
"description": "Distinguishing information for release variants (e.g. 'Deluxe Edition', 'Remastered', '2023 Remaster', 'Special Edition'). Used to differentiate between different versions of the same base release while maintaining grouping capabilities."
101
}
102
}
103
}
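To make the intent of the new discriminant fields concrete, here is an illustrative record sketch (not part of the lexicon files); field names other than the discriminants and `playedTime` are assumed from the surrounding schema and migrations.

```rust
use serde_json::{Value, json};

// Illustrative only: two plays of the same base track, distinguished by
// discriminants so variants can still be grouped by trackName/releaseName.
fn example_plays() -> Vec<Value> {
    vec![
        json!({
            "$type": "fm.teal.alpha.feed.play",
            "trackName": "Example Song",
            "releaseName": "Example Album",
            "playedTime": "2024-12-20T12:00:00Z"
        }),
        json!({
            "$type": "fm.teal.alpha.feed.play",
            "trackName": "Example Song",
            "trackDiscriminant": "Live at Wembley",
            "releaseName": "Example Album",
            "releaseDiscriminant": "Deluxe Edition",
            "playedTime": "2024-12-21T20:30:00Z"
        }),
    ]
}
```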
+24
lexicons/fm.teal.alpha/richtext/facet.json
···
···
1
+
{
2
+
"lexicon": 1,
3
+
"id": "fm.teal.alpha.richtext.facet",
4
+
"defs": {
5
+
"main": {
6
+
"type": "object",
7
+
"description": "Annotation of a sub-string within rich text.",
8
+
"required": ["index", "features"],
9
+
"properties": {
10
+
"index": { "type": "ref", "ref": "app.bsky.richtext.facet#byteSlice" },
11
+
"features": {
12
+
"type": "array",
13
+
"items": {
14
+
"type": "union",
15
+
"refs": [
16
+
"app.bsky.richtext.facet#mention",
17
+
"app.bsky.richtext.facet#link"
18
+
]
19
+
}
20
+
}
21
+
}
22
+
}
23
+
}
24
+
}
+226
migrations/20241220000001_initial_schema.sql
···
···
1
+
-- Initial comprehensive schema for Teal music platform
2
+
-- Based on services/cadet/sql/base.sql
3
+
4
+
CREATE TABLE artists (
5
+
mbid UUID PRIMARY KEY,
6
+
name TEXT NOT NULL,
7
+
play_count INTEGER DEFAULT 0
8
+
);
9
+
10
+
-- releases are analogous to 'albums'
11
+
CREATE TABLE releases (
12
+
mbid UUID PRIMARY KEY,
13
+
name TEXT NOT NULL,
14
+
play_count INTEGER DEFAULT 0
15
+
);
16
+
17
+
-- recordings are analogous to 'tracks' BUT tracks can be in multiple releases!
18
+
CREATE TABLE recordings (
19
+
mbid UUID PRIMARY KEY,
20
+
name TEXT NOT NULL,
21
+
play_count INTEGER DEFAULT 0
22
+
);
23
+
24
+
CREATE TABLE plays (
25
+
uri TEXT PRIMARY KEY,
26
+
did TEXT NOT NULL,
27
+
rkey TEXT NOT NULL,
28
+
cid TEXT NOT NULL,
29
+
isrc TEXT,
30
+
duration INTEGER,
31
+
track_name TEXT NOT NULL,
32
+
played_time TIMESTAMP WITH TIME ZONE,
33
+
processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
34
+
release_mbid UUID,
35
+
release_name TEXT,
36
+
recording_mbid UUID,
37
+
submission_client_agent TEXT,
38
+
music_service_base_domain TEXT,
39
+
origin_url TEXT,
40
+
FOREIGN KEY (release_mbid) REFERENCES releases (mbid),
41
+
FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid)
42
+
);
43
+
44
+
CREATE INDEX idx_plays_release_mbid ON plays (release_mbid);
45
+
CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid);
46
+
CREATE INDEX idx_plays_played_time ON plays (played_time);
47
+
CREATE INDEX idx_plays_did ON plays (did);
48
+
49
+
CREATE TABLE play_to_artists (
50
+
play_uri TEXT, -- references plays(uri)
51
+
artist_mbid UUID REFERENCES artists (mbid),
52
+
artist_name TEXT, -- storing here for ease of use when joining
53
+
PRIMARY KEY (play_uri, artist_mbid),
54
+
FOREIGN KEY (play_uri) REFERENCES plays (uri)
55
+
);
56
+
57
+
CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid);
58
+
59
+
-- Profiles table
60
+
CREATE TABLE profiles (
61
+
did TEXT PRIMARY KEY,
62
+
handle TEXT,
63
+
display_name TEXT,
64
+
description TEXT,
65
+
description_facets JSONB,
66
+
avatar TEXT, -- IPLD of the image, bafy...
67
+
banner TEXT,
68
+
created_at TIMESTAMP WITH TIME ZONE
69
+
);
70
+
71
+
-- User featured items table
72
+
CREATE TABLE featured_items (
73
+
did TEXT PRIMARY KEY,
74
+
mbid TEXT NOT NULL,
75
+
type TEXT NOT NULL
76
+
);
77
+
78
+
-- Statii table (status records)
79
+
CREATE TABLE statii (
80
+
uri TEXT PRIMARY KEY,
81
+
did TEXT NOT NULL,
82
+
rkey TEXT NOT NULL,
83
+
cid TEXT NOT NULL,
84
+
record JSONB NOT NULL,
85
+
indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
86
+
);
87
+
88
+
CREATE INDEX idx_statii_did_rkey ON statii (did, rkey);
89
+
90
+
-- Materialized view for artists' play counts
91
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
92
+
SELECT
93
+
a.mbid AS artist_mbid,
94
+
a.name AS artist_name,
95
+
COUNT(p.uri) AS play_count
96
+
FROM
97
+
artists a
98
+
LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
99
+
LEFT JOIN plays p ON p.uri = pta.play_uri
100
+
GROUP BY
101
+
a.mbid,
102
+
a.name;
103
+
104
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid);
105
+
106
+
-- Materialized view for releases' play counts
107
+
CREATE MATERIALIZED VIEW mv_release_play_counts AS
108
+
SELECT
109
+
r.mbid AS release_mbid,
110
+
r.name AS release_name,
111
+
COUNT(p.uri) AS play_count
112
+
FROM
113
+
releases r
114
+
LEFT JOIN plays p ON p.release_mbid = r.mbid
115
+
GROUP BY
116
+
r.mbid,
117
+
r.name;
118
+
119
+
CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid);
120
+
121
+
-- Materialized view for recordings' play counts
122
+
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
123
+
SELECT
124
+
rec.mbid AS recording_mbid,
125
+
rec.name AS recording_name,
126
+
COUNT(p.uri) AS play_count
127
+
FROM
128
+
recordings rec
129
+
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
130
+
GROUP BY
131
+
rec.mbid,
132
+
rec.name;
133
+
134
+
CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid);
135
+
136
+
-- Global play count materialized view
137
+
CREATE MATERIALIZED VIEW mv_global_play_count AS
138
+
SELECT
139
+
COUNT(uri) AS total_plays,
140
+
COUNT(DISTINCT did) AS unique_listeners
141
+
FROM plays;
142
+
143
+
CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays);
144
+
145
+
-- Top artists in the last 30 days
146
+
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
147
+
SELECT
148
+
a.mbid AS artist_mbid,
149
+
a.name AS artist_name,
150
+
COUNT(p.uri) AS play_count
151
+
FROM artists a
152
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
153
+
INNER JOIN plays p ON p.uri = pta.play_uri
154
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
155
+
GROUP BY a.mbid, a.name
156
+
ORDER BY COUNT(p.uri) DESC;
157
+
158
+
-- Top releases in the last 30 days
159
+
CREATE MATERIALIZED VIEW mv_top_releases_30days AS
160
+
SELECT
161
+
r.mbid AS release_mbid,
162
+
r.name AS release_name,
163
+
COUNT(p.uri) AS play_count
164
+
FROM releases r
165
+
INNER JOIN plays p ON p.release_mbid = r.mbid
166
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
167
+
GROUP BY r.mbid, r.name
168
+
ORDER BY COUNT(p.uri) DESC;
169
+
170
+
-- Top artists for user in the last 30 days
171
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
172
+
SELECT
173
+
prof.did,
174
+
a.mbid AS artist_mbid,
175
+
a.name AS artist_name,
176
+
COUNT(p.uri) AS play_count
177
+
FROM artists a
178
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
179
+
INNER JOIN plays p ON p.uri = pta.play_uri
180
+
INNER JOIN profiles prof ON prof.did = p.did
181
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
182
+
GROUP BY prof.did, a.mbid, a.name
183
+
ORDER BY COUNT(p.uri) DESC;
184
+
185
+
-- Top artists for user in the last 7 days
186
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
187
+
SELECT
188
+
prof.did,
189
+
a.mbid AS artist_mbid,
190
+
a.name AS artist_name,
191
+
COUNT(p.uri) AS play_count
192
+
FROM artists a
193
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
194
+
INNER JOIN plays p ON p.uri = pta.play_uri
195
+
INNER JOIN profiles prof ON prof.did = p.did
196
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
197
+
GROUP BY prof.did, a.mbid, a.name
198
+
ORDER BY COUNT(p.uri) DESC;
199
+
200
+
-- Top releases for user in the last 30 days
201
+
CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS
202
+
SELECT
203
+
prof.did,
204
+
r.mbid AS release_mbid,
205
+
r.name AS release_name,
206
+
COUNT(p.uri) AS play_count
207
+
FROM releases r
208
+
INNER JOIN plays p ON p.release_mbid = r.mbid
209
+
INNER JOIN profiles prof ON prof.did = p.did
210
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
211
+
GROUP BY prof.did, r.mbid, r.name
212
+
ORDER BY COUNT(p.uri) DESC;
213
+
214
+
-- Top releases for user in the last 7 days
215
+
CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS
216
+
SELECT
217
+
prof.did,
218
+
r.mbid AS release_mbid,
219
+
r.name AS release_name,
220
+
COUNT(p.uri) AS play_count
221
+
FROM releases r
222
+
INNER JOIN plays p ON p.release_mbid = r.mbid
223
+
INNER JOIN profiles prof ON prof.did = p.did
224
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
225
+
GROUP BY prof.did, r.mbid, r.name
226
+
ORDER BY COUNT(p.uri) DESC;
+59
migrations/20241220000002_car_import_tables.sql
···
···
1
+
-- CAR import functionality tables
2
+
-- For handling AT Protocol CAR file imports and processing
3
+
4
+
-- Tracks uploaded CAR files that are queued for processing
5
+
CREATE TABLE IF NOT EXISTS car_import_requests (
6
+
import_id TEXT PRIMARY KEY,
7
+
car_data_base64 TEXT NOT NULL,
8
+
status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed
9
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
10
+
processed_at TIMESTAMP WITH TIME ZONE,
11
+
error_message TEXT,
12
+
file_size_bytes INTEGER,
13
+
block_count INTEGER,
14
+
extracted_records_count INTEGER DEFAULT 0
15
+
);
16
+
17
+
CREATE INDEX idx_car_import_requests_status ON car_import_requests (status);
18
+
CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at);
19
+
20
+
-- Tracks raw IPLD blocks extracted from CAR files
21
+
CREATE TABLE IF NOT EXISTS car_blocks (
22
+
cid TEXT PRIMARY KEY,
23
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
24
+
block_data BYTEA NOT NULL,
25
+
decoded_successfully BOOLEAN DEFAULT FALSE,
26
+
collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc.
27
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
28
+
);
29
+
30
+
CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id);
31
+
CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type);
32
+
33
+
-- Tracks records extracted from CAR imports that were successfully processed
34
+
CREATE TABLE IF NOT EXISTS car_extracted_records (
35
+
id SERIAL PRIMARY KEY,
36
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
37
+
cid TEXT NOT NULL REFERENCES car_blocks(cid),
38
+
collection_type TEXT NOT NULL,
39
+
record_uri TEXT, -- AT URI if applicable (e.g., for play records)
40
+
synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123')
41
+
rkey TEXT,
42
+
extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
43
+
processing_notes TEXT
44
+
);
45
+
46
+
CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id);
47
+
CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type);
48
+
CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri);
49
+
50
+
-- Tracks import metadata and commit information
51
+
CREATE TABLE IF NOT EXISTS car_import_metadata (
52
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
53
+
metadata_key TEXT NOT NULL,
54
+
metadata_value JSONB NOT NULL,
55
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
56
+
PRIMARY KEY (import_id, metadata_key)
57
+
);
58
+
59
+
CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
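A minimal sketch (not part of the diff) of how an upload could be queued against this schema, roughly what `store_car_import_request` is expected to do; the function name and parameters here are illustrative, and the base64 payload is assumed to be encoded by the caller.

```rust
use sqlx::PgPool;

// Sketch only: inserts a pending import request matching the schema above.
async fn queue_import(
    pool: &PgPool,
    import_id: &str,
    car_data_base64: &str,
    file_size_bytes: i32,
) -> Result<(), sqlx::Error> {
    sqlx::query(
        "INSERT INTO car_import_requests (import_id, car_data_base64, status, file_size_bytes) \
         VALUES ($1, $2, 'pending', $3)",
    )
    .bind(import_id)
    .bind(car_data_base64)
    .bind(file_size_bytes)
    .execute(pool)
    .await?;
    Ok(())
}
```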
+112
migrations/20241220000003_artists_without_mbids.sql
···
···
1
+
-- Migration to support artists without MusicBrainz IDs
2
+
-- This allows the system to comply with the Teal lexicon where only trackName is required
3
+
4
+
-- Add a field to plays table to store raw artist names for records without MBIDs
5
+
ALTER TABLE plays ADD COLUMN artist_names_raw JSONB;
6
+
7
+
-- Create a new artists table that doesn't require MBID as primary key
8
+
CREATE TABLE artists_extended (
9
+
id SERIAL PRIMARY KEY,
10
+
mbid UUID UNIQUE, -- Optional MusicBrainz ID
11
+
name TEXT NOT NULL,
12
+
name_normalized TEXT GENERATED ALWAYS AS (LOWER(TRIM(name))) STORED,
13
+
play_count INTEGER DEFAULT 0,
14
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
15
+
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
16
+
);
17
+
18
+
-- Create index for efficient lookups
19
+
CREATE INDEX idx_artists_extended_mbid ON artists_extended (mbid) WHERE mbid IS NOT NULL;
20
+
CREATE INDEX idx_artists_extended_name_normalized ON artists_extended (name_normalized);
21
+
CREATE UNIQUE INDEX idx_artists_extended_name_unique ON artists_extended (name_normalized) WHERE mbid IS NULL;
22
+
23
+
-- Create a new junction table that can handle both MBID and non-MBID artists
24
+
CREATE TABLE play_to_artists_extended (
25
+
play_uri TEXT NOT NULL REFERENCES plays(uri),
26
+
artist_id INTEGER NOT NULL REFERENCES artists_extended(id),
27
+
artist_name TEXT NOT NULL, -- Denormalized for performance
28
+
PRIMARY KEY (play_uri, artist_id)
29
+
);
30
+
31
+
CREATE INDEX idx_play_to_artists_extended_artist ON play_to_artists_extended (artist_id);
32
+
33
+
-- Migrate existing data from old tables to new structure
34
+
INSERT INTO artists_extended (mbid, name, play_count)
35
+
SELECT mbid, name, play_count FROM artists;
36
+
37
+
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name)
38
+
SELECT
39
+
pta.play_uri,
40
+
ae.id,
41
+
pta.artist_name
42
+
FROM play_to_artists pta
43
+
JOIN artists_extended ae ON ae.mbid = pta.artist_mbid;
44
+
45
+
-- Update materialized views to use new structure
46
+
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
47
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
48
+
SELECT
49
+
ae.id AS artist_id,
50
+
ae.mbid AS artist_mbid,
51
+
ae.name AS artist_name,
52
+
COUNT(p.uri) AS play_count
53
+
FROM
54
+
artists_extended ae
55
+
LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
56
+
LEFT JOIN plays p ON p.uri = ptae.play_uri
57
+
GROUP BY
58
+
ae.id, ae.mbid, ae.name;
59
+
60
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts_new ON mv_artist_play_counts (artist_id);
61
+
62
+
-- Update other materialized views that reference artists
63
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_30days;
64
+
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
65
+
SELECT
66
+
ae.id AS artist_id,
67
+
ae.mbid AS artist_mbid,
68
+
ae.name AS artist_name,
69
+
COUNT(p.uri) AS play_count
70
+
FROM artists_extended ae
71
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
72
+
INNER JOIN plays p ON p.uri = ptae.play_uri
73
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
74
+
GROUP BY ae.id, ae.mbid, ae.name
75
+
ORDER BY COUNT(p.uri) DESC;
76
+
77
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_30days;
78
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
79
+
SELECT
80
+
prof.did,
81
+
ae.id AS artist_id,
82
+
ae.mbid AS artist_mbid,
83
+
ae.name AS artist_name,
84
+
COUNT(p.uri) AS play_count
85
+
FROM artists_extended ae
86
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
87
+
INNER JOIN plays p ON p.uri = ptae.play_uri
88
+
INNER JOIN profiles prof ON prof.did = p.did
89
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
90
+
GROUP BY prof.did, ae.id, ae.mbid, ae.name
91
+
ORDER BY COUNT(p.uri) DESC;
92
+
93
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_7days;
94
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
95
+
SELECT
96
+
prof.did,
97
+
ae.id AS artist_id,
98
+
ae.mbid AS artist_mbid,
99
+
ae.name AS artist_name,
100
+
COUNT(p.uri) AS play_count
101
+
FROM artists_extended ae
102
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
103
+
INNER JOIN plays p ON p.uri = ptae.play_uri
104
+
INNER JOIN profiles prof ON prof.did = p.did
105
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
106
+
GROUP BY prof.did, ae.id, ae.mbid, ae.name
107
+
ORDER BY COUNT(p.uri) DESC;
108
+
109
+
-- Comment explaining the migration strategy
110
+
COMMENT ON TABLE artists_extended IS 'Extended artists table that supports both MusicBrainz and non-MusicBrainz artists. Uses serial ID as primary key with optional MBID.';
111
+
COMMENT ON TABLE play_to_artists_extended IS 'Junction table linking plays to artists using the new artists_extended table structure.';
112
+
COMMENT ON COLUMN plays.artist_names_raw IS 'Raw artist names as JSON array for plays without MusicBrainz data, used as fallback when artist relationships cannot be established.';
+76
migrations/20241220000004_synthetic_mbids.sql
···
···
1
+
-- Migration to support synthetic MBIDs for artists without MusicBrainz data
2
+
-- This ensures all artists have some form of ID while maintaining uniqueness
3
+
4
+
-- Enable UUID extension for v5 UUID generation
5
+
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
6
+
7
+
-- Add a column to track MBID type (musicbrainz, synthetic, unknown)
8
+
ALTER TABLE artists_extended ADD COLUMN mbid_type TEXT DEFAULT 'unknown' NOT NULL;
9
+
10
+
-- Add check constraint for valid MBID types
11
+
ALTER TABLE artists_extended ADD CONSTRAINT chk_mbid_type
12
+
CHECK (mbid_type IN ('musicbrainz', 'synthetic', 'unknown'));
13
+
14
+
-- Update existing records to set proper MBID type
15
+
UPDATE artists_extended SET mbid_type = 'musicbrainz' WHERE mbid IS NOT NULL;
16
+
17
+
-- Drop the unique constraint on name_normalized for null MBIDs since we'll handle duplicates differently
18
+
DROP INDEX IF EXISTS idx_artists_extended_name_unique;
19
+
20
+
-- Add index for efficient querying by MBID type
21
+
CREATE INDEX idx_artists_extended_mbid_type ON artists_extended (mbid_type);
22
+
23
+
-- Create a view to easily work with different artist types
24
+
CREATE VIEW artists_with_type AS
25
+
SELECT
26
+
id,
27
+
mbid,
28
+
name,
29
+
mbid_type,
30
+
play_count,
31
+
created_at,
32
+
updated_at,
33
+
-- For synthetic MBIDs, we can show the source name used for generation
34
+
CASE
35
+
WHEN mbid_type = 'synthetic' THEN 'Generated from: ' || name
36
+
WHEN mbid_type = 'musicbrainz' THEN 'MusicBrainz: ' || mbid::text
37
+
ELSE 'No MBID available'
38
+
END as mbid_info
39
+
FROM artists_extended;
40
+
41
+
-- Update materialized views to include MBID type information
42
+
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
43
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
44
+
SELECT
45
+
ae.id AS artist_id,
46
+
ae.mbid AS artist_mbid,
47
+
ae.name AS artist_name,
48
+
ae.mbid_type,
49
+
COUNT(p.uri) AS play_count
50
+
FROM
51
+
artists_extended ae
52
+
LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
53
+
LEFT JOIN plays p ON p.uri = ptae.play_uri
54
+
GROUP BY
55
+
ae.id, ae.mbid, ae.name, ae.mbid_type;
56
+
57
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts_with_type ON mv_artist_play_counts (artist_id);
58
+
59
+
-- Add comments explaining the synthetic MBID system
60
+
COMMENT ON COLUMN artists_extended.mbid_type IS 'Type of MBID: musicbrainz (real), synthetic (generated), or unknown (legacy data)';
61
+
COMMENT ON COLUMN artists_extended.mbid IS 'MusicBrainz ID (for musicbrainz type) or synthetic UUID (for synthetic type)';
62
+
COMMENT ON VIEW artists_with_type IS 'View that provides human-readable information about artist MBID sources';
63
+
64
+
-- Add a function to generate synthetic MBIDs
65
+
CREATE OR REPLACE FUNCTION generate_synthetic_mbid(artist_name TEXT) RETURNS UUID AS $$
66
+
DECLARE
67
+
namespace_uuid UUID := '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; -- DNS namespace
68
+
result_uuid UUID;
69
+
BEGIN
70
+
-- Generate deterministic UUID v5 based on artist name
71
+
SELECT uuid_generate_v5(namespace_uuid, artist_name) INTO result_uuid;
72
+
RETURN result_uuid;
73
+
END;
74
+
$$ LANGUAGE plpgsql IMMUTABLE;
75
+
76
+
COMMENT ON FUNCTION generate_synthetic_mbid IS 'Generates a deterministic UUID v5 for artist names without MusicBrainz IDs';
+101
migrations/20241220000005_fuzzy_matching.sql
+101
migrations/20241220000005_fuzzy_matching.sql
···
···
1
+
-- Migration to add fuzzy text matching capabilities
2
+
-- This enables better artist name matching using trigram similarity
3
+
4
+
-- Enable pg_trgm extension for trigram similarity matching
5
+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
6
+
7
+
-- Create indexes for efficient trigram matching on artist names
8
+
CREATE INDEX idx_artists_extended_name_trgm ON artists_extended USING gin (name gin_trgm_ops);
9
+
CREATE INDEX idx_artists_extended_name_normalized_trgm ON artists_extended USING gin (name_normalized gin_trgm_ops);
10
+
11
+
-- Create a function to calculate comprehensive artist similarity
12
+
CREATE OR REPLACE FUNCTION calculate_artist_similarity(
13
+
input_name TEXT,
14
+
existing_name TEXT,
15
+
input_album TEXT DEFAULT NULL,
16
+
existing_album TEXT DEFAULT NULL
17
+
) RETURNS FLOAT AS $$
18
+
DECLARE
19
+
name_similarity FLOAT;
20
+
album_similarity FLOAT := 0.0;
21
+
final_score FLOAT;
22
+
BEGIN
23
+
-- Calculate trigram similarity for artist names
24
+
name_similarity := similarity(LOWER(TRIM(input_name)), LOWER(TRIM(existing_name)));
25
+
26
+
-- Boost for exact matches after normalization
27
+
IF LOWER(TRIM(regexp_replace(input_name, '[^a-zA-Z0-9\s]', '', 'g'))) =
28
+
LOWER(TRIM(regexp_replace(existing_name, '[^a-zA-Z0-9\s]', '', 'g'))) THEN
29
+
name_similarity := GREATEST(name_similarity, 0.95);
30
+
END IF;
31
+
32
+
-- Factor in album similarity if both are provided
33
+
IF input_album IS NOT NULL AND existing_album IS NOT NULL THEN
34
+
album_similarity := similarity(LOWER(TRIM(input_album)), LOWER(TRIM(existing_album)));
35
+
-- Weight: 80% name, 20% album
36
+
final_score := (name_similarity * 0.8) + (album_similarity * 0.2);
37
+
ELSE
38
+
final_score := name_similarity;
39
+
END IF;
40
+
41
+
RETURN final_score;
42
+
END;
43
+
$$ LANGUAGE plpgsql IMMUTABLE;
44
+
45
+
-- Create a view for fuzzy artist matching with confidence scores
46
+
CREATE VIEW fuzzy_artist_matches AS
47
+
SELECT DISTINCT
48
+
ae1.id as query_artist_id,
49
+
ae1.name as query_artist_name,
50
+
ae1.mbid_type as query_mbid_type,
51
+
ae2.id as match_artist_id,
52
+
ae2.name as match_artist_name,
53
+
ae2.mbid as match_mbid,
54
+
ae2.mbid_type as match_mbid_type,
55
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as name_similarity,
56
+
CASE
57
+
WHEN ae2.mbid_type = 'musicbrainz' THEN 'upgrade_to_mb'
58
+
WHEN ae1.mbid_type = 'musicbrainz' AND ae2.mbid_type = 'synthetic' THEN 'consolidate_to_mb'
59
+
ELSE 'merge_synthetic'
60
+
END as match_action
61
+
FROM artists_extended ae1
62
+
CROSS JOIN artists_extended ae2
63
+
WHERE ae1.id != ae2.id
64
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) > 0.8
65
+
AND (
66
+
ae1.mbid_type = 'synthetic' OR ae2.mbid_type = 'musicbrainz'
67
+
);
68
+
69
+
-- Add comments
70
+
COMMENT ON EXTENSION pg_trgm IS 'Trigram extension for fuzzy text matching';
71
+
COMMENT ON INDEX idx_artists_extended_name_trgm IS 'GIN index for trigram similarity on artist names';
72
+
COMMENT ON FUNCTION calculate_artist_similarity IS 'Calculates similarity score between artists considering name and optional album context';
73
+
COMMENT ON VIEW fuzzy_artist_matches IS 'Shows potential artist matches with confidence scores and recommended actions';
74
+
75
+
-- Create a function to suggest artist consolidations
76
+
CREATE OR REPLACE FUNCTION suggest_artist_consolidations(min_similarity FLOAT DEFAULT 0.9)
77
+
RETURNS TABLE(
78
+
action TEXT,
79
+
synthetic_artist TEXT,
80
+
target_artist TEXT,
81
+
similarity_score FLOAT,
82
+
synthetic_plays INTEGER,
83
+
target_plays INTEGER
84
+
) AS $$
85
+
BEGIN
86
+
RETURN QUERY
87
+
SELECT
88
+
fam.match_action as action,
89
+
fam.query_artist_name as synthetic_artist,
90
+
fam.match_artist_name as target_artist,
91
+
fam.name_similarity as similarity_score,
92
+
(SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.query_artist_id) as synthetic_plays,
93
+
(SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.match_artist_id) as target_plays
94
+
FROM fuzzy_artist_matches fam
95
+
WHERE fam.name_similarity >= min_similarity
96
+
AND fam.match_action = 'upgrade_to_mb'
97
+
ORDER BY fam.name_similarity DESC, synthetic_plays DESC;
98
+
END;
99
+
$$ LANGUAGE plpgsql;
100
+
101
+
COMMENT ON FUNCTION suggest_artist_consolidations IS 'Returns suggestions for consolidating synthetic artists with MusicBrainz artists based on similarity';
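To tie the pieces together, a hedged example of how the new objects are meant to be queried (the 0.92 threshold is illustrative):

-- Consolidation candidates where a synthetic artist can be upgraded to a MusicBrainz one
SELECT * FROM suggest_artist_consolidations(0.92);

-- The underlying view can also be inspected directly for other match actions
SELECT query_artist_name, match_artist_name, name_similarity, match_action
FROM fuzzy_artist_matches
WHERE match_action = 'merge_synthetic'
ORDER BY name_similarity DESC
LIMIT 20;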
+138
migrations/20241220000006_discriminant_fields.sql
+138
migrations/20241220000006_discriminant_fields.sql
···
···
1
+
-- Migration to add discriminant fields for track and release variants
2
+
-- This enables proper handling of different versions while maintaining grouping capabilities
3
+
4
+
-- Add discriminant fields to plays table
5
+
ALTER TABLE plays ADD COLUMN track_discriminant TEXT;
6
+
ALTER TABLE plays ADD COLUMN release_discriminant TEXT;
7
+
8
+
-- Add discriminant field to releases table
9
+
ALTER TABLE releases ADD COLUMN discriminant TEXT;
10
+
11
+
-- Add discriminant field to recordings table
12
+
ALTER TABLE recordings ADD COLUMN discriminant TEXT;
13
+
14
+
-- Create indexes for efficient searching and filtering
15
+
CREATE INDEX idx_plays_track_discriminant ON plays (track_discriminant);
16
+
CREATE INDEX idx_plays_release_discriminant ON plays (release_discriminant);
17
+
CREATE INDEX idx_releases_discriminant ON releases (discriminant);
18
+
CREATE INDEX idx_recordings_discriminant ON recordings (discriminant);
19
+
20
+
-- Create composite indexes for grouping by base name + discriminant
21
+
CREATE INDEX idx_plays_track_name_discriminant ON plays (track_name, track_discriminant);
22
+
CREATE INDEX idx_plays_release_name_discriminant ON plays (release_name, release_discriminant);
23
+
24
+
-- Update materialized views to include discriminant information
25
+
DROP MATERIALIZED VIEW IF EXISTS mv_release_play_counts;
26
+
CREATE MATERIALIZED VIEW mv_release_play_counts AS
27
+
SELECT
28
+
r.mbid AS release_mbid,
29
+
r.name AS release_name,
30
+
r.discriminant AS release_discriminant,
31
+
COUNT(p.uri) AS play_count
32
+
FROM
33
+
releases r
34
+
LEFT JOIN plays p ON p.release_mbid = r.mbid
35
+
GROUP BY
36
+
r.mbid, r.name, r.discriminant;
37
+
38
+
CREATE UNIQUE INDEX idx_mv_release_play_counts_discriminant ON mv_release_play_counts (release_mbid);
39
+
40
+
DROP MATERIALIZED VIEW IF EXISTS mv_recording_play_counts;
41
+
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
42
+
SELECT
43
+
rec.mbid AS recording_mbid,
44
+
rec.name AS recording_name,
45
+
rec.discriminant AS recording_discriminant,
46
+
COUNT(p.uri) AS play_count
47
+
FROM
48
+
recordings rec
49
+
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
50
+
GROUP BY
51
+
rec.mbid, rec.name, rec.discriminant;
52
+
53
+
CREATE UNIQUE INDEX idx_mv_recording_play_counts_discriminant ON mv_recording_play_counts (recording_mbid);
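One practical effect of giving each materialized view a unique index, noted here as a sketch rather than something this migration runs itself: refreshes can be performed concurrently, so readers are not blocked while play counts are rebuilt.

-- Illustrative; CONCURRENTLY requires the unique indexes defined above
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_release_play_counts;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_recording_play_counts;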
54
+
55
+
-- Create views for analyzing track/release variants
56
+
CREATE VIEW track_variants AS
57
+
SELECT
58
+
track_name,
59
+
track_discriminant,
60
+
COUNT(*) AS play_count,
61
+
COUNT(DISTINCT did) AS unique_listeners,
62
+
COUNT(DISTINCT recording_mbid) AS unique_recordings
63
+
FROM plays
64
+
WHERE track_name IS NOT NULL
65
+
GROUP BY track_name, track_discriminant
66
+
ORDER BY track_name, play_count DESC;
67
+
68
+
CREATE VIEW release_variants AS
69
+
SELECT
70
+
release_name,
71
+
release_discriminant,
72
+
COUNT(*) AS play_count,
73
+
COUNT(DISTINCT did) AS unique_listeners,
74
+
COUNT(DISTINCT release_mbid) AS unique_releases
75
+
FROM plays
76
+
WHERE release_name IS NOT NULL
77
+
GROUP BY release_name, release_discriminant
78
+
ORDER BY release_name, play_count DESC;
79
+
80
+
-- Create function to extract potential discriminants from existing names
81
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
82
+
DECLARE
83
+
discriminant_patterns TEXT[] := ARRAY[
84
+
'\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\)',
85
+
'\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\]',
86
+
'\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\}'
87
+
];
88
+
pattern TEXT;
89
+
match_result TEXT;
90
+
BEGIN
91
+
-- Try each pattern to find discriminant information
92
+
FOREACH pattern IN ARRAY discriminant_patterns
93
+
LOOP
94
+
SELECT substring(name_text FROM pattern) INTO match_result;
95
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
96
+
RETURN trim(match_result);
97
+
END IF;
98
+
END LOOP;
99
+
100
+
RETURN NULL;
101
+
END;
102
+
$$ LANGUAGE plpgsql IMMUTABLE;
103
+
104
+
-- Create function to get base name without discriminant
105
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
106
+
DECLARE
107
+
cleanup_patterns TEXT[] := ARRAY[
108
+
'\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\)\s*',
109
+
'\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\]\s*',
110
+
'\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\}\s*'
111
+
];
112
+
pattern TEXT;
113
+
result_text TEXT := name_text;
114
+
BEGIN
115
+
-- Remove discriminant patterns to get base name
116
+
FOREACH pattern IN ARRAY cleanup_patterns
117
+
LOOP
118
+
result_text := regexp_replace(result_text, pattern, ' ', 'gi');
119
+
END LOOP;
120
+
121
+
-- Clean up extra whitespace
122
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
123
+
124
+
RETURN result_text;
125
+
END;
126
+
$$ LANGUAGE plpgsql IMMUTABLE;
127
+
128
+
-- Add comments explaining the discriminant system
129
+
COMMENT ON COLUMN plays.track_discriminant IS 'Distinguishing information for track variants (e.g., "Acoustic Version", "Live at Wembley", "Radio Edit")';
130
+
COMMENT ON COLUMN plays.release_discriminant IS 'Distinguishing information for release variants (e.g., "Deluxe Edition", "Remastered", "2023 Remaster")';
131
+
COMMENT ON COLUMN releases.discriminant IS 'Distinguishing information for release variants to enable proper grouping';
132
+
COMMENT ON COLUMN recordings.discriminant IS 'Distinguishing information for recording variants to enable proper grouping';
133
+
134
+
COMMENT ON VIEW track_variants IS 'Shows all variants of tracks with their play counts and unique listeners';
135
+
COMMENT ON VIEW release_variants IS 'Shows all variants of releases with their play counts and unique listeners';
136
+
137
+
COMMENT ON FUNCTION extract_discriminant IS 'Extracts discriminant information from track/release names for migration purposes';
138
+
COMMENT ON FUNCTION get_base_name IS 'Returns the base name without discriminant information for grouping purposes';
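A brief sketch of the intended split, using invented titles; note that this first version of extract_discriminant matches the keywords case-sensitively (a later migration in this changeset relaxes that), so the example uses lowercase markers:

-- Illustrative calls; the commented values show the expected shape of the output
SELECT extract_discriminant('Hotel California (2013 remaster)');  -- e.g. '2013 remaster'
SELECT get_base_name('Hotel California (2013 remaster)');         -- e.g. 'Hotel California'

-- Variants then roll up by base name plus discriminant
SELECT track_name, track_discriminant, play_count, unique_listeners
FROM track_variants
ORDER BY play_count DESC
LIMIT 10;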
+276
migrations/20241220000007_enhanced_discriminant_extraction.sql
+276
migrations/20241220000007_enhanced_discriminant_extraction.sql
···
···
1
+
-- Enhanced discriminant extraction with comprehensive edition/version patterns
2
+
-- This migration improves the auto-population of discriminants for better metadata handling
3
+
4
+
-- Drop existing functions to replace them with enhanced versions
5
+
DROP FUNCTION IF EXISTS extract_discriminant(TEXT);
6
+
DROP FUNCTION IF EXISTS get_base_name(TEXT);
7
+
8
+
-- Enhanced function to extract discriminants with comprehensive patterns
9
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
10
+
DECLARE
11
+
-- Comprehensive patterns for discriminant extraction
12
+
discriminant_patterns TEXT[] := ARRAY[
13
+
-- Parentheses patterns
14
+
'\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)',
15
+
'\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)',
16
+
'\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)',
17
+
'\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)',
18
+
'\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)',
19
+
20
+
-- Brackets patterns
21
+
'\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]',
22
+
'\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]',
23
+
'\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]',
24
+
'\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]',
25
+
'\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]',
26
+
27
+
-- Braces patterns
28
+
'\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}',
29
+
'\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}',
30
+
'\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}',
31
+
'\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}',
32
+
'\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}',
33
+
34
+
-- Dash/hyphen patterns (common for editions)
35
+
'[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$',
36
+
'[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
37
+
38
+
-- Colon patterns (common for subtitles and versions)
39
+
':\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$',
40
+
':\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
41
+
];
42
+
43
+
pattern TEXT;
44
+
match_result TEXT;
45
+
BEGIN
46
+
-- Return early if input is null or empty
47
+
IF name_text IS NULL OR trim(name_text) = '' THEN
48
+
RETURN NULL;
49
+
END IF;
50
+
51
+
-- Try each pattern to find discriminant information
52
+
FOREACH pattern IN ARRAY discriminant_patterns
53
+
LOOP
54
+
SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
55
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
56
+
-- Clean up the match result
57
+
match_result := trim(match_result);
58
+
-- Remove leading/trailing punctuation
59
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
60
+
-- Ensure it's not just whitespace or empty after cleanup
61
+
IF length(trim(match_result)) > 0 THEN
62
+
RETURN match_result;
63
+
END IF;
64
+
END IF;
65
+
END LOOP;
66
+
67
+
RETURN NULL;
68
+
END;
69
+
$$ LANGUAGE plpgsql IMMUTABLE;
70
+
71
+
-- Enhanced function to get base name without discriminant
72
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
73
+
DECLARE
74
+
-- Comprehensive cleanup patterns matching the extraction patterns
75
+
cleanup_patterns TEXT[] := ARRAY[
76
+
-- Remove parentheses content
77
+
'\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*',
78
+
'\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*',
79
+
'\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*',
80
+
'\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*',
81
+
'\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*',
82
+
83
+
-- Remove brackets content
84
+
'\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*',
85
+
'\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*',
86
+
'\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*',
87
+
'\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*',
88
+
'\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*',
89
+
90
+
-- Remove braces content
91
+
'\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*',
92
+
'\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*',
93
+
'\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*',
94
+
'\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*',
95
+
'\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*',
96
+
97
+
-- Remove dash/hyphen patterns
98
+
'\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$',
99
+
'\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
100
+
101
+
-- Remove colon patterns
102
+
'\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$',
103
+
'\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
104
+
];
105
+
106
+
pattern TEXT;
107
+
result_text TEXT := name_text;
108
+
BEGIN
109
+
-- Return early if input is null or empty
110
+
IF name_text IS NULL OR trim(name_text) = '' THEN
111
+
RETURN name_text;
112
+
END IF;
113
+
114
+
-- Remove discriminant patterns to get base name
115
+
FOREACH pattern IN ARRAY cleanup_patterns
116
+
LOOP
117
+
result_text := regexp_replace(result_text, pattern, ' ', 'gi');
118
+
END LOOP;
119
+
120
+
-- Clean up extra whitespace and normalize
121
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
122
+
123
+
-- Remove trailing punctuation that might be left after removal
124
+
result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
125
+
result_text := trim(result_text);
126
+
127
+
-- Ensure we don't return an empty string
128
+
IF length(result_text) = 0 THEN
129
+
RETURN name_text;
130
+
END IF;
131
+
132
+
RETURN result_text;
133
+
END;
134
+
$$ LANGUAGE plpgsql IMMUTABLE;
135
+
136
+
-- Create function to extract discriminant specifically for editions and versions
137
+
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
138
+
DECLARE
139
+
-- Focused patterns for edition/version extraction
140
+
edition_patterns TEXT[] := ARRAY[
141
+
-- Edition patterns
142
+
'\(([^)]*edition[^)]*)\)',
143
+
'\[([^]]*edition[^]]*)\]',
144
+
'\{([^}]*edition[^}]*)\}',
145
+
'[-–—]\s*([^-–—]*edition[^-–—]*)$',
146
+
':\s*([^:]*edition[^:]*)$',
147
+
148
+
-- Version patterns
149
+
'\(([^)]*version[^)]*)\)',
150
+
'\[([^]]*version[^]]*)\]',
151
+
'\{([^}]*version[^}]*)\}',
152
+
'[-–—]\s*([^-–—]*version[^-–—]*)$',
153
+
':\s*([^:]*version[^:]*)$',
154
+
155
+
-- Remaster patterns
156
+
'\(([^)]*remaster[^)]*)\)',
157
+
'\[([^]]*remaster[^]]*)\]',
158
+
'\{([^}]*remaster[^}]*)\}',
159
+
'[-–—]\s*([^-–—]*remaster[^-–—]*)$',
160
+
':\s*([^:]*remaster[^:]*)$',
161
+
162
+
-- Year-based patterns
163
+
'\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
164
+
'\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
165
+
'\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
166
+
];
167
+
168
+
pattern TEXT;
169
+
match_result TEXT;
170
+
BEGIN
171
+
-- Return early if input is null or empty
172
+
IF name_text IS NULL OR trim(name_text) = '' THEN
173
+
RETURN NULL;
174
+
END IF;
175
+
176
+
-- Try edition-specific patterns first
177
+
FOREACH pattern IN ARRAY edition_patterns
178
+
LOOP
179
+
SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
180
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
181
+
match_result := trim(match_result);
182
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
183
+
IF length(trim(match_result)) > 0 THEN
184
+
RETURN match_result;
185
+
END IF;
186
+
END IF;
187
+
END LOOP;
188
+
189
+
RETURN NULL;
190
+
END;
191
+
$$ LANGUAGE plpgsql IMMUTABLE;
192
+
193
+
-- Update recordings table to populate discriminants from existing names
194
+
UPDATE recordings
195
+
SET discriminant = extract_discriminant(name)
196
+
WHERE discriminant IS NULL
197
+
AND extract_discriminant(name) IS NOT NULL;
198
+
199
+
-- Update releases table to populate discriminants from existing names
200
+
UPDATE releases
201
+
SET discriminant = extract_discriminant(name)
202
+
WHERE discriminant IS NULL
203
+
AND extract_discriminant(name) IS NOT NULL;
204
+
205
+
-- Update plays table to populate discriminants from existing names where not already set
206
+
UPDATE plays
207
+
SET track_discriminant = extract_discriminant(track_name)
208
+
WHERE track_discriminant IS NULL
209
+
AND extract_discriminant(track_name) IS NOT NULL;
210
+
211
+
UPDATE plays
212
+
SET release_discriminant = extract_discriminant(release_name)
213
+
WHERE release_discriminant IS NULL
214
+
AND release_name IS NOT NULL
215
+
AND extract_discriminant(release_name) IS NOT NULL;
216
+
217
+
-- Create indexes for efficient discriminant queries
218
+
CREATE INDEX IF NOT EXISTS idx_recordings_name_discriminant ON recordings (name, discriminant);
219
+
CREATE INDEX IF NOT EXISTS idx_releases_name_discriminant ON releases (name, discriminant);
220
+
221
+
-- Add comments for the new function
222
+
COMMENT ON FUNCTION extract_discriminant IS 'Enhanced discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
223
+
COMMENT ON FUNCTION get_base_name IS 'Enhanced base name extraction removing comprehensive discriminant patterns to enable proper grouping';
224
+
COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized function for extracting edition and version discriminants with focused patterns';
225
+
226
+
-- Create a view to show discriminant extraction results for analysis
227
+
CREATE OR REPLACE VIEW discriminant_analysis AS
228
+
SELECT
229
+
'recordings' as table_name,
230
+
name as original_name,
231
+
discriminant,
232
+
get_base_name(name) as base_name,
233
+
extract_discriminant(name) as extracted_discriminant,
234
+
extract_edition_discriminant(name) as edition_discriminant
235
+
FROM recordings
236
+
WHERE name IS NOT NULL
237
+
UNION ALL
238
+
SELECT
239
+
'releases' as table_name,
240
+
name as original_name,
241
+
discriminant,
242
+
get_base_name(name) as base_name,
243
+
extract_discriminant(name) as extracted_discriminant,
244
+
extract_edition_discriminant(name) as edition_discriminant
245
+
FROM releases
246
+
WHERE name IS NOT NULL;
247
+
248
+
COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing discriminant extraction results for quality assessment and debugging';
249
+
250
+
-- Refresh materialized views to include discriminant information
251
+
REFRESH MATERIALIZED VIEW mv_release_play_counts;
252
+
REFRESH MATERIALIZED VIEW mv_recording_play_counts;
253
+
254
+
-- Create summary statistics for discriminant usage
255
+
CREATE OR REPLACE VIEW discriminant_stats AS
256
+
SELECT
257
+
'recordings' as entity_type,
258
+
COUNT(*) as total_count,
259
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
260
+
COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
261
+
ROUND(
262
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2
263
+
) as discriminant_percentage
264
+
FROM recordings
265
+
UNION ALL
266
+
SELECT
267
+
'releases' as entity_type,
268
+
COUNT(*) as total_count,
269
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
270
+
COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
271
+
ROUND(
272
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2
273
+
) as discriminant_percentage
274
+
FROM releases;
275
+
276
+
COMMENT ON VIEW discriminant_stats IS 'Statistics showing discriminant usage and extraction potential across entity types';
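For judging how well the backfill worked, a hedged example of the kind of inspection the two analysis views are meant to support:

-- Overall coverage per entity type
SELECT * FROM discriminant_stats;

-- Rows where the stored discriminant disagrees with what extraction would produce now
SELECT table_name, original_name, discriminant, extracted_discriminant, base_name
FROM discriminant_analysis
WHERE discriminant IS DISTINCT FROM extracted_discriminant
LIMIT 25;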
+252
migrations/20241220000008_fix_discriminant_case_sensitivity.sql
+252
migrations/20241220000008_fix_discriminant_case_sensitivity.sql
···
···
1
+
-- Fix case sensitivity in discriminant extraction patterns
2
+
-- This migration updates the discriminant extraction functions to properly handle case-insensitive matching
3
+
4
+
-- Drop dependent views first, then functions, then recreate everything
5
+
DROP VIEW IF EXISTS discriminant_analysis CASCADE;
6
+
DROP VIEW IF EXISTS discriminant_stats CASCADE;
7
+
8
+
-- Drop existing functions to replace with case-insensitive versions
9
+
DROP FUNCTION IF EXISTS extract_discriminant(TEXT) CASCADE;
10
+
DROP FUNCTION IF EXISTS get_base_name(TEXT) CASCADE;
11
+
DROP FUNCTION IF EXISTS extract_edition_discriminant(TEXT) CASCADE;
12
+
13
+
-- Enhanced function to extract discriminants with case-insensitive matching
14
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
15
+
DECLARE
16
+
-- Comprehensive patterns for discriminant extraction with case-insensitive flags
17
+
discriminant_patterns TEXT[] := ARRAY[
18
+
-- Parentheses patterns
19
+
'(?i)\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)',
20
+
'(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)',
21
+
'(?i)\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)',
22
+
'(?i)\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)',
23
+
'(?i)\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)',
24
+
25
+
-- Brackets patterns
26
+
'(?i)\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]',
27
+
'(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]',
28
+
'(?i)\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]',
29
+
'(?i)\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]',
30
+
'(?i)\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]',
31
+
32
+
-- Braces patterns
33
+
'(?i)\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}',
34
+
'(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}',
35
+
'(?i)\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}',
36
+
'(?i)\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}',
37
+
'(?i)\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}',
38
+
39
+
-- Dash/hyphen patterns (common for editions)
40
+
'(?i)[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$',
41
+
'(?i)[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
42
+
43
+
-- Colon patterns (common for subtitles and versions)
44
+
'(?i):\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$',
45
+
'(?i):\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
46
+
];
47
+
48
+
pattern TEXT;
49
+
match_result TEXT;
50
+
BEGIN
51
+
-- Return early if input is null or empty
52
+
IF name_text IS NULL OR trim(name_text) = '' THEN
53
+
RETURN NULL;
54
+
END IF;
55
+
56
+
-- Try each pattern to find discriminant information
57
+
FOREACH pattern IN ARRAY discriminant_patterns
58
+
LOOP
59
+
SELECT substring(name_text FROM pattern) INTO match_result;
60
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
61
+
-- Clean up the match result
62
+
match_result := trim(match_result);
63
+
-- Remove leading/trailing punctuation
64
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
65
+
-- Ensure it's not just whitespace or empty after cleanup
66
+
IF length(trim(match_result)) > 0 THEN
67
+
RETURN match_result;
68
+
END IF;
69
+
END IF;
70
+
END LOOP;
71
+
72
+
RETURN NULL;
73
+
END;
74
+
$$ LANGUAGE plpgsql IMMUTABLE;
75
+
76
+
-- Enhanced function to get base name without discriminant with case-insensitive matching
77
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
78
+
DECLARE
79
+
-- Comprehensive cleanup patterns matching the extraction patterns
80
+
cleanup_patterns TEXT[] := ARRAY[
81
+
-- Remove parentheses content
82
+
'(?i)\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*',
83
+
'(?i)\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*',
84
+
'(?i)\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*',
85
+
'(?i)\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*',
86
+
'(?i)\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*',
87
+
88
+
-- Remove brackets content
89
+
'(?i)\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*',
90
+
'(?i)\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*',
91
+
'(?i)\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*',
92
+
'(?i)\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*',
93
+
'(?i)\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*',
94
+
95
+
-- Remove braces content
96
+
'(?i)\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*',
97
+
'(?i)\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*',
98
+
'(?i)\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*',
99
+
'(?i)\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*',
100
+
'(?i)\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*',
101
+
102
+
-- Remove dash/hyphen patterns
103
+
'(?i)\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$',
104
+
'(?i)\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
105
+
106
+
-- Remove colon patterns
107
+
'(?i)\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$',
108
+
'(?i)\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
109
+
];
110
+
111
+
pattern TEXT;
112
+
result_text TEXT := name_text;
113
+
BEGIN
114
+
-- Return early if input is null or empty
115
+
IF name_text IS NULL OR trim(name_text) = '' THEN
116
+
RETURN name_text;
117
+
END IF;
118
+
119
+
-- Remove discriminant patterns to get base name
120
+
FOREACH pattern IN ARRAY cleanup_patterns
121
+
LOOP
122
+
result_text := regexp_replace(result_text, pattern, ' ', 'g');
123
+
END LOOP;
124
+
125
+
-- Clean up extra whitespace and normalize
126
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
127
+
128
+
-- Remove trailing punctuation that might be left after removal
129
+
result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
130
+
result_text := trim(result_text);
131
+
132
+
-- Ensure we don't return an empty string
133
+
IF length(result_text) = 0 THEN
134
+
RETURN name_text;
135
+
END IF;
136
+
137
+
RETURN result_text;
138
+
END;
139
+
$$ LANGUAGE plpgsql IMMUTABLE;
140
+
141
+
-- Enhanced function to extract discriminant specifically for editions and versions with case-insensitive matching
142
+
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
143
+
DECLARE
144
+
-- Focused patterns for edition/version extraction with case-insensitive flags
145
+
edition_patterns TEXT[] := ARRAY[
146
+
-- Edition patterns
147
+
'(?i)\(([^)]*edition[^)]*)\)',
148
+
'(?i)\[([^]]*edition[^]]*)\]',
149
+
'(?i)\{([^}]*edition[^}]*)\}',
150
+
'(?i)[-–—]\s*([^-–—]*edition[^-–—]*)$',
151
+
'(?i):\s*([^:]*edition[^:]*)$',
152
+
153
+
-- Version patterns
154
+
'(?i)\(([^)]*version[^)]*)\)',
155
+
'(?i)\[([^]]*version[^]]*)\]',
156
+
'(?i)\{([^}]*version[^}]*)\}',
157
+
'(?i)[-–—]\s*([^-–—]*version[^-–—]*)$',
158
+
'(?i):\s*([^:]*version[^:]*)$',
159
+
160
+
-- Remaster patterns
161
+
'(?i)\(([^)]*remaster[^)]*)\)',
162
+
'(?i)\[([^]]*remaster[^]]*)\]',
163
+
'(?i)\{([^}]*remaster[^}]*)\}',
164
+
'(?i)[-–—]\s*([^-–—]*remaster[^-–—]*)$',
165
+
'(?i):\s*([^:]*remaster[^:]*)$',
166
+
167
+
-- Year-based patterns
168
+
'(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
169
+
'(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
170
+
'(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
171
+
];
172
+
173
+
pattern TEXT;
174
+
match_result TEXT;
175
+
BEGIN
176
+
-- Return early if input is null or empty
177
+
IF name_text IS NULL OR trim(name_text) = '' THEN
178
+
RETURN NULL;
179
+
END IF;
180
+
181
+
-- Try edition-specific patterns first
182
+
FOREACH pattern IN ARRAY edition_patterns
183
+
LOOP
184
+
SELECT substring(name_text FROM pattern) INTO match_result;
185
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
186
+
match_result := trim(match_result);
187
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
188
+
IF length(trim(match_result)) > 0 THEN
189
+
RETURN match_result;
190
+
END IF;
191
+
END IF;
192
+
END LOOP;
193
+
194
+
RETURN NULL;
195
+
END;
196
+
$$ LANGUAGE plpgsql IMMUTABLE;
197
+
198
+
-- Update existing records with newly extracted discriminants (case-insensitive)
199
+
UPDATE recordings
200
+
SET discriminant = extract_discriminant(name)
201
+
WHERE discriminant IS NULL
202
+
AND extract_discriminant(name) IS NOT NULL;
203
+
204
+
UPDATE releases
205
+
SET discriminant = extract_discriminant(name)
206
+
WHERE discriminant IS NULL
207
+
AND extract_discriminant(name) IS NOT NULL;
208
+
209
+
UPDATE plays
210
+
SET track_discriminant = extract_discriminant(track_name)
211
+
WHERE track_discriminant IS NULL
212
+
AND extract_discriminant(track_name) IS NOT NULL;
213
+
214
+
UPDATE plays
215
+
SET release_discriminant = extract_discriminant(release_name)
216
+
WHERE release_discriminant IS NULL
217
+
AND release_name IS NOT NULL
218
+
AND extract_discriminant(release_name) IS NOT NULL;
219
+
220
+
-- Update comments for the enhanced functions
221
+
COMMENT ON FUNCTION extract_discriminant IS 'Enhanced case-insensitive discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
222
+
COMMENT ON FUNCTION get_base_name IS 'Enhanced case-insensitive base name extraction removing comprehensive discriminant patterns to enable proper grouping';
223
+
COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized case-insensitive function for extracting edition and version discriminants with focused patterns';
224
+
225
+
-- Refresh materialized views to reflect the case-insensitive improvements
226
+
REFRESH MATERIALIZED VIEW mv_release_play_counts;
227
+
REFRESH MATERIALIZED VIEW mv_recording_play_counts;
228
+
229
+
-- Update discriminant analysis view to include case-insensitive results
230
+
DROP VIEW IF EXISTS discriminant_analysis;
231
+
CREATE OR REPLACE VIEW discriminant_analysis AS
232
+
SELECT
233
+
'recordings' as table_name,
234
+
name as original_name,
235
+
discriminant,
236
+
get_base_name(name) as base_name,
237
+
extract_discriminant(name) as extracted_discriminant,
238
+
extract_edition_discriminant(name) as edition_discriminant
239
+
FROM recordings
240
+
WHERE name IS NOT NULL
241
+
UNION ALL
242
+
SELECT
243
+
'releases' as table_name,
244
+
name as original_name,
245
+
discriminant,
246
+
get_base_name(name) as base_name,
247
+
extract_discriminant(name) as extracted_discriminant,
248
+
extract_edition_discriminant(name) as edition_discriminant
249
+
FROM releases
250
+
WHERE name IS NOT NULL;
251
+
252
+
COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing case-insensitive discriminant extraction results for quality assessment and debugging';
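A sketch of the behavioural change this migration targets (the album title is made up): uppercase edition markers that the earlier case-sensitive patterns would typically miss should now be extracted, and both spellings should strip down to the same base name.

-- Illustrative; before this migration the first call would usually return NULL
SELECT extract_discriminant('Rumours (DELUXE EDITION)') AS upper_case,
       extract_discriminant('Rumours (deluxe edition)') AS lower_case,
       get_base_name('Rumours (DELUXE EDITION)')        AS base_name;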
+12
-3
package.json
+12
-3
package.json
···
7
"dev": "turbo dev",
8
"build": "pnpm turbo run build --filter='./packages/*' --filter='./apps/*'",
9
"build:rust": "turbo run build:rust",
10
-
"typecheck": "pnpm -r exec tsc --noEmit",
11
"test": "turbo run test test:rust",
12
-
"rust:fmt": "cd services && cargo fmt",
13
-
"rust:clippy": "cd services && cargo clippy",
14
"fix": "biome lint --apply . && biome format --write . && biome check . --apply",
15
"nuke": "rimraf node_modules */*/node_modules",
16
"lex:gen-server": "turbo lex:gen-server",
17
"format": "prettier --write .",
···
19
"lex:watch": "cd tools/lexicon-cli && node dist/index.js watch",
20
"lex:validate": "cd tools/lexicon-cli && node dist/index.js validate",
21
"lex:diff": "cd tools/lexicon-cli && node dist/index.js diff",
22
"db:migrate": "cd services && sqlx migrate run",
23
"db:migrate:revert": "cd services && sqlx migrate revert",
24
"db:create": "cd services && sqlx database create",
···
7
"dev": "turbo dev",
8
"build": "pnpm turbo run build --filter='./packages/*' --filter='./apps/*'",
9
"build:rust": "turbo run build:rust",
10
+
"typecheck": "pnpm -r --filter='!./vendor/*' exec tsc --noEmit",
11
"test": "turbo run test test:rust",
12
+
"rust:fmt": "pnpm rust:fmt:services && pnpm rust:fmt:apps",
13
+
"rust:clippy": "pnpm rust:clippy:services && pnpm rust:clippy:apps",
14
+
"rust:fmt:services": "cd services && cargo fmt",
15
+
"rust:clippy:services": "cd services && cargo clippy -- -D warnings",
16
+
"rust:fmt:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Formatting $dir\" && cd \"$dir\" && cargo fmt && cd ../..; fi; done",
17
+
"rust:clippy:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Linting $dir\" && cd \"$dir\" && cargo clippy -- -D warnings && cd ../..; fi; done",
18
"fix": "biome lint --apply . && biome format --write . && biome check . --apply",
19
+
"hooks:install": "./scripts/install-git-hooks.sh",
20
+
"hooks:install-precommit": "pre-commit install",
21
+
"postinstall": "pnpm lex:gen-server",
22
"nuke": "rimraf node_modules */*/node_modules",
23
"lex:gen-server": "turbo lex:gen-server",
24
"format": "prettier --write .",
···
26
"lex:watch": "cd tools/lexicon-cli && node dist/index.js watch",
27
"lex:validate": "cd tools/lexicon-cli && node dist/index.js validate",
28
"lex:diff": "cd tools/lexicon-cli && node dist/index.js diff",
29
+
"lex:build-amethyst": "pnpm lex:gen-server && pnpm turbo build --filter=@teal/amethyst",
30
+
"lex:dev": "pnpm lex:gen-server && pnpm turbo dev --filter=@teal/amethyst",
31
"db:migrate": "cd services && sqlx migrate run",
32
"db:migrate:revert": "cd services && sqlx migrate revert",
33
"db:create": "cd services && sqlx database create",
+25
packages/lexicons/lex-gen.sh
+25
packages/lexicons/lex-gen.sh
···
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Navigate to the lexicons directory and find all .json files
5
+
cd ../../lexicons
6
+
json_files=$(find . -name "*.json" -type f)
7
+
8
+
# Go back to the lexicons package directory
9
+
cd ../packages/lexicons
10
+
11
+
# Check if we found any lexicon files
12
+
if [ -z "$json_files" ]; then
13
+
echo "No lexicon files found in ../../lexicons/"
14
+
exit 1
15
+
fi
16
+
17
+
# Convert the file list to absolute paths
18
+
lexicon_paths=""
19
+
for file in $json_files; do
20
+
lexicon_paths="$lexicon_paths ../../lexicons/$file"
21
+
done
22
+
23
+
# Generate lexicons
24
+
echo "Generating lexicons from: $lexicon_paths"
25
+
lex gen-server ./src $lexicon_paths --yes
+14
packages/lexicons/package.json
+14
packages/lexicons/package.json
···
···
1
+
{
2
+
"name": "@teal/lexicons",
3
+
"type": "module",
4
+
"main": "./index.ts",
5
+
"dependencies": {
6
+
"@atproto/lex-cli": "^0.5.4",
7
+
"@atproto/lexicon": "^0.4.2",
8
+
"@atproto/xrpc-server": "^0.7.4",
9
+
"@teal/tsconfig": "workspace:*"
10
+
},
11
+
"scripts": {
12
+
"lex:gen-server": "bash ./lex-gen.sh"
13
+
}
14
+
}
-4
pnpm-lock.yaml
-4
pnpm-lock.yaml
+100
scripts/install-git-hooks.sh
+100
scripts/install-git-hooks.sh
···
···
1
+
#!/bin/bash
2
+
3
+
# Install git hooks for the Teal project
4
+
# This script sets up pre-commit hooks for code formatting and linting
5
+
6
+
set -e
7
+
8
+
# Colors for output
9
+
RED='\033[0;31m'
10
+
GREEN='\033[0;32m'
11
+
YELLOW='\033[1;33m'
12
+
BLUE='\033[0;34m'
13
+
NC='\033[0m' # No Color
14
+
15
+
print_status() {
16
+
echo -e "${BLUE}[INFO]${NC} $1"
17
+
}
18
+
19
+
print_success() {
20
+
echo -e "${GREEN}[SUCCESS]${NC} $1"
21
+
}
22
+
23
+
print_error() {
24
+
echo -e "${RED}[ERROR]${NC} $1"
25
+
}
26
+
27
+
print_warning() {
28
+
echo -e "${YELLOW}[WARNING]${NC} $1"
29
+
}
30
+
31
+
# Check if we're in a git repository
32
+
if [ ! -d ".git" ]; then
33
+
print_error "This script must be run from the root of a git repository"
34
+
exit 1
35
+
fi
36
+
37
+
print_status "Installing git hooks for Teal project..."
38
+
39
+
# Create hooks directory if it doesn't exist
40
+
mkdir -p .git/hooks
41
+
42
+
# Install pre-commit hook
43
+
if [ -f "scripts/pre-commit-hook.sh" ]; then
44
+
print_status "Installing pre-commit hook..."
45
+
cp scripts/pre-commit-hook.sh .git/hooks/pre-commit
46
+
chmod +x .git/hooks/pre-commit
47
+
print_success "Pre-commit hook installed"
48
+
else
49
+
print_error "Pre-commit hook script not found at scripts/pre-commit-hook.sh"
50
+
exit 1
51
+
fi
52
+
53
+
# Optional: Install other hooks
54
+
# You can add more hooks here if needed
55
+
56
+
print_status "Testing hook installation..."
57
+
58
+
# Test if the hook is executable
59
+
if [ -x ".git/hooks/pre-commit" ]; then
60
+
print_success "Pre-commit hook is executable"
61
+
else
62
+
print_error "Pre-commit hook is not executable"
63
+
exit 1
64
+
fi
65
+
66
+
# Check if required tools are available
67
+
print_status "Checking required tools..."
68
+
69
+
MISSING_TOOLS=""
70
+
71
+
if ! command -v pnpm >/dev/null 2>&1; then
72
+
MISSING_TOOLS="$MISSING_TOOLS pnpm"
73
+
fi
74
+
75
+
if ! command -v node >/dev/null 2>&1; then
76
+
MISSING_TOOLS="$MISSING_TOOLS node"
77
+
fi
78
+
79
+
if ! command -v cargo >/dev/null 2>&1; then
80
+
MISSING_TOOLS="$MISSING_TOOLS cargo"
81
+
fi
82
+
83
+
if [ -n "$MISSING_TOOLS" ]; then
84
+
print_warning "Some tools are missing:$MISSING_TOOLS"
85
+
print_warning "The git hooks may not work properly without these tools"
86
+
else
87
+
print_success "All required tools are available"
88
+
fi
89
+
90
+
print_success "Git hooks installation complete! ๐"
91
+
print_status "The following hooks have been installed:"
92
+
echo " - pre-commit: Runs formatting and linting checks before commits"
93
+
94
+
print_status "To test the pre-commit hook, try making a commit with staged files"
95
+
print_status "To temporarily skip hooks, use: git commit --no-verify"
96
+
97
+
# Optional: Show hook status
98
+
echo ""
99
+
print_status "Installed hooks:"
100
+
ls -la .git/hooks/ | grep -v sample | grep -v "^d" | sed 's/^/ /'
+213
scripts/pre-commit-hook.sh
+213
scripts/pre-commit-hook.sh
···
···
1
+
#!/bin/bash
2
+
3
+
# Pre-commit hook for Teal project
4
+
# This script runs code formatting and linting checks before allowing commits
5
+
6
+
set -e
7
+
8
+
echo "๐ Running pre-commit checks..."
9
+
10
+
# Colors for output
11
+
RED='\033[0;31m'
12
+
GREEN='\033[0;32m'
13
+
YELLOW='\033[1;33m'
14
+
BLUE='\033[0;34m'
15
+
NC='\033[0m' # No Color
16
+
17
+
# Function to print colored output
18
+
print_status() {
19
+
echo -e "${BLUE}[INFO]${NC} $1"
20
+
}
21
+
22
+
print_success() {
23
+
echo -e "${GREEN}[SUCCESS]${NC} $1"
24
+
}
25
+
26
+
print_warning() {
27
+
echo -e "${YELLOW}[WARNING]${NC} $1"
28
+
}
29
+
30
+
print_error() {
31
+
echo -e "${RED}[ERROR]${NC} $1"
32
+
}
33
+
34
+
# Get list of staged files
35
+
STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM)
36
+
37
+
if [ -z "$STAGED_FILES" ]; then
38
+
print_warning "No staged files found"
39
+
exit 0
40
+
fi
41
+
42
+
# Check if we have TypeScript/JavaScript files
43
+
TS_JS_FILES=$(echo "$STAGED_FILES" | grep -E '\.(ts|tsx|js|jsx)$' || true)
44
+
# Check if we have Rust files
45
+
RUST_FILES=$(echo "$STAGED_FILES" | grep -E '\.rs$' || true)
46
+
# Check if we have lexicon files
47
+
LEXICON_FILES=$(echo "$STAGED_FILES" | grep -E 'lexicons/.*\.json$' || true)
48
+
49
+
print_status "Staged files to check:"
50
+
echo "$STAGED_FILES" | sed 's/^/ - /'
51
+
52
+
# 1. TypeScript/JavaScript checks
53
+
if [ -n "$TS_JS_FILES" ]; then
54
+
print_status "Running TypeScript/JavaScript checks..."
55
+
56
+
# Check if biome is available and run it
57
+
if command -v pnpm >/dev/null 2>&1; then
58
+
print_status "Running Biome formatting and linting..."
59
+
if ! pnpm biome check . --apply --no-errors-on-unmatched 2>/dev/null; then
60
+
print_error "Biome check failed. Please fix the issues and try again."
61
+
exit 1
62
+
fi
63
+
64
+
print_status "Running Prettier formatting..."
65
+
if ! pnpm prettier --write $TS_JS_FILES 2>/dev/null; then
66
+
print_error "Prettier formatting failed. Please fix the issues and try again."
67
+
exit 1
68
+
fi
69
+
70
+
# TypeScript checking temporarily disabled due to vendor compilation issues
71
+
# Re-enable once vendor code is fixed
72
+
else
73
+
print_warning "pnpm not found, skipping JS/TS checks"
74
+
fi
75
+
fi
76
+
77
+
# 2. Rust checks
78
+
if [ -n "$RUST_FILES" ]; then
79
+
print_status "Running Rust checks..."
80
+
81
+
if command -v cargo >/dev/null 2>&1; then
82
+
RUST_ERRORS=0
83
+
84
+
# Check services workspace
85
+
if [ -f "services/Cargo.toml" ]; then
86
+
print_status "Running cargo fmt on services workspace..."
87
+
if ! (cd services && cargo fmt --check) 2>/dev/null; then
88
+
print_status "Auto-formatting Rust code in services..."
89
+
(cd services && cargo fmt) 2>/dev/null || true
90
+
fi
91
+
92
+
print_status "Running cargo clippy on services workspace..."
93
+
if (cd services && cargo check); then
94
+
if ! (cd services && cargo clippy -- -D warnings); then
95
+
print_warning "Cargo clippy found issues in services workspace. Please fix the warnings."
96
+
print_warning "Run 'pnpm rust:clippy:services' to see detailed errors."
97
+
# Don't fail the commit for clippy warnings, just warn
98
+
fi
99
+
else
100
+
print_warning "Services workspace has compilation errors. Skipping clippy."
101
+
print_warning "Run 'pnpm rust:clippy:services' to see detailed errors."
102
+
fi
103
+
fi
104
+
105
+
# Check individual Rust projects outside services
106
+
CHECKED_DIRS=""
107
+
for rust_file in $RUST_FILES; do
108
+
rust_dir=$(dirname "$rust_file")
109
+
# Find the nearest Cargo.toml going up the directory tree
110
+
check_dir="$rust_dir"
111
+
while [ "$check_dir" != "." ] && [ "$check_dir" != "/" ]; do
112
+
if [ -f "$check_dir/Cargo.toml" ] && [ "$check_dir" != "services" ]; then
113
+
# Skip if we already checked this directory
114
+
if echo "$CHECKED_DIRS" | grep -q "$check_dir"; then
115
+
break
116
+
fi
117
+
CHECKED_DIRS="$CHECKED_DIRS $check_dir"
118
+
119
+
# Found a Cargo.toml outside services workspace
120
+
print_status "Running cargo fmt on $check_dir..."
121
+
if ! (cd "$check_dir" && cargo fmt --check) 2>/dev/null; then
122
+
print_status "Auto-formatting Rust code in $check_dir..."
123
+
(cd "$check_dir" && cargo fmt) 2>/dev/null || true
124
+
fi
125
+
126
+
print_status "Running cargo clippy on $check_dir..."
127
+
if (cd "$check_dir" && cargo check); then
128
+
if ! (cd "$check_dir" && cargo clippy -- -D warnings); then
129
+
print_error "Cargo clippy found issues in $check_dir. Please fix the warnings and try again."
130
+
RUST_ERRORS=1
131
+
fi
132
+
else
133
+
print_warning "Project $check_dir has compilation errors. Skipping clippy."
134
+
print_warning "Run 'cd $check_dir && cargo check' to see detailed errors."
135
+
fi
136
+
break
137
+
fi
138
+
check_dir=$(dirname "$check_dir")
139
+
done
140
+
done
141
+
142
+
if [ $RUST_ERRORS -eq 1 ]; then
143
+
exit 1
144
+
fi
145
+
else
146
+
print_warning "Cargo not found, skipping Rust checks"
147
+
fi
148
+
fi
149
+
150
+
# 3. Lexicon checks
151
+
if [ -n "$LEXICON_FILES" ]; then
152
+
print_status "Lexicon files changed, validating and regenerating..."
153
+
154
+
if command -v pnpm >/dev/null 2>&1; then
155
+
print_status "Validating lexicons..."
156
+
if ! pnpm lex:validate 2>/dev/null; then
157
+
print_error "Lexicon validation failed. Please fix the lexicon files and try again."
158
+
exit 1
159
+
fi
160
+
161
+
print_status "Regenerating lexicons..."
162
+
if ! pnpm lex:gen-server 2>/dev/null; then
163
+
print_error "Lexicon generation failed. Please check the lexicon files and try again."
164
+
exit 1
165
+
fi
166
+
167
+
# Note: Generated lexicon files are ignored by .gitignore and not added to staging
168
+
print_status "Generated lexicon files are ignored by .gitignore (as intended)"
169
+
else
170
+
print_warning "pnpm not found, skipping lexicon checks"
171
+
fi
172
+
fi
173
+
174
+
# 4. Re-add files that might have been formatted
175
+
FORMATTED_FILES=""
176
+
for file in $STAGED_FILES; do
177
+
if [ -f "$file" ]; then
178
+
# Check if file was modified by formatters
179
+
if [ -n "$(git diff "$file")" ]; then
180
+
FORMATTED_FILES="$FORMATTED_FILES $file"
181
+
git add "$file"
182
+
fi
183
+
fi
184
+
done
185
+
186
+
if [ -n "$FORMATTED_FILES" ]; then
187
+
print_success "Auto-formatted files have been re-staged:"
188
+
echo "$FORMATTED_FILES" | tr ' ' '\n' | sed 's/^/ - /'
189
+
fi
190
+
191
+
# 5. Final validation - ensure no syntax errors in staged files
192
+
print_status "Running final validation..."
193
+
194
+
# Check for common issues
195
+
for file in $TS_JS_FILES; do
196
+
if [ -f "$file" ]; then
197
+
# Check for console.log statements (optional - remove if you want to allow them)
198
+
if grep -n "console\.log" "$file" >/dev/null 2>&1; then
199
+
print_warning "Found console.log statements in $file! yooo!!!"
200
+
# Uncomment the next two lines if you want to block commits with console.log
201
+
# print_error "Please remove console.log statements before committing"
202
+
# exit 1
203
+
fi
204
+
205
+
# Check for TODO/FIXME comments in committed code (optional)
206
+
if grep -n -i "TODO\|FIXME" "$file" >/dev/null 2>&1; then
207
+
print_warning "Found TODO/FIXME comments in $file"
208
+
fi
209
+
fi
210
+
done
211
+
212
+
print_success "All pre-commit checks passed! ๐"
213
+
exit 0
+66
scripts/setup-lexicons.sh
+66
scripts/setup-lexicons.sh
···
···
1
+
#!/bin/bash
2
+
# scripts/setup-lexicons.sh
3
+
# Setup script for ATProto lexicons submodule and symbolic links
4
+
5
+
set -e
6
+
7
+
echo "Setting up lexicons..."
8
+
9
+
# Check if we're in the right directory
10
+
if [ ! -f "package.json" ] || [ ! -d "lexicons" ]; then
11
+
echo "Error: This script must be run from the project root directory"
12
+
exit 1
13
+
fi
14
+
15
+
# Initialize submodules
16
+
echo "Initializing submodules..."
17
+
git submodule update --init --recursive
18
+
19
+
# Check if vendor/atproto exists
20
+
if [ ! -d "vendor/atproto" ]; then
21
+
echo "Error: vendor/atproto submodule not found"
22
+
exit 1
23
+
fi
24
+
25
+
# Create symbolic links if they don't exist
26
+
echo "Creating symbolic links..."
27
+
cd lexicons
28
+
29
+
if [ ! -L app ]; then
30
+
ln -s ../vendor/atproto/lexicons/app app
31
+
echo "Created symlink: lexicons/app"
32
+
else
33
+
echo "Symlink already exists: lexicons/app"
34
+
fi
35
+
36
+
if [ ! -L chat ]; then
37
+
ln -s ../vendor/atproto/lexicons/chat chat
38
+
echo "Created symlink: lexicons/chat"
39
+
else
40
+
echo "Symlink already exists: lexicons/chat"
41
+
fi
42
+
43
+
if [ ! -L com ]; then
44
+
ln -s ../vendor/atproto/lexicons/com com
45
+
echo "Created symlink: lexicons/com"
46
+
else
47
+
echo "Symlink already exists: lexicons/com"
48
+
fi
49
+
50
+
if [ ! -L tools ]; then
51
+
ln -s ../vendor/atproto/lexicons/tools tools
52
+
echo "Created symlink: lexicons/tools"
53
+
else
54
+
echo "Symlink already exists: lexicons/tools"
55
+
fi
56
+
57
+
cd ..
58
+
59
+
echo "Lexicons setup complete!"
60
+
echo ""
61
+
echo "You should now have access to:"
62
+
echo " - lexicons/app -> ATProto app lexicons"
63
+
echo " - lexicons/chat -> ATProto chat lexicons"
64
+
echo " - lexicons/com -> ATProto protocol lexicons"
65
+
echo " - lexicons/tools -> ATProto tools lexicons"
66
+
echo " - lexicons/fm.teal.alpha -> Custom Teal lexicons"
+69
scripts/setup-sqlx-offline.sh
+69
scripts/setup-sqlx-offline.sh
···
···
1
+
#!/bin/bash
2
+
3
+
# Script to copy .sqlx files to all Rust projects that use SQLx
4
+
# This is needed for offline SQLx builds (SQLX_OFFLINE=true)
5
+
6
+
set -e
7
+
8
+
# Get the script directory (should be in teal/scripts/)
9
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10
+
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
11
+
12
+
# Source .sqlx directory
13
+
SQLX_SOURCE="$PROJECT_ROOT/.sqlx"
14
+
15
+
# List of projects that use SQLx (relative to project root)
16
+
SQLX_PROJECTS=(
17
+
"apps/aqua"
18
+
"services/cadet"
19
+
"services/satellite"
20
+
)
21
+
22
+
echo "๐ง Setting up SQLx offline files..."
23
+
24
+
# Check if source .sqlx directory exists
25
+
if [ ! -d "$SQLX_SOURCE" ]; then
26
+
echo "โ Source .sqlx directory not found at: $SQLX_SOURCE"
27
+
echo " Make sure you've run 'cargo sqlx prepare' from the services directory first."
28
+
exit 1
29
+
fi
30
+
31
+
# Copy .sqlx files to each project that needs them
32
+
for project in "${SQLX_PROJECTS[@]}"; do
33
+
project_path="$PROJECT_ROOT/$project"
34
+
target_sqlx="$project_path/.sqlx"
35
+
36
+
if [ ! -d "$project_path" ]; then
37
+
echo "โ ๏ธ Project directory not found: $project_path (skipping)"
38
+
continue
39
+
fi
40
+
41
+
# Check if project actually uses SQLx
42
+
if [ ! -f "$project_path/Cargo.toml" ]; then
43
+
echo "โ ๏ธ No Cargo.toml found in $project (skipping)"
44
+
continue
45
+
fi
46
+
47
+
if ! grep -q "sqlx" "$project_path/Cargo.toml"; then
48
+
echo "โ ๏ธ Project $project doesn't appear to use SQLx (skipping)"
49
+
continue
50
+
fi
51
+
52
+
echo "๐ฆ Copying .sqlx files to $project..."
53
+
54
+
# Remove existing .sqlx directory if it exists
55
+
if [ -d "$target_sqlx" ]; then
56
+
rm -rf "$target_sqlx"
57
+
fi
58
+
59
+
# Copy the .sqlx directory
60
+
cp -r "$SQLX_SOURCE" "$target_sqlx"
61
+
62
+
echo " โ
Copied $(ls -1 "$target_sqlx" | wc -l) query files"
63
+
done
64
+
65
+
echo "โ
SQLx offline setup complete!"
66
+
echo ""
67
+
echo "Note: If you add new SQL queries or modify existing ones, you'll need to:"
68
+
echo "1. Run 'cargo sqlx prepare' from the services directory"
69
+
echo "2. Run this script again to update all project copies"
+62
scripts/update-lexicons.sh
+62
scripts/update-lexicons.sh
···
···
1
+
#!/bin/bash
2
+
# scripts/update-lexicons.sh
3
+
# Update script for ATProto lexicons from upstream
4
+
5
+
set -e
6
+
7
+
echo "Updating ATProto lexicons..."
8
+
9
+
# Check if we're in the right directory
10
+
if [ ! -f "package.json" ] || [ ! -d "vendor/atproto" ]; then
11
+
echo "Error: This script must be run from the project root directory"
12
+
echo "Make sure vendor/atproto submodule exists"
13
+
exit 1
14
+
fi
15
+
16
+
# Save current directory
17
+
PROJECT_ROOT=$(pwd)
18
+
19
+
# Update the submodule
20
+
echo "Fetching latest changes from atproto repository..."
21
+
cd vendor/atproto
22
+
23
+
# Fetch latest changes
24
+
git fetch origin
25
+
26
+
# Get current commit
27
+
CURRENT_COMMIT=$(git rev-parse HEAD)
28
+
CURRENT_SHORT=$(git rev-parse --short HEAD)
29
+
30
+
# Get latest commit on main
31
+
LATEST_COMMIT=$(git rev-parse origin/main)
32
+
LATEST_SHORT=$(git rev-parse --short origin/main)
33
+
34
+
if [ "$CURRENT_COMMIT" = "$LATEST_COMMIT" ]; then
35
+
echo "Already up to date (${CURRENT_SHORT})"
36
+
cd "$PROJECT_ROOT"
37
+
exit 0
38
+
fi
39
+
40
+
echo "Updating from ${CURRENT_SHORT} to ${LATEST_SHORT}..."
41
+
42
+
# Pull latest changes
43
+
git pull origin main
44
+
45
+
# Go back to project root
46
+
cd "$PROJECT_ROOT"
47
+
48
+
# Stage the submodule update
49
+
git add vendor/atproto
50
+
51
+
# Show what changed
52
+
echo ""
53
+
echo "Submodule updated successfully!"
54
+
echo "Changes:"
55
+
git diff --cached --submodule=log vendor/atproto
56
+
57
+
echo ""
58
+
echo "To complete the update, commit the changes:"
59
+
echo " git commit -m \"Update atproto lexicons to ${LATEST_SHORT}\""
60
+
echo ""
61
+
echo "Or to see what lexicon files changed:"
62
+
echo " cd vendor/atproto && git log --oneline ${CURRENT_SHORT}..${LATEST_SHORT} -- lexicons/"
-6
services/.sqlx/.sqlxrc
-6
services/.sqlx/.sqlxrc
+96
-366
services/Cargo.lock
+96
-366
services/Cargo.lock
···
60
]
61
62
[[package]]
63
-
name = "anstream"
64
-
version = "0.6.19"
65
-
source = "registry+https://github.com/rust-lang/crates.io-index"
66
-
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
67
-
dependencies = [
68
-
"anstyle",
69
-
"anstyle-parse",
70
-
"anstyle-query",
71
-
"anstyle-wincon",
72
-
"colorchoice",
73
-
"is_terminal_polyfill",
74
-
"utf8parse",
75
-
]
76
-
77
-
[[package]]
78
-
name = "anstyle"
79
-
version = "1.0.11"
80
-
source = "registry+https://github.com/rust-lang/crates.io-index"
81
-
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
82
-
83
-
[[package]]
84
-
name = "anstyle-parse"
85
-
version = "0.2.7"
86
-
source = "registry+https://github.com/rust-lang/crates.io-index"
87
-
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
88
-
dependencies = [
89
-
"utf8parse",
90
-
]
91
-
92
-
[[package]]
93
-
name = "anstyle-query"
94
-
version = "1.1.3"
95
-
source = "registry+https://github.com/rust-lang/crates.io-index"
96
-
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
97
-
dependencies = [
98
-
"windows-sys 0.59.0",
99
-
]
100
-
101
-
[[package]]
102
-
name = "anstyle-wincon"
103
-
version = "3.0.9"
104
-
source = "registry+https://github.com/rust-lang/crates.io-index"
105
-
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
106
-
dependencies = [
107
-
"anstyle",
108
-
"once_cell_polyfill",
109
-
"windows-sys 0.59.0",
110
-
]
111
-
112
-
[[package]]
113
name = "anyhow"
114
version = "1.0.98"
115
source = "registry+https://github.com/rust-lang/crates.io-index"
116
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
117
118
[[package]]
119
-
name = "aqua"
120
-
version = "0.1.0"
121
-
dependencies = [
122
-
"anyhow",
123
-
"async-trait",
124
-
"atrium-api",
125
-
"axum",
126
-
"base64",
127
-
"chrono",
128
-
"clap",
129
-
"dotenvy",
130
-
"iroh-car",
131
-
"redis",
132
-
"reqwest",
133
-
"serde",
134
-
"serde_json",
135
-
"sqlx",
136
-
"sys-info",
137
-
"time",
138
-
"tokio",
139
-
"tower-http",
140
-
"tracing",
141
-
"tracing-subscriber",
142
-
"types",
143
-
"url",
144
-
"uuid",
145
-
"vergen",
146
-
"vergen-gitcl",
147
-
]
148
-
149
-
[[package]]
150
name = "arc-swap"
151
version = "1.7.1"
152
source = "registry+https://github.com/rust-lang/crates.io-index"
···
187
]
188
189
[[package]]
190
name = "atoi"
191
version = "2.0.0"
192
source = "registry+https://github.com/rust-lang/crates.io-index"
···
287
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
288
dependencies = [
289
"axum-core",
290
-
"axum-macros",
291
"bytes",
292
"form_urlencoded",
293
"futures-util",
···
300
"matchit",
301
"memchr",
302
"mime",
303
-
"multer",
304
"percent-encoding",
305
"pin-project-lite",
306
"rustversion",
···
337
]
338
339
[[package]]
340
-
name = "axum-macros"
341
-
version = "0.5.0"
342
-
source = "registry+https://github.com/rust-lang/crates.io-index"
343
-
checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
344
-
dependencies = [
345
-
"proc-macro2",
346
-
"quote",
347
-
"syn 2.0.104",
348
-
]
349
-
350
-
[[package]]
351
name = "backtrace"
352
version = "0.3.75"
353
source = "registry+https://github.com/rust-lang/crates.io-index"
···
504
version = "1.10.1"
505
source = "registry+https://github.com/rust-lang/crates.io-index"
506
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
507
508
[[package]]
509
name = "cadet"
···
511
dependencies = [
512
"anyhow",
513
"async-trait",
514
"atrium-api",
515
"base64",
516
"chrono",
517
"cid 0.11.1",
518
"dotenvy",
519
"flume",
520
-
"iroh-car",
521
"libipld",
522
"metrics 0.23.1",
523
"metrics-exporter-prometheus",
···
528
"reqwest",
529
"rocketman",
530
"serde",
531
"serde_json",
532
"sqlx",
533
"time",
···
541
]
542
543
[[package]]
544
-
name = "camino"
545
-
version = "1.1.10"
546
-
source = "registry+https://github.com/rust-lang/crates.io-index"
547
-
checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab"
548
-
dependencies = [
549
-
"serde",
550
-
]
551
-
552
-
[[package]]
553
-
name = "cargo-platform"
554
-
version = "0.1.9"
555
-
source = "registry+https://github.com/rust-lang/crates.io-index"
556
-
checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea"
557
-
dependencies = [
558
-
"serde",
559
-
]
560
-
561
-
[[package]]
562
-
name = "cargo_metadata"
563
-
version = "0.19.2"
564
-
source = "registry+https://github.com/rust-lang/crates.io-index"
565
-
checksum = "dd5eb614ed4c27c5d706420e4320fbe3216ab31fa1c33cd8246ac36dae4479ba"
566
-
dependencies = [
567
-
"camino",
568
-
"cargo-platform",
569
-
"semver",
570
-
"serde",
571
-
"serde_json",
572
-
"thiserror 2.0.12",
573
-
]
574
-
575
-
[[package]]
576
name = "cbor4ii"
577
version = "0.2.14"
578
source = "registry+https://github.com/rust-lang/crates.io-index"
···
661
]
662
663
[[package]]
664
-
name = "clap"
665
-
version = "4.5.41"
666
-
source = "registry+https://github.com/rust-lang/crates.io-index"
667
-
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
668
-
dependencies = [
669
-
"clap_builder",
670
-
"clap_derive",
671
-
]
672
-
673
-
[[package]]
674
-
name = "clap_builder"
675
-
version = "4.5.41"
676
-
source = "registry+https://github.com/rust-lang/crates.io-index"
677
-
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
678
-
dependencies = [
679
-
"anstream",
680
-
"anstyle",
681
-
"clap_lex",
682
-
"strsim",
683
-
]
684
-
685
-
[[package]]
686
-
name = "clap_derive"
687
-
version = "4.5.41"
688
-
source = "registry+https://github.com/rust-lang/crates.io-index"
689
-
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
690
-
dependencies = [
691
-
"heck",
692
-
"proc-macro2",
693
-
"quote",
694
-
"syn 2.0.104",
695
-
]
696
-
697
-
[[package]]
698
-
name = "clap_lex"
699
-
version = "0.7.5"
700
-
source = "registry+https://github.com/rust-lang/crates.io-index"
701
-
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
702
-
703
-
[[package]]
704
name = "cmake"
705
version = "0.1.54"
706
source = "registry+https://github.com/rust-lang/crates.io-index"
···
708
dependencies = [
709
"cc",
710
]
711
-
712
-
[[package]]
713
-
name = "colorchoice"
714
-
version = "1.0.4"
715
-
source = "registry+https://github.com/rust-lang/crates.io-index"
716
-
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
717
718
[[package]]
719
name = "combine"
···
1296
"libc",
1297
"log",
1298
"rustversion",
1299
-
"windows 0.61.3",
1300
]
1301
1302
[[package]]
···
1568
"js-sys",
1569
"log",
1570
"wasm-bindgen",
1571
-
"windows-core 0.61.2",
1572
]
1573
1574
[[package]]
···
1757
]
1758
1759
[[package]]
1760
-
name = "is_terminal_polyfill"
1761
-
version = "1.70.1"
1762
source = "registry+https://github.com/rust-lang/crates.io-index"
1763
-
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
1764
1765
[[package]]
1766
name = "itertools"
···
2150
]
2151
2152
[[package]]
2153
-
name = "multer"
2154
-
version = "3.1.0"
2155
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2156
-
checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
2157
-
dependencies = [
2158
-
"bytes",
2159
-
"encoding_rs",
2160
-
"futures-util",
2161
-
"http",
2162
-
"httparse",
2163
-
"memchr",
2164
-
"mime",
2165
-
"spin",
2166
-
"version_check",
2167
-
]
2168
-
2169
-
[[package]]
2170
name = "multibase"
2171
version = "0.9.1"
2172
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2300
]
2301
2302
[[package]]
2303
-
name = "ntapi"
2304
-
version = "0.4.1"
2305
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2306
-
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
2307
-
dependencies = [
2308
-
"winapi",
2309
-
]
2310
-
2311
-
[[package]]
2312
name = "nu-ansi-term"
2313
version = "0.46.0"
2314
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2383
]
2384
2385
[[package]]
2386
-
name = "num_threads"
2387
-
version = "0.1.7"
2388
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2389
-
checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
2390
-
dependencies = [
2391
-
"libc",
2392
-
]
2393
-
2394
-
[[package]]
2395
-
name = "objc2-core-foundation"
2396
-
version = "0.3.1"
2397
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2398
-
checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166"
2399
-
dependencies = [
2400
-
"bitflags 2.9.1",
2401
-
]
2402
-
2403
-
[[package]]
2404
name = "object"
2405
version = "0.36.7"
2406
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2414
version = "1.21.3"
2415
source = "registry+https://github.com/rust-lang/crates.io-index"
2416
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
2417
-
2418
-
[[package]]
2419
-
name = "once_cell_polyfill"
2420
-
version = "1.70.1"
2421
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2422
-
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
2423
2424
[[package]]
2425
name = "openssl"
···
3019
dependencies = [
3020
"aws-lc-rs",
3021
"once_cell",
3022
"rustls-pki-types",
3023
"rustls-webpki",
3024
"subtle",
···
3150
version = "1.0.26"
3151
source = "registry+https://github.com/rust-lang/crates.io-index"
3152
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
3153
-
dependencies = [
3154
-
"serde",
3155
-
]
3156
3157
[[package]]
3158
name = "serde"
···
3209
]
3210
3211
[[package]]
3212
name = "serde_json"
3213
version = "1.0.141"
3214
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3296
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3297
3298
[[package]]
3299
name = "signature"
3300
version = "2.2.0"
3301
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3402
"memchr",
3403
"once_cell",
3404
"percent-encoding",
3405
"serde",
3406
"serde_json",
3407
"sha2",
···
3413
"tracing",
3414
"url",
3415
"uuid",
3416
]
3417
3418
[[package]]
···
3662
]
3663
3664
[[package]]
3665
-
name = "sys-info"
3666
-
version = "0.9.1"
3667
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3668
-
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
3669
-
dependencies = [
3670
-
"cc",
3671
-
"libc",
3672
-
]
3673
-
3674
-
[[package]]
3675
-
name = "sysinfo"
3676
-
version = "0.34.2"
3677
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3678
-
checksum = "a4b93974b3d3aeaa036504b8eefd4c039dced109171c1ae973f1dc63b2c7e4b2"
3679
-
dependencies = [
3680
-
"libc",
3681
-
"memchr",
3682
-
"ntapi",
3683
-
"objc2-core-foundation",
3684
-
"windows 0.57.0",
3685
-
]
3686
-
3687
-
[[package]]
3688
name = "system-configuration"
3689
version = "0.6.1"
3690
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3781
dependencies = [
3782
"deranged",
3783
"itoa",
3784
-
"libc",
3785
"num-conv",
3786
-
"num_threads",
3787
"powerfmt",
3788
"serde",
3789
"time-core",
···
3842
"io-uring",
3843
"libc",
3844
"mio",
3845
"pin-project-lite",
3846
"slab",
3847
"socket2 0.5.10",
3848
"tokio-macros",
···
4133
"serde_ipld_dagcbor",
4134
"serde_json",
4135
"thiserror 2.0.12",
4136
-
"uuid",
4137
]
4138
4139
[[package]]
···
4211
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
4212
4213
[[package]]
4214
-
name = "utf8parse"
4215
-
version = "0.2.2"
4216
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4217
-
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
4218
-
4219
-
[[package]]
4220
name = "uuid"
4221
version = "1.17.0"
4222
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4241
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
4242
4243
[[package]]
4244
-
name = "vergen"
4245
-
version = "9.0.6"
4246
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4247
-
checksum = "6b2bf58be11fc9414104c6d3a2e464163db5ef74b12296bda593cac37b6e4777"
4248
-
dependencies = [
4249
-
"anyhow",
4250
-
"cargo_metadata",
4251
-
"derive_builder",
4252
-
"regex",
4253
-
"rustc_version",
4254
-
"rustversion",
4255
-
"sysinfo",
4256
-
"time",
4257
-
"vergen-lib",
4258
-
]
4259
-
4260
-
[[package]]
4261
-
name = "vergen-gitcl"
4262
-
version = "1.0.8"
4263
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4264
-
checksum = "b9dfc1de6eb2e08a4ddf152f1b179529638bedc0ea95e6d667c014506377aefe"
4265
-
dependencies = [
4266
-
"anyhow",
4267
-
"derive_builder",
4268
-
"rustversion",
4269
-
"time",
4270
-
"vergen",
4271
-
"vergen-lib",
4272
-
]
4273
-
4274
-
[[package]]
4275
-
name = "vergen-lib"
4276
-
version = "0.1.6"
4277
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4278
-
checksum = "9b07e6010c0f3e59fcb164e0163834597da68d1f864e2b8ca49f74de01e9c166"
4279
-
dependencies = [
4280
-
"anyhow",
4281
-
"derive_builder",
4282
-
"rustversion",
4283
-
]
4284
-
4285
-
[[package]]
4286
name = "version_check"
4287
version = "0.9.5"
4288
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4410
]
4411
4412
[[package]]
4413
name = "which"
4414
version = "4.4.2"
4415
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4455
4456
[[package]]
4457
name = "windows"
4458
-
version = "0.57.0"
4459
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4460
-
checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143"
4461
-
dependencies = [
4462
-
"windows-core 0.57.0",
4463
-
"windows-targets 0.52.6",
4464
-
]
4465
-
4466
-
[[package]]
4467
-
name = "windows"
4468
version = "0.61.3"
4469
source = "registry+https://github.com/rust-lang/crates.io-index"
4470
checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
4471
dependencies = [
4472
"windows-collections",
4473
-
"windows-core 0.61.2",
4474
"windows-future",
4475
"windows-link",
4476
"windows-numerics",
···
4482
source = "registry+https://github.com/rust-lang/crates.io-index"
4483
checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
4484
dependencies = [
4485
-
"windows-core 0.61.2",
4486
-
]
4487
-
4488
-
[[package]]
4489
-
name = "windows-core"
4490
-
version = "0.57.0"
4491
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4492
-
checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d"
4493
-
dependencies = [
4494
-
"windows-implement 0.57.0",
4495
-
"windows-interface 0.57.0",
4496
-
"windows-result 0.1.2",
4497
-
"windows-targets 0.52.6",
4498
]
4499
4500
[[package]]
···
4503
source = "registry+https://github.com/rust-lang/crates.io-index"
4504
checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
4505
dependencies = [
4506
-
"windows-implement 0.60.0",
4507
-
"windows-interface 0.59.1",
4508
"windows-link",
4509
-
"windows-result 0.3.4",
4510
"windows-strings",
4511
]
4512
···
4516
source = "registry+https://github.com/rust-lang/crates.io-index"
4517
checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
4518
dependencies = [
4519
-
"windows-core 0.61.2",
4520
"windows-link",
4521
"windows-threading",
4522
]
4523
4524
[[package]]
4525
name = "windows-implement"
4526
-
version = "0.57.0"
4527
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4528
-
checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
4529
-
dependencies = [
4530
-
"proc-macro2",
4531
-
"quote",
4532
-
"syn 2.0.104",
4533
-
]
4534
-
4535
-
[[package]]
4536
-
name = "windows-implement"
4537
version = "0.60.0"
4538
source = "registry+https://github.com/rust-lang/crates.io-index"
4539
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
···
4545
4546
[[package]]
4547
name = "windows-interface"
4548
-
version = "0.57.0"
4549
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4550
-
checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
4551
-
dependencies = [
4552
-
"proc-macro2",
4553
-
"quote",
4554
-
"syn 2.0.104",
4555
-
]
4556
-
4557
-
[[package]]
4558
-
name = "windows-interface"
4559
version = "0.59.1"
4560
source = "registry+https://github.com/rust-lang/crates.io-index"
4561
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
···
4577
source = "registry+https://github.com/rust-lang/crates.io-index"
4578
checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
4579
dependencies = [
4580
-
"windows-core 0.61.2",
4581
"windows-link",
4582
]
4583
···
4588
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
4589
dependencies = [
4590
"windows-link",
4591
-
"windows-result 0.3.4",
4592
"windows-strings",
4593
-
]
4594
-
4595
-
[[package]]
4596
-
name = "windows-result"
4597
-
version = "0.1.2"
4598
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4599
-
checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
4600
-
dependencies = [
4601
-
"windows-targets 0.52.6",
4602
]
4603
4604
[[package]]
···
60
]
61
62
[[package]]
63
name = "anyhow"
64
version = "1.0.98"
65
source = "registry+https://github.com/rust-lang/crates.io-index"
66
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
67
68
[[package]]
69
name = "arc-swap"
70
version = "1.7.1"
71
source = "registry+https://github.com/rust-lang/crates.io-index"
···
106
]
107
108
[[package]]
109
+
name = "atmst"
110
+
version = "0.0.1"
111
+
source = "registry+https://github.com/rust-lang/crates.io-index"
112
+
checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d"
113
+
dependencies = [
114
+
"async-trait",
115
+
"bytes",
116
+
"cid 0.11.1",
117
+
"dashmap",
118
+
"futures",
119
+
"ipld-core",
120
+
"iroh-car 0.5.1",
121
+
"log",
122
+
"multihash 0.19.3",
123
+
"serde",
124
+
"serde_ipld_dagcbor",
125
+
"serde_ipld_dagjson",
126
+
"sha2",
127
+
"thiserror 1.0.69",
128
+
"tokio",
129
+
]
130
+
131
+
[[package]]
132
name = "atoi"
133
version = "2.0.0"
134
source = "registry+https://github.com/rust-lang/crates.io-index"
···
229
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
230
dependencies = [
231
"axum-core",
232
"bytes",
233
"form_urlencoded",
234
"futures-util",
···
241
"matchit",
242
"memchr",
243
"mime",
244
"percent-encoding",
245
"pin-project-lite",
246
"rustversion",
···
277
]
278
279
[[package]]
280
name = "backtrace"
281
version = "0.3.75"
282
source = "registry+https://github.com/rust-lang/crates.io-index"
···
433
version = "1.10.1"
434
source = "registry+https://github.com/rust-lang/crates.io-index"
435
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
436
+
dependencies = [
437
+
"serde",
438
+
]
439
440
[[package]]
441
name = "cadet"
···
443
dependencies = [
444
"anyhow",
445
"async-trait",
446
+
"atmst",
447
"atrium-api",
448
"base64",
449
"chrono",
450
"cid 0.11.1",
451
"dotenvy",
452
"flume",
453
+
"futures",
454
+
"iroh-car 0.4.0",
455
"libipld",
456
"metrics 0.23.1",
457
"metrics-exporter-prometheus",
···
462
"reqwest",
463
"rocketman",
464
"serde",
465
+
"serde_ipld_dagcbor",
466
"serde_json",
467
"sqlx",
468
"time",
···
476
]
477
478
[[package]]
479
name = "cbor4ii"
480
version = "0.2.14"
481
source = "registry+https://github.com/rust-lang/crates.io-index"
···
564
]
565
566
[[package]]
567
name = "cmake"
568
version = "0.1.54"
569
source = "registry+https://github.com/rust-lang/crates.io-index"
···
571
dependencies = [
572
"cc",
573
]
574
575
[[package]]
576
name = "combine"
···
1153
"libc",
1154
"log",
1155
"rustversion",
1156
+
"windows",
1157
]
1158
1159
[[package]]
···
1425
"js-sys",
1426
"log",
1427
"wasm-bindgen",
1428
+
"windows-core",
1429
]
1430
1431
[[package]]
···
1614
]
1615
1616
[[package]]
1617
+
name = "iroh-car"
1618
+
version = "0.5.1"
1619
source = "registry+https://github.com/rust-lang/crates.io-index"
1620
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
1621
+
dependencies = [
1622
+
"anyhow",
1623
+
"cid 0.11.1",
1624
+
"futures",
1625
+
"serde",
1626
+
"serde_ipld_dagcbor",
1627
+
"thiserror 1.0.69",
1628
+
"tokio",
1629
+
"unsigned-varint 0.7.2",
1630
+
]
1631
1632
[[package]]
1633
name = "itertools"
···
2017
]
2018
2019
[[package]]
2020
name = "multibase"
2021
version = "0.9.1"
2022
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2150
]
2151
2152
[[package]]
2153
name = "nu-ansi-term"
2154
version = "0.46.0"
2155
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2224
]
2225
2226
[[package]]
2227
name = "object"
2228
version = "0.36.7"
2229
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2237
version = "1.21.3"
2238
source = "registry+https://github.com/rust-lang/crates.io-index"
2239
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
2240
2241
[[package]]
2242
name = "openssl"
···
2836
dependencies = [
2837
"aws-lc-rs",
2838
"once_cell",
2839
+
"ring",
2840
"rustls-pki-types",
2841
"rustls-webpki",
2842
"subtle",
···
2968
version = "1.0.26"
2969
source = "registry+https://github.com/rust-lang/crates.io-index"
2970
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
2971
2972
[[package]]
2973
name = "serde"
···
3024
]
3025
3026
[[package]]
3027
+
name = "serde_ipld_dagjson"
3028
+
version = "0.2.0"
3029
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3030
+
checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36"
3031
+
dependencies = [
3032
+
"ipld-core",
3033
+
"serde",
3034
+
"serde_json",
3035
+
]
3036
+
3037
+
[[package]]
3038
name = "serde_json"
3039
version = "1.0.141"
3040
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3122
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3123
3124
[[package]]
3125
+
name = "signal-hook-registry"
3126
+
version = "1.4.5"
3127
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3128
+
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
3129
+
dependencies = [
3130
+
"libc",
3131
+
]
3132
+
3133
+
[[package]]
3134
name = "signature"
3135
version = "2.2.0"
3136
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3237
"memchr",
3238
"once_cell",
3239
"percent-encoding",
3240
+
"rustls",
3241
"serde",
3242
"serde_json",
3243
"sha2",
···
3249
"tracing",
3250
"url",
3251
"uuid",
3252
+
"webpki-roots 0.26.11",
3253
]
3254
3255
[[package]]
···
3499
]
3500
3501
[[package]]
3502
name = "system-configuration"
3503
version = "0.6.1"
3504
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3595
dependencies = [
3596
"deranged",
3597
"itoa",
3598
"num-conv",
3599
"powerfmt",
3600
"serde",
3601
"time-core",
···
3654
"io-uring",
3655
"libc",
3656
"mio",
3657
+
"parking_lot",
3658
"pin-project-lite",
3659
+
"signal-hook-registry",
3660
"slab",
3661
"socket2 0.5.10",
3662
"tokio-macros",
···
3947
"serde_ipld_dagcbor",
3948
"serde_json",
3949
"thiserror 2.0.12",
3950
]
3951
3952
[[package]]
···
4024
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
4025
4026
[[package]]
4027
name = "uuid"
4028
version = "1.17.0"
4029
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4048
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
4049
4050
[[package]]
4051
name = "version_check"
4052
version = "0.9.5"
4053
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4175
]
4176
4177
[[package]]
4178
+
name = "webpki-roots"
4179
+
version = "0.26.11"
4180
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4181
+
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
4182
+
dependencies = [
4183
+
"webpki-roots 1.0.2",
4184
+
]
4185
+
4186
+
[[package]]
4187
+
name = "webpki-roots"
4188
+
version = "1.0.2"
4189
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4190
+
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
4191
+
dependencies = [
4192
+
"rustls-pki-types",
4193
+
]
4194
+
4195
+
[[package]]
4196
name = "which"
4197
version = "4.4.2"
4198
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4238
4239
[[package]]
4240
name = "windows"
4241
version = "0.61.3"
4242
source = "registry+https://github.com/rust-lang/crates.io-index"
4243
checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
4244
dependencies = [
4245
"windows-collections",
4246
+
"windows-core",
4247
"windows-future",
4248
"windows-link",
4249
"windows-numerics",
···
4255
source = "registry+https://github.com/rust-lang/crates.io-index"
4256
checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
4257
dependencies = [
4258
+
"windows-core",
4259
]
4260
4261
[[package]]
···
4264
source = "registry+https://github.com/rust-lang/crates.io-index"
4265
checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
4266
dependencies = [
4267
+
"windows-implement",
4268
+
"windows-interface",
4269
"windows-link",
4270
+
"windows-result",
4271
"windows-strings",
4272
]
4273
···
4277
source = "registry+https://github.com/rust-lang/crates.io-index"
4278
checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
4279
dependencies = [
4280
+
"windows-core",
4281
"windows-link",
4282
"windows-threading",
4283
]
4284
4285
[[package]]
4286
name = "windows-implement"
4287
version = "0.60.0"
4288
source = "registry+https://github.com/rust-lang/crates.io-index"
4289
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
···
4295
4296
[[package]]
4297
name = "windows-interface"
4298
version = "0.59.1"
4299
source = "registry+https://github.com/rust-lang/crates.io-index"
4300
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
···
4316
source = "registry+https://github.com/rust-lang/crates.io-index"
4317
checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
4318
dependencies = [
4319
+
"windows-core",
4320
"windows-link",
4321
]
4322
···
4327
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
4328
dependencies = [
4329
"windows-link",
4330
+
"windows-result",
4331
"windows-strings",
4332
]
4333
4334
[[package]]
+5
-4
services/Cargo.toml
+5
-4
services/Cargo.toml
···
1
[workspace]
2
-
members = ["aqua", "cadet", "rocketman", "satellite", "types"]
3
resolver = "2"
4
5
[workspace.dependencies]
···
12
"postgres",
13
"uuid",
14
"chrono",
15
] }
16
serde = { version = "1.0", features = ["derive"] }
17
anyhow = "1.0"
···
19
tracing = "0.1"
20
tracing-subscriber = "0.3"
21
metrics = "0.23"
22
-
reqwest = { version = "0.12", features = ["json"] }
23
url = "2.5"
24
rand = "0.8"
25
flume = "0.11"
26
async-trait = "0.1"
27
time = "0.3"
28
dotenvy = "0.15"
29
-
tokio-tungstenite = "0.24"
30
atrium-api = "0.25"
31
chrono = { version = "0.4", features = ["serde"] }
32
uuid = { version = "1.0", features = ["v4", "serde"] }
33
types = { path = "types" }
34
-
rocketman = { path = "rocketman" }
35
36
# CAR and IPLD dependencies
37
iroh-car = "0.4"
···
1
[workspace]
2
+
members = ["cadet", "satellite", "types"]
3
resolver = "2"
4
5
[workspace.dependencies]
···
12
"postgres",
13
"uuid",
14
"chrono",
15
+
"tls-rustls",
16
] }
17
serde = { version = "1.0", features = ["derive"] }
18
anyhow = "1.0"
···
20
tracing = "0.1"
21
tracing-subscriber = "0.3"
22
metrics = "0.23"
23
+
reqwest.workspace = true
24
url = "2.5"
25
rand = "0.8"
26
flume = "0.11"
27
async-trait = "0.1"
28
time = "0.3"
29
dotenvy = "0.15"
30
+
tokio-tungstenite.workspace = true
31
atrium-api = "0.25"
32
chrono = { version = "0.4", features = ["serde"] }
33
uuid = { version = "1.0", features = ["v4", "serde"] }
34
types = { path = "types" }
35
+
rocketman = "0.2.5"
36
37
# CAR and IPLD dependencies
38
iroh-car = "0.4"
+20
services/Cross.toml
+20
services/Cross.toml
···
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"]
14
+
# Allow cross-compilation of native dependencies
15
+
PKG_CONFIG_ALLOW_CROSS = "1"
16
+
# Use static linking to reduce runtime dependencies
17
+
RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s"
18
+
# Cross C/C++ toolchain for building native dependencies
19
+
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
20
+
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
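A typical invocation that exercises this configuration might look like the following; a minimal sketch assuming the `cross` tool is installed (`cargo install cross`) and the command is run from the `services/` workspace:

```bash
# Cross-compile the workspace for ARM64 using the settings above;
# SQLX_OFFLINE is listed in passthrough, so it reaches the build container
cd services
SQLX_OFFLINE=true cross build --release --target aarch64-unknown-linux-gnu
```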
+4
services/cadet/Cargo.toml
+4
services/cadet/Cargo.toml
···
3
version = "0.1.0"
4
edition = "2021"
5
6
+
7
[dependencies]
8
atrium-api.workspace = true
9
tokio.workspace = true
···
33
libipld.workspace = true
34
cid.workspace = true
35
base64.workspace = true
36
+
atmst = "0.0.1"
37
+
serde_ipld_dagcbor = "0.6"
38
+
futures = "0.3"
39
40
# Redis for job queues
41
redis.workspace = true
+61
-1
services/cadet/Dockerfile
+61
-1
services/cadet/Dockerfile
···
1
FROM --platform=${BUILDPLATFORM} rust:latest AS buildah
2
3
# Create appuser
···
15
16
WORKDIR /buildah
17
18
COPY ./ .
19
20
-
RUN . ./target.sh && touch src/main.rs && echo "Building for $TARGET_ARCH" && cargo build --release --target $RUST_TARGET && cp target/$RUST_TARGET/release/cadet target/cadet
21
22
FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc
23
···
1
+
# Docker build args for cross-platform builds (must be at the top)
2
+
ARG TARGETPLATFORM
3
+
ARG BUILDPLATFORM
4
+
ARG TARGETARCH
5
+
ARG TARGETOS
6
+
7
FROM --platform=${BUILDPLATFORM} rust:latest AS buildah
8
9
# Create appuser
···
21
22
WORKDIR /buildah
23
24
+
# Re-declare ARGs after FROM (Docker requirement)
25
+
ARG TARGETPLATFORM
26
+
ARG BUILDPLATFORM
27
+
ARG TARGETARCH
28
+
ARG TARGETOS
29
+
30
+
# Debug platform detection before copying files
31
+
RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM"
32
+
33
COPY ./ .
34
35
+
# Setup lexicons and install dependencies
36
+
RUN ./scripts/setup-lexicons.sh
37
+
38
+
# Install Node.js and pnpm for lexicon generation
39
+
RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/*
40
+
RUN npm install -g pnpm
41
+
42
+
# Install dependencies and generate lexicons
43
+
RUN pnpm install
44
+
RUN cd tools/lexicon-cli && pnpm build
45
+
RUN pnpm lex:gen
46
+
47
+
# Install cross-compilation toolchains
48
+
RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu
49
+
50
+
# Enable ARM64 architecture and install cross-compilation tools
51
+
RUN dpkg --add-architecture arm64 && \
52
+
apt-get update && \
53
+
apt-get install -y \
54
+
gcc-aarch64-linux-gnu \
55
+
libssl-dev:arm64 \
56
+
libssl-dev \
57
+
pkg-config \
58
+
&& rm -rf /var/lib/apt/lists/*
59
+
60
+
# Set up cross-compilation environment
61
+
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
62
+
ENV PKG_CONFIG_ALLOW_CROSS=1
63
+
ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig
64
+
ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr
65
+
ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu
66
+
ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl
67
+
68
+
# Force SQLx to use offline mode with workspace cache
69
+
ENV SQLX_OFFLINE=true
70
+
71
+
# copy sqlx in
72
+
COPY ./.sqlx ./services/cadet/.sqlx
73
+
74
+
# Debug platform detection and run build
75
+
RUN echo "DEBUG Before target.sh: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH" && \
76
+
. ./target.sh && \
77
+
touch services/cadet/src/main.rs && \
78
+
echo "Building for $TARGET_ARCH" && \
79
+
cargo build --release --target $RUST_TARGET --package cadet && \
80
+
cp target/$RUST_TARGET/release/cadet target/cadet
81
82
FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc
83
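The ARG plumbing above is populated automatically by BuildKit during a multi-platform build; a hypothetical invocation from the repository root (tag and platform are illustrative) could look like:

```bash
# Build the cadet image for ARM64; the context must be the repo root so that
# ./scripts, ./.sqlx and the workspace sources are available to COPY
docker buildx build \
  --platform linux/arm64 \
  -f services/cadet/Dockerfile \
  -t cadet:dev \
  .
```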
+240
services/cadet/src/ingestors/car/README.md
+240
services/cadet/src/ingestors/car/README.md
···
···
1
+
# CAR Import System with `atmst`
2
+
3
+
This directory contains the implementation of Teal's CAR (Content Addressable aRchive) import functionality, now powered by the `atmst` library for proper AT Protocol-style Merkle Search Tree handling.
4
+
5
+
## Overview
6
+
7
+
The CAR import system allows Teal to ingest historical music listening data from AT Protocol repositories. Previously, this was done with manual IPLD parsing, but we've now migrated to use the specialized `atmst` library for more accurate and robust CAR file processing.
8
+
9
+
## Key Components
10
+
11
+
### `CarImportIngestor`
12
+
13
+
The main entry point for CAR file processing. This ingestor:
14
+
15
+
1. **Accepts CAR data** via the `LexiconIngestor` interface (base64 or URL)
16
+
2. **Uses `atmst::CarImporter`** to parse CAR files with proper MST handling
17
+
3. **Converts to MST structure** for tree traversal and record extraction
18
+
4. **Delegates to existing ingestors** for Teal record types (play, profile, status)
19
+
20
+
### Migration from `iroh-car` to `atmst`
21
+
22
+
**Previous Implementation:**
23
+
- Used `iroh-car` for basic CAR parsing
24
+
- Manual IPLD block decoding with `libipld`
25
+
- Complex two-pass processing to extract rkey mappings from commit operations
26
+
- Error-prone MST parsing that could miss records
27
+
28
+
**New Implementation:**
29
+
- Uses `atmst::CarImporter` for specialized AT Protocol CAR handling
30
+
- Built-in MST structure understanding
31
+
- Proper tree traversal with guaranteed rkey extraction
32
+
- More reliable and maintainable code
33
+
34
+
## Usage
35
+
36
+
### As a LexiconIngestor
37
+
38
+
The CAR importer integrates seamlessly with Teal's existing ingestion pipeline:
39
+
40
+
```rust
41
+
// CAR data in a record
42
+
{
43
+
"$type": "com.teal.car.import",
44
+
"carData": "base64-encoded-car-file-here"
45
+
}
46
+
47
+
// Or as a URL reference
48
+
{
49
+
"$type": "com.teal.car.import",
50
+
"carData": {
51
+
"url": "https://example.com/repo.car"
52
+
}
53
+
}
54
+
```
55
+
56
+
### Direct Import
57
+
58
+
```rust
59
+
let ingestor = CarImportIngestor::new(db_pool);
60
+
61
+
// Import from bytes
62
+
let import_id = ingestor.import_car_bytes(&car_data, "did:plc:example").await?;
63
+
64
+
// Import from PDS
65
+
let import_id = ingestor.fetch_and_process_identity_car("user.bsky.social").await?;
66
+
```
67
+
68
+
## Supported Record Types
69
+
70
+
The CAR importer automatically detects and processes these Teal record types:
71
+
72
+
- **`fm.teal.alpha.feed.play`** - Music play records
73
+
- **`fm.teal.alpha.actor.profile`** - User profile data
74
+
- **`fm.teal.alpha.actor.status`** - User status updates
75
+
76
+
Records are processed using the same logic as real-time Jetstream ingestion, ensuring data consistency.
77
+
78
+
## Architecture
79
+
80
+
### MST Processing Flow
81
+
82
+
1. **CAR Import**: `atmst::CarImporter` loads and validates the CAR file
83
+
2. **MST Conversion**: CAR data is converted to an `atmst::Mst` structure
84
+
3. **Tree Traversal**: MST is traversed depth-first to find all records
85
+
4. **Record Extraction**: Each MST entry is examined for Teal record types
86
+
5. **Delegation**: Valid records are passed to existing Teal ingestors
87
+
88
+
### Key Benefits
89
+
90
+
- **Proper rkey handling**: MST structure ensures correct record key extraction
91
+
- **AT Protocol compliance**: Uses specialized library designed for AT Protocol
92
+
- **Maintainable code**: Eliminates complex manual MST parsing
93
+
- **Better error handling**: More robust than previous implementation
94
+
95
+
## Current Status
96
+
97
+
### ✅ Completed
98
+
- Basic `atmst` integration
99
+
- MST structure setup and conversion
100
+
- Record type detection and routing
101
+
- Integration with existing Teal ingestors
102
+
- Error handling and logging
103
+
104
+
### 🚧 In Progress
105
+
- **Block data access**: Full implementation of record data extraction from MST
106
+
- **MST traversal**: Complete iteration through MST entries
107
+
- **Testing**: Comprehensive test suite with real CAR files
108
+
109
+
### 📋 TODO
110
+
- Complete `get_record_from_mst()` implementation
111
+
- Add MST entry iteration logic
112
+
- Performance optimization for large CAR files
113
+
- Comprehensive integration tests
114
+
115
+
## Implementation Notes
116
+
117
+
### Block Data Access
118
+
119
+
The current implementation has a placeholder for accessing actual record data from the MST:
120
+
121
+
```rust
122
+
fn get_record_from_mst(&self, cid: &atmst::Cid, mst: &Mst) -> Option<Value> {
123
+
// TODO: Implement proper block data access using atmst API
124
+
// This requires understanding how to extract IPLD data for a given CID
125
+
// from the MST's internal block storage
126
+
None
127
+
}
128
+
```
129
+
130
+
This is the key missing piece that needs to be completed based on `atmst` library documentation.
131
+
132
+
### MST Traversal
133
+
134
+
Similarly, the MST traversal logic needs completion:
135
+
136
+
```rust
137
+
// TODO: Implement proper MST iteration
138
+
// for (cid, node) in mst.iter() {
139
+
// // Process MST entries
140
+
// }
141
+
```
142
+
143
+
### Error Handling
144
+
145
+
The system is designed to be resilient:
146
+
- Invalid records are logged and skipped
147
+
- Network errors during PDS fetching are properly reported
148
+
- Database errors are propagated with context
149
+
150
+
## Testing
151
+
152
+
### Test Structure
153
+
154
+
```bash
155
+
# Unit tests (no database required)
156
+
cargo test test_parse_teal_key
157
+
cargo test test_is_teal_record_key
158
+
159
+
# Integration tests (requires database)
160
+
cargo test test_atmst_car_import --ignored
161
+
162
+
# CLI testing
163
+
cd tools/teal-cli
164
+
cargo run -- car analyze path/to/file.car
165
+
```
166
+
167
+
### Test Data
168
+
169
+
Test CAR files should be placed in `services/cadet/` for integration testing:
170
+
- `test.car` - Basic test file with Teal records
171
+
- `large.car` - Performance testing file
172
+
- `empty.car` - Edge case testing
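One way to obtain a real-world test file is to pull a repository CAR directly from a PDS, using the same XRPC endpoints the ingestor calls; a minimal sketch, assuming the account lives on `bsky.social`, a placeholder handle, and `jq` for JSON parsing:

```bash
# Resolve a handle to its DID (com.atproto.identity.resolveHandle)
DID=$(curl -s "https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle=example.bsky.social" | jq -r .did)

# Download the full repository as a CAR file (com.atproto.sync.getRepo)
curl -s "https://bsky.social/xrpc/com.atproto.sync.getRepo?did=$DID" -o services/cadet/test.car

# Inspect it with the CLI from the section above
cd tools/teal-cli && cargo run -- car analyze ../../services/cadet/test.car
```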
173
+
174
+
## Dependencies
175
+
176
+
### Key Dependencies
177
+
- **`atmst`**: AT Protocol MST library (v0.0.1)
178
+
- **`serde_json`**: JSON serialization for record processing
179
+
- **`anyhow`**: Error handling
180
+
- **`uuid`**: Import ID generation
181
+
- **`reqwest`**: HTTP client for PDS fetching
182
+
183
+
### Workspace Dependencies
184
+
The implementation uses existing Teal workspace dependencies for database access, logging, and record processing.
185
+
186
+
## Configuration
187
+
188
+
No additional configuration is required. The CAR importer uses the same database connection and logging setup as other Teal ingestors.
189
+
190
+
## Monitoring
191
+
192
+
The CAR importer provides detailed logging:
193
+
194
+
- **Info**: Successful imports, record counts, processing progress
195
+
- **Warn**: Skipped records, missing data, network issues
196
+
- **Error**: Database failures, invalid CAR files, processing errors
197
+
198
+
Metrics are integrated with Teal's existing observability stack.
199
+
200
+
## Performance
201
+
202
+
### Optimization Strategies
203
+
204
+
1. **Streaming processing**: Records are processed as they're discovered
205
+
2. **Batch database operations**: Multiple records can be inserted in batches
206
+
3. **Memory management**: Large CAR files are processed without loading entirely into memory
207
+
4. **Parallel processing**: Future enhancement for concurrent record processing
208
+
209
+
### Benchmarks
210
+
211
+
Performance testing should be conducted with:
212
+
- Small CAR files (< 1MB, ~100 records)
213
+
- Medium CAR files (1-50MB, ~10K records)
214
+
- Large CAR files (> 50MB, ~100K+ records)
215
+
216
+
## Future Enhancements
217
+
218
+
### Planned Features
219
+
- **Incremental imports**: Support for delta/since-based CAR fetching
220
+
- **Batch processing**: Queue-based processing for multiple CAR files
221
+
- **Validation**: Pre-import validation of CAR file integrity
222
+
- **Metrics**: Detailed import statistics and performance monitoring
223
+
224
+
### Integration Opportunities
225
+
- **Admin API**: Trigger imports via HTTP API
226
+
- **Scheduled imports**: Cron-based periodic imports from known users
227
+
- **Real-time sync**: Hybrid approach combining Jetstream + CAR imports
228
+
229
+
---
230
+
231
+
## Contributing
232
+
233
+
When working on the CAR import system:
234
+
235
+
1. **Test thoroughly**: Use both unit and integration tests
236
+
2. **Document changes**: Update this README for significant modifications
237
+
3. **Monitor performance**: Large CAR files can impact system performance
238
+
4. **Handle errors gracefully**: Network and parsing errors are expected
239
+
240
+
For questions about `atmst` integration or MST processing, refer to the library documentation or consider reaching out to the `atmst` maintainers.
+677
-421
services/cadet/src/ingestors/car/car_import.rs
+677
-421
services/cadet/src/ingestors/car/car_import.rs
···
1
use anyhow::{anyhow, Result};
2
use async_trait::async_trait;
3
-
use base64::{engine::general_purpose, Engine as _};
4
-
use chrono;
5
-
use cid::Cid;
6
-
use iroh_car::{CarHeader, CarReader};
7
-
use libipld::cbor::DagCborCodec;
8
-
use libipld::{Block, Cid as LibipldCid, Ipld};
9
-
use reqwest;
10
use rocketman::{ingestion::LexiconIngestor, types::event::Event};
11
use serde_json::Value;
12
use sqlx::PgPool;
13
-
use std::io::Cursor;
14
use tracing::{info, warn};
15
-
use url;
16
17
pub struct CarImportIngestor {
18
sql: PgPool,
19
}
20
21
impl CarImportIngestor {
22
pub fn new(sql: PgPool) -> Self {
23
Self { sql }
24
}
25
26
-
/// Process a CAR file from bytes
27
-
async fn process_car_data(&self, car_data: &[u8], import_id: &str) -> Result<()> {
28
-
info!("Starting CAR file processing for import {}", import_id);
29
30
-
let cursor = Cursor::new(car_data);
31
-
let mut reader = CarReader::new(cursor).await?;
32
33
-
// Read the header
34
-
let header = reader.header();
35
-
info!("CAR header: {} root CIDs", header.roots().len());
36
37
-
// Track import metadata
38
-
// self.store_import_metadata(import_id, header).await?;
39
40
-
// Process blocks
41
-
let mut block_count = 0;
42
-
while let Some((cid, block_data)) = reader.next_block().await? {
43
-
// Convert iroh-car CID to our CID type for processing
44
-
let our_cid: Cid = cid.to_string().parse()?;
45
-
self.process_car_block(&our_cid, &block_data, import_id)
46
-
.await?;
47
-
block_count += 1;
48
49
-
if block_count % 100 == 0 {
50
-
info!("Processed {} blocks for import {}", block_count, import_id);
51
}
52
}
53
54
info!(
55
-
"Completed CAR file processing: {} blocks for import {}",
56
-
block_count, import_id
57
);
58
-
// self.mark_import_complete(import_id, block_count).await?;
59
60
Ok(())
61
}
62
63
-
/// Process an individual IPLD block from the CAR file
64
-
async fn process_car_block(&self, cid: &Cid, block_data: &[u8], import_id: &str) -> Result<()> {
65
-
// Store the raw block first
66
-
// self.store_raw_block(cid, block_data, import_id).await?;
67
68
-
// Try to decode as IPLD and extract meaningful data
69
-
match self.decode_and_extract_data(cid, block_data).await {
70
-
Ok(Some(extracted_data)) => {
71
-
self.process_extracted_data(&extracted_data, cid, import_id)
72
-
.await?;
73
-
}
74
-
Ok(None) => {
75
-
// Block doesn't contain extractable data, just stored raw
76
-
}
77
-
Err(e) => {
78
-
warn!("Failed to decode block {}: {}", cid, e);
79
-
// Continue processing other blocks
80
}
81
}
82
83
-
Ok(())
84
}
85
86
-
/// Decode IPLD block and extract AT Protocol data if present
87
-
async fn decode_and_extract_data(
88
&self,
89
-
cid: &Cid,
90
-
block_data: &[u8],
91
-
) -> Result<Option<ExtractedData>> {
92
-
// Create IPLD block (convert CID types)
93
-
let libipld_cid: LibipldCid = cid.to_string().parse()?;
94
-
let block: Block<libipld::DefaultParams> = Block::new(libipld_cid, block_data.to_vec())?;
95
-
96
-
// Decode to IPLD (try to decode as DAG-CBOR, which is common in AT Protocol)
97
-
let ipld: Ipld = match block.decode::<DagCborCodec, Ipld>() {
98
-
Ok(ipld) => ipld,
99
-
Err(_) => {
100
-
// If DAG-CBOR fails, try as raw data
101
-
return Ok(None);
102
-
}
103
-
};
104
-
105
-
// Check if this looks like AT Protocol data
106
-
if let Ipld::Map(map) = &ipld {
107
-
// Look for AT Protocol patterns
108
-
if let Some(collection) = map.get("$type").and_then(|v| {
109
-
if let Ipld::String(s) = v {
110
-
Some(s.as_str())
111
-
} else {
112
-
None
113
}
114
-
}) {
115
-
return Ok(Some(ExtractedData {
116
-
collection: collection.to_string(),
117
-
data: ipld,
118
-
cid: cid.clone(),
119
-
}));
120
}
121
-
122
-
// Check for commit structures
123
-
if map.contains_key("ops") && map.contains_key("prev") {
124
-
return Ok(Some(ExtractedData {
125
-
collection: "commit".to_string(),
126
-
data: ipld,
127
-
cid: cid.clone(),
128
-
}));
129
}
130
}
131
-
132
-
Ok(None)
133
}
134
135
-
/// Process extracted AT Protocol data
136
-
async fn process_extracted_data(
137
&self,
138
-
data: &ExtractedData,
139
-
cid: &Cid,
140
-
import_id: &str,
141
) -> Result<()> {
142
-
match data.collection.as_str() {
143
"fm.teal.alpha.feed.play" => {
144
-
self.process_play_record(&data.data, cid, import_id).await?;
145
}
146
"fm.teal.alpha.actor.profile" => {
147
-
self.process_profile_record(&data.data, cid, import_id)
148
-
.await?;
149
}
150
"fm.teal.alpha.actor.status" => {
151
-
self.process_status_record(&data.data, cid, import_id)
152
-
.await?;
153
-
}
154
-
"commit" => {
155
-
self.process_commit_record(&data.data, cid, import_id)
156
-
.await?;
157
}
158
_ => {
159
-
info!("Unhandled collection type: {}", data.collection);
160
}
161
}
162
163
-
Ok(())
164
}
165
166
-
/// Process a Teal play record from IPLD data
167
-
async fn process_play_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
168
-
// Convert IPLD to JSON value for processing by existing ingestors
169
-
let json_value = ipld_to_json(ipld)?;
170
171
-
// Delegate to existing play ingestor logic
172
-
if let Ok(play_record) =
173
-
serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(json_value)
174
{
175
-
info!("Importing play record from CAR: {}", play_record.track_name);
176
177
-
// Use existing play ingestor for consistency
178
-
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
179
180
-
// Create a synthetic AT URI for the imported record
181
-
let synthetic_did = format!("car-import:{}", import_id);
182
-
let rkey = cid.to_string();
183
-
let uri = super::super::teal::assemble_at_uri(
184
-
&synthetic_did,
185
-
"fm.teal.alpha.feed.play",
186
-
&rkey,
187
-
);
188
189
-
// Store using existing logic
190
-
play_ingestor
191
-
.insert_play(&play_record, &uri, &cid.to_string(), &synthetic_did, &rkey)
192
-
.await?;
193
194
-
// Track the extracted record
195
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.feed.play", Some(&uri)).await?;
196
}
197
198
-
Ok(())
199
}
200
201
-
/// Process a Teal profile record from IPLD data
202
-
async fn process_profile_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
203
-
let json_value = ipld_to_json(ipld)?;
204
205
-
if let Ok(profile_record) =
206
-
serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>(json_value)
207
-
{
208
-
info!(
209
-
"Importing profile record from CAR: {:?}",
210
-
profile_record.display_name
211
-
);
212
213
-
// For now, just log until we have public methods on profile ingestor
214
-
info!(
215
-
"Would store profile record from CAR import {} with CID {}",
216
-
import_id, cid
217
-
);
218
219
-
// Track the extracted record
220
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.profile", None).await?;
221
-
}
222
223
-
Ok(())
224
}
225
226
-
/// Process a Teal status record from IPLD data
227
-
async fn process_status_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
228
-
let json_value = ipld_to_json(ipld)?;
229
230
-
if let Ok(_status_record) =
231
-
serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>(json_value)
232
-
{
233
-
info!("Importing status record from CAR");
234
235
-
// For now, just log until we have public methods on status ingestor
236
-
info!(
237
-
"Would store status record from CAR import {} with CID {}",
238
-
import_id, cid
239
-
);
240
241
-
// Track the extracted record
242
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.status", None).await?;
243
}
244
245
-
Ok(())
246
}
247
248
-
/// Process a commit record from IPLD data
249
-
async fn process_commit_record(
250
-
&self,
251
-
_ipld: &Ipld,
252
-
_cid: &Cid,
253
-
_import_id: &str,
254
-
) -> Result<()> {
255
-
info!("Processing commit record from CAR import");
256
257
-
// Store commit metadata for tracking
258
-
// self.store_commit_metadata(ipld, cid, import_id).await?;
259
260
-
Ok(())
261
}
262
263
-
/// Store CAR import metadata
264
-
async fn store_import_metadata(&self, _import_id: &str, _header: &CarHeader) -> Result<()> {
265
-
// TODO: Implement when database tables are ready
266
-
Ok(())
267
}
268
269
-
/// Mark import as complete
270
-
async fn mark_import_complete(&self, _import_id: &str, _block_count: i32) -> Result<()> {
271
-
// TODO: Implement when database tables are ready
272
Ok(())
273
}
274
275
-
/// Store raw IPLD block
276
-
async fn store_raw_block(
277
-
&self,
278
-
_cid: &Cid,
279
-
_block_data: &[u8],
280
-
_import_id: &str,
281
-
) -> Result<()> {
282
-
// TODO: Implement when database tables are ready
283
-
Ok(())
284
}
285
286
-
/// Store commit metadata
287
-
async fn store_commit_metadata(&self, _ipld: &Ipld, _cid: &Cid, import_id: &str) -> Result<()> {
288
-
info!("Would store commit metadata from CAR import {}", import_id);
289
-
Ok(())
290
}
291
292
-
/// Store extracted record tracking
293
-
async fn store_extracted_record(
294
-
&self,
295
-
_import_id: &str,
296
-
_cid: &Cid,
297
-
_collection: &str,
298
-
_record_uri: Option<&str>,
299
-
) -> Result<()> {
300
-
// TODO: Implement when database tables are ready
301
-
Ok(())
302
}
303
304
-
/// Fetch and process CAR file for a given identity (handle or DID)
305
-
pub async fn fetch_and_process_identity_car(&self, identity: &str) -> Result<String> {
306
-
info!(
307
-
"Starting CAR fetch and processing for identity: {}",
308
-
identity
309
-
);
310
311
-
// Resolve identity to DID and PDS
312
-
let (user_did, pds_host) = self.resolve_user_to_pds(identity).await?;
313
-
info!(
314
-
"Resolved {} to DID {} on PDS {}",
315
-
identity, user_did, pds_host
316
-
);
317
318
-
// Fetch CAR file from PDS
319
-
let car_data = self.fetch_car_from_pds(&pds_host, &user_did, None).await?;
320
-
info!(
321
-
"Successfully fetched CAR file for {} ({} bytes)",
322
-
user_did,
323
-
car_data.len()
324
-
);
325
326
-
// Generate import ID
327
-
let import_id = format!(
328
-
"pds-{}-{}",
329
-
user_did.replace(":", "-"),
330
-
chrono::Utc::now().timestamp()
331
-
);
332
333
-
// Process through existing pipeline
334
-
self.process_car_data(&car_data, &import_id).await?;
335
336
-
info!("โ
CAR import completed successfully for {}", identity);
337
-
Ok(import_id)
338
-
}
339
340
-
/// Resolve a user identifier (DID or handle) to their DID and PDS host
341
-
async fn resolve_user_to_pds(&self, user_identifier: &str) -> Result<(String, String)> {
342
-
if user_identifier.starts_with("did:") {
343
-
// User provided a DID directly, resolve to PDS
344
-
let pds_host = self.resolve_did_to_pds(user_identifier).await?;
345
-
Ok((user_identifier.to_string(), pds_host))
346
-
} else {
347
-
// User provided a handle, resolve to DID then PDS
348
-
let user_did = self.resolve_handle_to_did(user_identifier).await?;
349
-
let pds_host = self.resolve_did_to_pds(&user_did).await?;
350
-
Ok((user_did, pds_host))
351
-
}
352
}
353
354
-
/// Resolve a handle to a DID using com.atproto.identity.resolveHandle
355
-
async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> {
356
-
let url = format!(
357
-
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
358
-
handle
359
);
360
361
-
let response = reqwest::get(&url).await?;
362
-
if !response.status().is_success() {
363
-
return Err(anyhow!(
364
-
"Failed to resolve handle {}: {}",
365
-
handle,
366
-
response.status()
367
-
));
368
-
}
369
370
-
let json: serde_json::Value = response.json().await?;
371
-
let did = json["did"]
372
-
.as_str()
373
-
.ok_or_else(|| anyhow!("No DID found in response for handle {}", handle))?;
374
375
-
Ok(did.to_string())
376
-
}
377
378
-
/// Resolve a DID to their PDS host using DID document
379
-
async fn resolve_did_to_pds(&self, did: &str) -> Result<String> {
380
-
// For DID:plc, use the PLC directory
381
-
if did.starts_with("did:plc:") {
382
-
let url = format!("https://plc.directory/{}", did);
383
384
-
let response = reqwest::get(&url).await?;
385
-
if !response.status().is_success() {
386
-
return Err(anyhow!(
387
-
"Failed to resolve DID {}: {}",
388
-
did,
389
-
response.status()
390
-
));
391
-
}
392
393
-
let doc: serde_json::Value = response.json().await?;
394
395
-
// Find the PDS service endpoint
396
-
if let Some(services) = doc["service"].as_array() {
397
-
for service in services {
398
-
if service["id"].as_str() == Some("#atproto_pds") {
399
-
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
400
-
// Extract hostname from URL
401
-
let parsed_url = url::Url::parse(endpoint)?;
402
-
let host = parsed_url
403
-
.host_str()
404
-
.ok_or_else(|| anyhow!("Invalid PDS endpoint URL: {}", endpoint))?;
405
-
return Ok(host.to_string());
406
-
}
407
-
}
408
-
}
409
-
}
410
411
-
Err(anyhow!("No PDS service found in DID document for {}", did))
412
} else {
413
-
Err(anyhow!("Unsupported DID method: {}", did))
414
}
415
}
416
417
-
/// Fetch CAR file from PDS using com.atproto.sync.getRepo
418
-
async fn fetch_car_from_pds(
419
-
&self,
420
-
pds_host: &str,
421
-
did: &str,
422
-
since: Option<&str>,
423
-
) -> Result<Vec<u8>> {
424
-
let mut url = format!(
425
-
"https://{}/xrpc/com.atproto.sync.getRepo?did={}",
426
-
pds_host, did
427
-
);
428
-
429
-
if let Some(since_rev) = since {
430
-
url.push_str(&format!("&since={}", since_rev));
431
}
432
433
-
info!("Fetching CAR file from: {}", url);
434
435
-
let response = reqwest::get(&url).await?;
436
-
if !response.status().is_success() {
437
-
return Err(anyhow!(
438
-
"Failed to fetch CAR from PDS {}: {}",
439
-
pds_host,
440
-
response.status()
441
-
));
442
-
}
443
444
-
// Verify content type
445
-
let content_type = response
446
-
.headers()
447
-
.get("content-type")
448
-
.and_then(|h| h.to_str().ok())
449
-
.unwrap_or("");
450
451
-
if !content_type.contains("application/vnd.ipld.car") {
452
-
return Err(anyhow!("Unexpected content type: {}", content_type));
453
}
454
455
-
let car_data = response.bytes().await?;
456
-
Ok(car_data.to_vec())
457
}
458
-
}
459
460
-
#[async_trait]
461
-
impl LexiconIngestor for CarImportIngestor {
462
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
463
-
// For CAR imports, we expect the message to contain CAR file data
464
-
// This could be a file path, URL, or base64 encoded data
465
466
-
if let Some(commit) = &message.commit {
467
-
if let Some(record) = &commit.record {
468
-
// Check if this is a CAR import request
469
-
if let Some(car_data_field) = record.get("carData") {
470
-
let import_id = format!("{}:{}", message.did, commit.rkey);
471
472
-
match car_data_field {
473
-
Value::String(base64_data) => {
474
-
// Decode base64 CAR data
475
-
if let Ok(car_bytes) = general_purpose::STANDARD.decode(base64_data) {
476
-
self.process_car_data(&car_bytes, &import_id).await?;
477
-
} else {
478
-
return Err(anyhow!("Invalid base64 CAR data"));
479
-
}
480
-
}
481
-
Value::Object(obj) => {
482
-
// Handle different CAR data formats (URL, file path, etc.)
483
-
if let Some(Value::String(url)) = obj.get("url") {
484
-
// Download and process CAR from URL
485
-
let car_bytes = self.download_car_file(url).await?;
486
-
self.process_car_data(&car_bytes, &import_id).await?;
487
-
}
488
-
}
489
-
_ => {
490
-
return Err(anyhow!("Unsupported CAR data format"));
491
-
}
492
-
}
493
-
} else {
494
-
return Err(anyhow!("No CAR data found in record"));
495
}
496
}
497
}
498
499
Ok(())
500
}
501
-
}
502
503
-
impl CarImportIngestor {
504
-
/// Download CAR file from URL
505
-
async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> {
506
-
let response = reqwest::get(url).await?;
507
-
let bytes = response.bytes().await?;
508
-
Ok(bytes.to_vec())
509
-
}
510
-
}
511
512
-
/// Helper struct for extracted AT Protocol data
513
-
#[derive(Debug)]
514
-
struct ExtractedData {
515
-
collection: String,
516
-
data: Ipld,
517
-
cid: Cid,
518
-
}
519
520
-
/// Convert IPLD to JSON Value for compatibility with existing ingestors
521
-
fn ipld_to_json(ipld: &Ipld) -> Result<Value> {
522
-
match ipld {
523
-
Ipld::Null => Ok(Value::Null),
524
-
Ipld::Bool(b) => Ok(Value::Bool(*b)),
525
-
Ipld::Integer(i) => {
526
-
// Convert i128 to i64 for JSON compatibility
527
-
if let Ok(i64_val) = i64::try_from(*i) {
528
-
Ok(Value::Number(i64_val.into()))
529
-
} else {
530
-
// Fall back to string representation for very large integers
531
-
Ok(Value::String(i.to_string()))
532
-
}
533
-
}
534
-
Ipld::Float(f) => {
535
-
if let Some(num) = serde_json::Number::from_f64(*f) {
536
-
Ok(Value::Number(num))
537
-
} else {
538
-
Err(anyhow!("Invalid float value"))
539
}
540
-
}
541
-
Ipld::String(s) => Ok(Value::String(s.clone())),
542
-
Ipld::Bytes(b) => {
543
-
// Convert bytes to base64 string
544
-
Ok(Value::String(general_purpose::STANDARD.encode(b)))
545
-
}
546
-
Ipld::List(list) => {
547
-
let json_array: Result<Vec<Value>> = list.iter().map(ipld_to_json).collect();
548
-
Ok(Value::Array(json_array?))
549
-
}
550
-
Ipld::Map(map) => {
551
-
let mut json_map = serde_json::Map::new();
552
-
for (key, value) in map {
553
-
json_map.insert(key.clone(), ipld_to_json(value)?);
554
}
555
-
Ok(Value::Object(json_map))
556
}
557
-
Ipld::Link(cid) => {
558
-
// Convert CID to string representation
559
-
Ok(Value::String(cid.to_string()))
560
-
}
561
}
562
}
···
1
+
//! CAR (Content Addressable aRchive) Import Ingestor using atmst
//!
//! This module handles importing Teal records from CAR files using the atmst library,
//! which provides proper AT Protocol-style Merkle Search Tree handling. The CAR import process:
//!
//! 1. Receives CAR data via the LexiconIngestor interface (base64 encoded or URL)
//! 2. Uses atmst::CarImporter to parse the CAR file and extract MST structure
//! 3. Converts the CarImporter to an MST for proper tree traversal
//! 4. Iterates through MST nodes to find Teal record types (play, profile, status)
//! 5. Delegates to existing Teal ingestors using the actual DID and proper rkey
//!
//! ## Usage Example
//!
//! ```rust,ignore
//! // CAR data can be provided in a record like:
//! {
//!   "carData": "base64-encoded-car-file-here"
//! }
//!
//! // Or as a URL reference:
//! {
//!   "carData": {
//!     "url": "https://example.com/my-archive.car"
//!   }
//! }
//! ```
//!
//! The ingestor will automatically detect record types and store them using the
//! same logic as real-time Jetstream ingestion, ensuring data consistency.
//! All imported records will be attributed to the DID that initiated the import
//! and use the original rkey from the AT Protocol MST structure.
32
+
33
+
use crate::ingestors::car::jobs::{queue_keys, CarImportJob};
34
+
use crate::redis_client::RedisClient;
35
use anyhow::{anyhow, Result};
36
use async_trait::async_trait;
37
+
use atmst::{mst::Mst, Bytes, CarImporter};
38
+
use base64::Engine;
39
+
use futures::StreamExt;
40
+
use redis::AsyncCommands;
41
use rocketman::{ingestion::LexiconIngestor, types::event::Event};
42
use serde_json::Value;
43
use sqlx::PgPool;
44
use tracing::{info, warn};
45
46
+
/// Helper struct for extracted records
47
+
#[derive(Debug)]
48
+
pub struct ExtractedRecord {
49
+
pub collection: String,
50
+
pub rkey: String,
51
+
pub data: serde_json::Value,
52
+
}
53
+
54
+
/// CAR Import Ingestor handles importing Teal records from CAR files using atmst
55
pub struct CarImportIngestor {
56
sql: PgPool,
57
}
58
59
impl CarImportIngestor {
60
+
/// Create a new CAR import ingestor with database connection
61
pub fn new(sql: PgPool) -> Self {
62
Self { sql }
63
}
64
65
+
/// Helper to get a Redis connection for job queueing
66
+
pub async fn get_redis_connection(&self) -> Result<redis::aio::MultiplexedConnection> {
67
+
let redis_url =
68
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
69
+
let client = RedisClient::new(&redis_url)?;
70
+
client
71
+
.get_connection()
72
+
.await
73
+
.map_err(|e| anyhow!("Redis connection error: {}", e))
74
+
}
75
+
76
+
/// Process CAR file data using atmst library and extract Teal records
77
+
async fn process_car_data(&self, car_data: &[u8], import_id: &str, did: &str) -> Result<()> {
78
+
info!(
79
+
"Starting CAR file processing with atmst for import {} (DID: {})",
80
+
import_id, did
81
+
);
82
+
83
+
// Convert to Bytes for atmst
84
+
let car_bytes: Bytes = Bytes::from(car_data.to_vec());
85
+
86
+
// Create CarImporter and import the CAR data
87
+
let mut car_importer = CarImporter::new();
88
+
car_importer
89
+
.import_from_bytes(car_bytes.clone())
90
+
.await
91
+
.map_err(|e| anyhow!("Failed to import CAR with atmst: {}", e))?;
92
93
+
info!(
94
+
"CAR imported successfully. Root CIDs: {:?}, Total blocks: {}",
95
+
car_importer.roots(),
96
+
car_importer.len()
97
+
);
98
+
99
+
// Convert CarImporter to MST for proper tree traversal
100
+
let mst = Mst::from_car_importer(car_importer)
101
+
.await
102
+
.map_err(|e| anyhow!("Failed to convert CAR to MST: {}", e))?;
103
104
+
info!("MST conversion successful, starting record extraction");
105
+
106
+
// Create a new CarImporter for data access since the previous one was consumed
107
+
let mut data_importer = CarImporter::new();
108
+
data_importer
109
+
.import_from_bytes(car_bytes)
110
+
.await
111
+
.map_err(|e| anyhow!("Failed to re-import CAR for data access: {}", e))?;
112
113
+
// Extract all records from the MST
114
+
let records = self
115
+
.extract_records_from_mst(&mst, &data_importer, did)
116
+
.await?;
117
118
+
info!("Extracted {} records from MST", records.len());
119
120
+
// Process each record through the appropriate ingestor
121
+
let mut processed_count = 0;
122
+
for record in records {
123
+
match self.process_extracted_record(&record, import_id, did).await {
124
+
Ok(()) => {
125
+
processed_count += 1;
126
+
if processed_count % 10 == 0 {
127
+
info!("Processed {} records so far", processed_count);
128
+
}
129
+
}
130
+
Err(e) => {
131
+
warn!("Failed to process record {}: {}", record.rkey, e);
132
+
// Continue processing other records
133
+
}
134
}
135
}
136
137
info!(
138
+
"Completed CAR file processing: {} records processed for import {}",
139
+
processed_count, import_id
140
);
141
142
Ok(())
143
}
144
145
+
    /// Extract all Teal records from the MST
    async fn extract_records_from_mst(
        &self,
        mst: &Mst,
        car_importer: &CarImporter,
        _did: &str,
    ) -> Result<Vec<ExtractedRecord>> {
        let mut records = Vec::new();

        // Use the MST iterator to traverse all entries
        let mut stream = mst.iter().into_stream();

        while let Some(result) = stream.next().await {
            match result {
                Ok((key, record_cid)) => {
                    // Check if this is a Teal record based on the key pattern
                    if self.is_teal_record_key(&key) {
                        info!("Found Teal record: {} -> {}", key, record_cid);
                        if let Some((collection, rkey)) = self.parse_teal_key(&key) {
                            info!("Collection: {}, rkey: {}", collection, rkey);
                            // Get the actual record data using the CID
                            match self.get_record_data(&record_cid, car_importer).await {
                                Ok(Some(data)) => {
                                    info!("Successfully got record data for {}", record_cid);
                                    records.push(ExtractedRecord {
                                        collection,
                                        rkey,
                                        data,
                                    });
                                }
                                Ok(None) => {
                                    warn!("No data found for record CID: {}", record_cid);
                                }
                                Err(e) => {
                                    warn!("Failed to get record data for {}: {}", record_cid, e);
                                }
                            }
                        } else {
                            warn!("Failed to parse Teal key: {}", key);
                        }
                    }
                }
                Err(e) => {
                    warn!("Error iterating MST: {}", e);
                    // Continue with other entries
                }
            }
        }

        Ok(records)
    }
200
+
    /// Get record data from the CAR importer using a CID
    async fn get_record_data(
        &self,
        cid: &atmst::Cid,
        car_importer: &CarImporter,
    ) -> Result<Option<Value>> {
        // Try to decode the block as CBOR IPLD directly with atmst::Cid
        info!("Attempting to decode CBOR for CID: {}", cid);
        match car_importer.decode_cbor(cid) {
            Ok(ipld) => {
                info!("Successfully decoded CBOR for CID: {}", cid);
                // Convert IPLD to JSON for processing by existing ingestors
                match self.ipld_to_json(&ipld) {
                    Ok(json) => {
                        info!("Successfully converted IPLD to JSON for CID: {}", cid);
                        Ok(Some(json))
                    }
                    Err(e) => {
                        warn!("Failed to convert IPLD to JSON for CID {}: {}", cid, e);
                        Ok(None)
                    }
                }
            }
            Err(e) => {
                warn!("Failed to decode CBOR for CID {}: {}", cid, e);
                Ok(None)
            }
        }
    }
+
    /// Process a single extracted record through the appropriate ingestor
    async fn process_extracted_record(
        &self,
        record: &ExtractedRecord,
        _import_id: &str,
        did: &str,
    ) -> Result<()> {
        info!(
            "Processing {} record with rkey: {}",
            record.collection, record.rkey
        );

        match record.collection.as_str() {
            "fm.teal.alpha.feed.play" => {
                info!("Processing play record...");
                let result = self
                    .process_play_record(&record.data, did, &record.rkey)
                    .await;
                if result.is_ok() {
                    info!("Successfully processed play record");
                } else {
                    warn!("Failed to process play record: {:?}", result);
                }
                result
            }
            "fm.teal.alpha.actor.profile" => {
                info!("Processing profile record...");
                let result = self
                    .process_profile_record(&record.data, did, &record.rkey)
                    .await;
                if result.is_ok() {
                    info!("Successfully processed profile record");
                } else {
                    warn!("Failed to process profile record: {:?}", result);
                }
                result
            }
            "fm.teal.alpha.actor.status" => {
                info!("Processing status record...");
                let result = self
                    .process_status_record(&record.data, did, &record.rkey)
                    .await;
                if result.is_ok() {
                    info!("Successfully processed status record");
                } else {
                    warn!("Failed to process status record: {:?}", result);
                }
                result
            }
            _ => {
                warn!("Unknown Teal collection: {}", record.collection);
                Ok(())
            }
        }
    }
292
293
+
/// Check if a key represents a Teal record
294
+
fn is_teal_record_key(&self, key: &str) -> bool {
295
+
key.starts_with("fm.teal.alpha.") && key.contains("/")
296
}
297
298
+
/// Parse a Teal MST key to extract collection and rkey
299
+
fn parse_teal_key(&self, key: &str) -> Option<(String, String)> {
300
+
if let Some(slash_pos) = key.rfind('/') {
301
+
let collection = key[..slash_pos].to_string();
302
+
let rkey = key[slash_pos + 1..].to_string();
303
+
Some((collection, rkey))
304
+
} else {
305
+
None
306
+
}
307
+
}
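For reference, AT Protocol repositories key MST entries as `<collection>/<rkey>`. A minimal, dependency-free sketch of the split that `is_teal_record_key` and `parse_teal_key` assume (the helper name here is hypothetical, not part of the diff):

```rust
/// Illustrative only: split an MST key of the form "<collection>/<rkey>".
fn split_mst_key(key: &str) -> Option<(&str, &str)> {
    let slash = key.rfind('/')?;
    Some((&key[..slash], &key[slash + 1..]))
}

fn main() {
    let key = "fm.teal.alpha.feed.play/3k2akjdlkjsf";
    assert_eq!(
        split_mst_key(key),
        Some(("fm.teal.alpha.feed.play", "3k2akjdlkjsf"))
    );
}
```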
308
309
+
/// Process a play record using the existing PlayIngestor
310
+
async fn process_play_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> {
311
+
match serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(data.clone())
312
{
313
+
Ok(play_record) => {
314
+
let play_ingestor =
315
+
super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
316
+
let uri = super::super::teal::assemble_at_uri(did, "fm.teal.alpha.feed.play", rkey);
317
318
+
play_ingestor
319
+
.insert_play(
320
+
&play_record,
321
+
&uri,
322
+
&format!("car-import-{}", uuid::Uuid::new_v4()),
323
+
did,
324
+
rkey,
325
+
)
326
+
.await?;
327
328
+
info!(
329
+
"Successfully stored play record: {} by {:?}",
330
+
play_record.track_name, play_record.artist_names
331
+
);
332
+
Ok(())
333
+
}
334
+
Err(e) => {
335
+
warn!("Failed to deserialize play record data: {}", e);
336
+
Err(anyhow!("Invalid play record format: {}", e))
337
+
}
338
+
}
339
+
}
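The exact schema of `fm.teal.alpha.feed.play` lives in the lexicon-generated `types` crate; as a rough sketch based only on fields referenced in this diff (`track_name`, `artist_names`, `duration`), a record pulled out of the CAR might look like the JSON below before deserialization. Field names beyond those three are assumptions, not authoritative:

```rust
use serde_json::json;

fn main() {
    // Hypothetical shape; consult the fm.teal.alpha.feed.play lexicon for the real schema.
    let play = json!({
        "$type": "fm.teal.alpha.feed.play",
        "track_name": "Test Song",
        "artist_names": ["Test Artist"],
        "duration": 180000
    });
    println!("{}", serde_json::to_string_pretty(&play).unwrap());
}
```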
340
341
+
/// Process a profile record using the existing ActorProfileIngestor
342
+
async fn process_profile_record(&self, data: &Value, did: &str, _rkey: &str) -> Result<()> {
343
+
match serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>(
344
+
data.clone(),
345
+
) {
346
+
Ok(profile_record) => {
347
+
let profile_ingestor =
348
+
super::super::teal::actor_profile::ActorProfileIngestor::new(self.sql.clone());
349
+
let did_typed = atrium_api::types::string::Did::new(did.to_string())
350
+
.map_err(|e| anyhow!("Failed to create Did: {}", e))?;
351
352
+
profile_ingestor
353
+
.insert_profile(did_typed, &profile_record)
354
+
.await?;
355
+
356
+
info!(
357
+
"Successfully stored profile record: {:?}",
358
+
profile_record.display_name
359
+
);
360
+
Ok(())
361
+
}
362
+
Err(e) => {
363
+
warn!("Failed to deserialize profile record data: {}", e);
364
+
Err(anyhow!("Invalid profile record format: {}", e))
365
+
}
366
}
367
+
}
368
369
+
/// Process a status record using the existing ActorStatusIngestor
370
+
async fn process_status_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> {
371
+
match serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>(
372
+
data.clone(),
373
+
) {
374
+
Ok(status_record) => {
375
+
let status_ingestor =
376
+
super::super::teal::actor_status::ActorStatusIngestor::new(self.sql.clone());
377
+
let did_typed = atrium_api::types::string::Did::new(did.to_string())
378
+
.map_err(|e| anyhow!("Failed to create Did: {}", e))?;
379
+
380
+
status_ingestor
381
+
.insert_status(
382
+
did_typed,
383
+
rkey,
384
+
&format!("car-import-{}", uuid::Uuid::new_v4()),
385
+
&status_record,
386
+
)
387
+
.await?;
388
+
389
+
info!("Successfully stored status record from CAR import");
390
+
Ok(())
391
+
}
392
+
Err(e) => {
393
+
warn!("Failed to deserialize status record data: {}", e);
394
+
Err(anyhow!("Invalid status record format: {}", e))
395
+
}
396
+
}
397
}
398
399
+
/// Fetch and process a CAR file from a PDS for a given identity
400
+
pub async fn fetch_and_process_identity_car(&self, handle_or_did: &str) -> Result<String> {
401
+
info!("Fetching CAR file for identity: {}", handle_or_did);
402
+
403
+
// Resolve to DID if needed
404
+
let did = if handle_or_did.starts_with("did:") {
405
+
handle_or_did.to_string()
406
+
} else {
407
+
self.resolve_handle_to_did(handle_or_did).await?
408
+
};
409
+
410
+
// Resolve DID to PDS
411
+
let pds_url = self.resolve_did_to_pds(&did).await?;
412
+
info!("Resolved {} to PDS: {}", did, pds_url);
413
414
+
// Fetch CAR file
415
+
let car_data = self.fetch_car_from_pds(&pds_url, &did).await?;
416
417
+
// Generate import ID
418
+
let import_id = uuid::Uuid::new_v4().to_string();
419
420
+
// Process the CAR data
421
+
self.process_car_data(&car_data, &import_id, &did).await?;
422
423
+
Ok(import_id)
424
}
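A minimal sketch of driving a one-off import from a handle, assuming a connected `PgPool` and a handle that resolves on the public network (not part of this change set):

```rust
// Illustrative only: assumes DATABASE_URL points at a migrated database.
async fn run_one_import(pool: sqlx::PgPool) -> anyhow::Result<()> {
    let ingestor = CarImportIngestor::new(pool);
    let import_id = ingestor
        .fetch_and_process_identity_car("someone.bsky.social")
        .await?;
    tracing::info!("import finished: {}", import_id);
    Ok(())
}
```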
425
426
+
/// Resolve handle to DID
427
+
async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> {
428
+
let url = format!(
429
+
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
430
+
handle
431
+
);
432
+
let response: Value = reqwest::get(&url).await?.json().await?;
433
434
+
response["did"]
435
+
.as_str()
436
+
.map(|s| s.to_string())
437
+
.ok_or_else(|| anyhow!("Failed to resolve handle to DID"))
438
+
}
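`com.atproto.identity.resolveHandle` returns a small JSON object with a single `did` field; a sketch of the expected payload and the extraction done above:

```rust
fn main() {
    // Example response body from the resolveHandle XRPC endpoint.
    let body = r#"{"did":"did:plc:ewvi7nxzyoun6zhxrhs64oiz"}"#;
    let json: serde_json::Value = serde_json::from_str(body).unwrap();
    assert_eq!(json["did"].as_str(), Some("did:plc:ewvi7nxzyoun6zhxrhs64oiz"));
}
```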
439
440
+
/// Resolve DID to PDS URL
441
+
async fn resolve_did_to_pds(&self, did: &str) -> Result<String> {
442
+
let url = format!("https://plc.directory/{}", did);
443
+
let response: Value = reqwest::get(&url).await?.json().await?;
444
445
+
if let Some(services) = response["service"].as_array() {
446
+
for service in services {
447
+
if service["id"] == "#atproto_pds" {
448
+
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
449
+
return Ok(endpoint.to_string());
450
+
}
451
+
}
452
+
}
453
}
454
455
+
Err(anyhow!("Could not resolve PDS for DID: {}", did))
456
}
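plc.directory returns a DID document whose `service` array carries the PDS endpoint under the `#atproto_pds` id; a sketch of the shape this resolver walks (values are placeholders):

```rust
fn main() {
    let doc: serde_json::Value = serde_json::json!({
        "id": "did:plc:example",
        "service": [{
            "id": "#atproto_pds",
            "type": "AtprotoPersonalDataServer",
            "serviceEndpoint": "https://pds.example.com"
        }]
    });

    // Same walk as resolve_did_to_pds above.
    let endpoint = doc["service"]
        .as_array()
        .and_then(|services| services.iter().find(|s| s["id"] == "#atproto_pds"))
        .and_then(|s| s["serviceEndpoint"].as_str());
    assert_eq!(endpoint, Some("https://pds.example.com"));
}
```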
457
458
+
/// Fetch CAR file from PDS
459
+
async fn fetch_car_from_pds(&self, pds_url: &str, did: &str) -> Result<Vec<u8>> {
460
+
let url = format!("{}/xrpc/com.atproto.sync.getRepo?did={}", pds_url, did);
461
+
let response = reqwest::get(&url).await?;
462
463
+
if !response.status().is_success() {
464
+
return Err(anyhow!(
465
+
"Failed to fetch CAR file: HTTP {}",
466
+
response.status()
467
+
));
468
+
}
469
+
470
+
let car_data = response.bytes().await?.to_vec();
471
+
info!("Fetched CAR file: {} bytes", car_data.len());
472
473
+
Ok(car_data)
474
}
475
476
+
/// Helper: Convert IPLD to JSON
477
+
#[allow(clippy::only_used_in_recursion)]
478
+
fn ipld_to_json(&self, ipld: &atmst::Ipld) -> Result<Value> {
479
+
use atmst::Ipld;
480
+
481
+
match ipld {
482
+
Ipld::Null => Ok(Value::Null),
483
+
Ipld::Bool(b) => Ok(Value::Bool(*b)),
484
+
Ipld::Integer(i) => {
485
+
if let Ok(i64_val) = i64::try_from(*i) {
486
+
Ok(Value::Number(i64_val.into()))
487
+
} else {
488
+
Ok(Value::String(i.to_string()))
489
+
}
490
+
}
491
+
Ipld::Float(f) => {
492
+
if let Some(num) = serde_json::Number::from_f64(*f) {
493
+
Ok(Value::Number(num))
494
+
} else {
495
+
Err(anyhow!("Invalid float value"))
496
+
}
497
+
}
498
+
Ipld::String(s) => Ok(Value::String(s.clone())),
499
+
Ipld::Bytes(b) => Ok(Value::String(
500
+
base64::engine::general_purpose::STANDARD.encode(b),
501
+
)),
502
+
Ipld::List(list) => {
503
+
let json_array: Result<Vec<Value>> =
504
+
list.iter().map(|v| self.ipld_to_json(v)).collect();
505
+
Ok(Value::Array(json_array?))
506
+
}
507
+
Ipld::Map(map) => {
508
+
let mut json_map = serde_json::Map::new();
509
+
for (key, value) in map {
510
+
json_map.insert(key.clone(), self.ipld_to_json(value)?);
511
+
}
512
+
Ok(Value::Object(json_map))
513
+
}
514
+
Ipld::Link(cid) => Ok(Value::String(cid.to_string())),
515
+
}
516
}
517
+
}
518
519
+
#[async_trait]
520
+
impl LexiconIngestor for CarImportIngestor {
521
+
async fn ingest(&self, message: Event<Value>) -> Result<()> {
522
+
let commit = message
523
+
.commit
524
+
.as_ref()
525
+
.ok_or_else(|| anyhow!("CarImportIngestor requires a commit event"))?;
526
+
527
+
let record = commit
528
+
.record
529
+
.as_ref()
530
+
.ok_or_else(|| anyhow!("CarImportIngestor requires a record in the commit"))?;
531
+
532
+
// Enqueue CAR import job into Redis
533
+
let job = CarImportJob {
534
+
request_id: uuid::Uuid::new_v4(),
535
+
identity: record
536
+
.get("identity")
537
+
.and_then(|v| v.as_str())
538
+
.ok_or_else(|| anyhow!("Missing identity in record"))?
539
+
.to_string(),
540
+
since: None,
541
+
created_at: chrono::Utc::now(),
542
+
description: None,
543
+
};
544
+
let job_payload = serde_json::to_string(&job)?;
545
+
let mut conn = self.get_redis_connection().await?;
546
+
// Specify the expected return type to avoid FromRedisValue fallback issues in edition 2024
547
+
let _: () = conn.lpush(queue_keys::CAR_IMPORT_JOBS, job_payload).await?;
548
+
tracing::info!("Enqueued CAR import job: {}", job.request_id);
549
+
550
Ok(())
551
}
552
+
}
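`ingest()` only enqueues; a separate worker is expected to drain `car_import_jobs`. A minimal sketch of such a loop, assuming the `redis` crate's `AsyncCommands::rpop` and the job/queue types defined in `jobs.rs` (the worker binary itself is not part of this diff):

```rust
// Illustrative worker loop; not part of this change set.
async fn drain_car_import_jobs(
    ingestor: &CarImportIngestor,
    conn: &mut redis::aio::MultiplexedConnection,
) -> anyhow::Result<()> {
    use redis::AsyncCommands;

    loop {
        // Pop one serialized CarImportJob, if any is waiting.
        let payload: Option<String> = conn.rpop(queue_keys::CAR_IMPORT_JOBS, None).await?;
        let Some(payload) = payload else { break };

        let job: CarImportJob = serde_json::from_str(&payload)?;
        tracing::info!("processing CAR import job {}", job.request_id);
        ingestor.fetch_and_process_identity_car(&job.identity).await?;
    }
    Ok(())
}
```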
553
554
+
#[allow(dead_code)]
555
+
impl CarImportIngestor {
556
+
/// Download CAR file from URL
557
+
async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> {
558
+
let response = reqwest::get(url).await?;
559
+
Ok(response.bytes().await?.to_vec())
560
}
561
562
+
/// Import CAR data from bytes (public interface)
563
+
pub async fn import_car_bytes(&self, car_data: &[u8], did: &str) -> Result<String> {
564
+
let import_id = uuid::Uuid::new_v4().to_string();
565
+
self.process_car_data(car_data, &import_id, did).await?;
566
+
Ok(import_id)
567
}
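A sketch of feeding a locally exported repository archive through this public entry point; the path and DID are placeholders:

```rust
// Illustrative: import a CAR file that was downloaded out of band.
async fn import_local_car(pool: sqlx::PgPool) -> anyhow::Result<()> {
    let car_bytes = tokio::fs::read("/tmp/example-repo.car").await?;
    let ingestor = CarImportIngestor::new(pool);
    let import_id = ingestor
        .import_car_bytes(&car_bytes, "did:plc:example")
        .await?;
    tracing::info!("imported CAR as {}", import_id);
    Ok(())
}
```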
568
569
+
/// Consolidate synthetic artists with MusicBrainz artists
570
+
pub async fn consolidate_synthetic_artists(&self, min_confidence: f64) -> Result<usize> {
571
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
572
+
play_ingestor
573
+
.consolidate_synthetic_artists(min_confidence)
574
+
.await
575
}
576
577
+
/// Consolidate duplicate releases
578
+
pub async fn consolidate_duplicate_releases(&self, min_confidence: f64) -> Result<usize> {
579
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
580
+
play_ingestor
581
+
.consolidate_duplicate_releases(min_confidence)
582
+
.await
583
+
}
584
585
+
/// Consolidate duplicate recordings
586
+
pub async fn consolidate_duplicate_recordings(&self, min_confidence: f64) -> Result<usize> {
587
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
588
+
play_ingestor
589
+
.consolidate_duplicate_recordings(min_confidence)
590
+
.await
591
+
}
592
593
+
/// Preview consolidation candidates before running consolidation
594
+
pub async fn preview_consolidation_candidates(&self, min_confidence: f64) -> Result<()> {
595
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
596
+
play_ingestor
597
+
.preview_consolidation_candidates(min_confidence)
598
+
.await
599
+
}
600
601
+
/// Run full batch consolidation for all entity types
602
+
pub async fn run_full_consolidation(&self) -> Result<()> {
603
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
604
+
play_ingestor.run_full_consolidation().await
605
+
}
606
+
}
607
608
+
// Removed unused helper struct for extracted records.
609
610
+
#[cfg(test)]
611
+
mod tests {
612
+
use super::*;
613
+
use atmst::{CarBuilder, Ipld};
614
+
use std::collections::BTreeMap;
615
616
+
fn create_mock_teal_play_record() -> Ipld {
617
+
let mut record = BTreeMap::new();
618
+
record.insert(
619
+
"$type".to_string(),
620
+
Ipld::String("fm.teal.alpha.feed.play".to_string()),
621
+
);
622
+
record.insert(
623
+
"track_name".to_string(),
624
+
Ipld::String("Test Song".to_string()),
625
+
);
626
+
record.insert(
627
+
"artist_names".to_string(),
628
+
Ipld::List(vec![Ipld::String("Test Artist".to_string())]),
629
+
);
630
+
record.insert("duration".to_string(), Ipld::Integer(180000));
631
+
record.insert(
632
+
"created_at".to_string(),
633
+
Ipld::String("2024-01-01T00:00:00Z".to_string()),
634
+
);
635
+
Ipld::Map(record)
636
}
637
638
+
fn create_mock_teal_profile_record() -> Ipld {
639
+
let mut record = BTreeMap::new();
640
+
record.insert(
641
+
"$type".to_string(),
642
+
Ipld::String("fm.teal.alpha.actor.profile".to_string()),
643
);
644
+
record.insert(
645
+
"display_name".to_string(),
646
+
Ipld::String("Test User".to_string()),
647
+
);
648
+
record.insert(
649
+
"description".to_string(),
650
+
Ipld::String("Music lover".to_string()),
651
+
);
652
+
Ipld::Map(record)
653
+
}
654
655
+
async fn create_test_car_with_teal_records() -> Result<Bytes> {
656
+
let mut builder = CarBuilder::new();
657
658
+
// Create test Teal records
659
+
let play_record = create_mock_teal_play_record();
660
+
let profile_record = create_mock_teal_profile_record();
661
662
+
// Add records to CAR
663
+
let play_cid = builder.add_cbor(&play_record)?;
664
+
let profile_cid = builder.add_cbor(&profile_record)?;
665
666
+
// Add roots (in a real MST, these would be MST nodes, but for testing this is sufficient)
667
+
builder.add_root(play_cid);
668
+
builder.add_root(profile_cid);
669
670
+
let importer = builder.build();
671
+
importer
672
+
.export_to_bytes()
673
+
.await
674
+
.map_err(|e| anyhow!("Failed to export CAR: {}", e))
675
+
}
676
677
+
#[test]
678
+
fn test_parse_teal_key() {
679
+
// This test doesn't need a database connection or async
680
+
let key = "fm.teal.alpha.feed.play/3k2akjdlkjsf";
681
682
+
// Test the parsing logic directly
683
+
if let Some(slash_pos) = key.rfind('/') {
684
+
let collection = key[..slash_pos].to_string();
685
+
let rkey = key[slash_pos + 1..].to_string();
686
687
+
assert_eq!(collection, "fm.teal.alpha.feed.play");
688
+
assert_eq!(rkey, "3k2akjdlkjsf");
689
} else {
690
+
panic!("Should have found slash in key");
691
}
692
}
693
694
+
#[test]
695
+
fn test_is_teal_record_key() {
696
+
// Test the logic directly without needing an ingestor instance
697
+
fn is_teal_record_key(key: &str) -> bool {
698
+
key.starts_with("fm.teal.alpha.") && key.contains("/")
699
}
700
701
+
assert!(is_teal_record_key("fm.teal.alpha.feed.play/abc123"));
702
+
assert!(is_teal_record_key("fm.teal.alpha.profile/def456"));
703
+
assert!(!is_teal_record_key("app.bsky.feed.post/xyz789"));
704
+
assert!(!is_teal_record_key("fm.teal.alpha.feed.play")); // No rkey
705
+
}
706
707
+
#[test]
708
+
fn test_ipld_to_json_conversion() {
709
+
// Test IPLD to JSON conversion logic directly
710
+
use atmst::Ipld;
711
+
use std::collections::BTreeMap;
712
713
+
let mut record = BTreeMap::new();
714
+
record.insert(
715
+
"$type".to_string(),
716
+
Ipld::String("fm.teal.alpha.feed.play".to_string()),
717
+
);
718
+
record.insert(
719
+
"track_name".to_string(),
720
+
Ipld::String("Test Song".to_string()),
721
+
);
722
+
record.insert("duration".to_string(), Ipld::Integer(180000));
723
+
let play_record = Ipld::Map(record);
724
725
+
// Test the conversion logic inline
726
+
fn ipld_to_json(ipld: &Ipld) -> Result<Value> {
727
+
match ipld {
728
+
Ipld::Null => Ok(Value::Null),
729
+
Ipld::Bool(b) => Ok(Value::Bool(*b)),
730
+
Ipld::Integer(i) => {
731
+
if let Ok(i64_val) = i64::try_from(*i) {
732
+
Ok(Value::Number(i64_val.into()))
733
+
} else {
734
+
Ok(Value::String(i.to_string()))
735
+
}
736
+
}
737
+
Ipld::String(s) => Ok(Value::String(s.clone())),
738
+
Ipld::Map(map) => {
739
+
let mut json_map = serde_json::Map::new();
740
+
for (key, value) in map {
741
+
json_map.insert(key.clone(), ipld_to_json(value)?);
742
+
}
743
+
Ok(Value::Object(json_map))
744
+
}
745
+
_ => Ok(Value::Null), // Simplified for test
746
+
}
747
}
748
749
+
let json_result = ipld_to_json(&play_record);
750
+
assert!(json_result.is_ok());
751
+
let json = json_result.unwrap();
752
+
assert_eq!(json["$type"], "fm.teal.alpha.feed.play");
753
+
assert_eq!(json["track_name"], "Test Song");
754
+
assert_eq!(json["duration"], 180000);
755
}
756
757
+
#[tokio::test]
758
+
async fn test_car_creation_and_basic_parsing() -> Result<()> {
759
+
// Test that we can create a CAR file with Teal records and parse it
760
+
let car_bytes = create_test_car_with_teal_records().await?;
761
762
+
// Verify we can import the CAR with atmst
763
+
let mut importer = CarImporter::new();
764
+
importer.import_from_bytes(car_bytes).await?;
765
+
766
+
assert!(!importer.is_empty());
767
+
assert!(importer.len() >= 2); // Should have at least our 2 test records
768
769
+
// Test that we can decode the records
770
+
for cid in importer.cids() {
771
+
if let Ok(Ipld::Map(map)) = importer.decode_cbor(&cid) {
772
+
if let Some(Ipld::String(record_type)) = map.get("$type") {
773
+
assert!(record_type.starts_with("fm.teal.alpha."));
774
+
println!("Found Teal record: {}", record_type);
775
}
776
}
777
}
778
779
Ok(())
780
}
781
+
782
+
#[tokio::test]
783
+
#[ignore = "requires database connection"]
784
+
async fn test_full_car_import_integration() -> Result<()> {
785
+
// This test requires a real database connection
786
+
let database_url = std::env::var("DATABASE_URL")
787
+
.unwrap_or_else(|_| "postgresql://localhost/teal_test".to_string());
788
+
789
+
let pool = sqlx::PgPool::connect(&database_url).await?;
790
+
let ingestor = CarImportIngestor::new(pool);
791
792
+
// Create test CAR with Teal records
793
+
let car_bytes = create_test_car_with_teal_records().await?;
794
+
795
+
// Test the full import process
796
+
let import_id = uuid::Uuid::new_v4().to_string();
797
+
let test_did = "did:plc:test123";
798
799
+
// This should work with our new atmst implementation
800
+
let result = ingestor
801
+
.process_car_data(&car_bytes, &import_id, test_did)
802
+
.await;
803
804
+
// For now, we expect this to work but records might not actually get stored
805
+
// because the test CAR doesn't have proper MST structure
806
+
match result {
807
+
Ok(()) => {
808
+
println!("โ
CAR import completed successfully");
809
}
810
+
Err(e) => {
811
+
println!("โ ๏ธ CAR import failed (expected for test data): {}", e);
812
+
// This is expected since our test CAR doesn't have proper MST structure
813
}
814
}
815
+
816
+
Ok(())
817
}
818
}
+51
services/cadet/src/ingestors/car/jobs.rs
···
1
+
use chrono::{DateTime, Utc};
2
+
use serde::{Deserialize, Serialize};
3
+
use uuid::Uuid;
4
+
5
+
#[derive(Debug, Clone, Serialize, Deserialize)]
6
+
pub struct CarImportJob {
7
+
pub request_id: Uuid,
8
+
pub identity: String,
9
+
pub since: Option<DateTime<Utc>>,
10
+
pub created_at: DateTime<Utc>,
11
+
pub description: Option<String>,
12
+
}
13
+
14
+
#[derive(Debug, Clone, Serialize, Deserialize)]
15
+
pub struct CarImportJobStatus {
16
+
pub status: JobStatus,
17
+
pub created_at: DateTime<Utc>,
18
+
pub started_at: Option<DateTime<Utc>>,
19
+
pub completed_at: Option<DateTime<Utc>>,
20
+
pub error_message: Option<String>,
21
+
pub progress: Option<JobProgress>,
22
+
}
23
+
24
+
#[derive(Debug, Clone, Serialize, Deserialize)]
25
+
pub enum JobStatus {
26
+
Pending,
27
+
Processing,
28
+
Completed,
29
+
Failed,
30
+
Cancelled,
31
+
}
32
+
33
+
#[derive(Debug, Clone, Serialize, Deserialize)]
34
+
pub struct JobProgress {
35
+
pub step: String,
36
+
pub user_did: Option<String>,
37
+
pub pds_host: Option<String>,
38
+
pub car_size_bytes: Option<u64>,
39
+
pub blocks_processed: Option<u64>,
40
+
}
41
+
42
+
pub mod queue_keys {
43
+
use uuid::Uuid;
44
+
45
+
pub const CAR_IMPORT_JOBS: &str = "car_import_jobs";
46
+
pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status";
47
+
48
+
pub fn job_status_key(job_id: &Uuid) -> String {
49
+
format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id)
50
+
}
51
+
}
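For reference, the payload pushed onto `car_import_jobs` is simply the serde_json form of `CarImportJob`; a sketch of building and serializing one (values are placeholders):

```rust
use chrono::Utc;
use uuid::Uuid;

fn main() {
    let job = CarImportJob {
        request_id: Uuid::new_v4(),
        identity: "someone.bsky.social".to_string(),
        since: None,
        created_at: Utc::now(),
        description: Some("manual backfill".to_string()),
    };
    // This is the string that ends up in the Redis list.
    println!("{}", serde_json::to_string_pretty(&job).unwrap());
}
```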
+2
-1
services/cadet/src/ingestors/car/mod.rs
+1
-1
services/cadet/src/ingestors/mod.rs
+7
-7
services/cadet/src/ingestors/teal/actor_status.rs
···
23
status: &types::fm::teal::alpha::actor::status::RecordData,
24
) -> anyhow::Result<()> {
25
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
26
-
27
let record_json = serde_json::to_value(status)?;
28
-
29
sqlx::query!(
30
r#"
31
INSERT INTO statii (uri, did, rkey, cid, record)
···
43
)
44
.execute(&self.sql)
45
.await?;
46
-
47
Ok(())
48
}
49
50
pub async fn remove_status(&self, did: Did, rkey: &str) -> anyhow::Result<()> {
51
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
52
-
53
sqlx::query!(
54
r#"
55
DELETE FROM statii WHERE uri = $1
···
58
)
59
.execute(&self.sql)
60
.await?;
61
-
62
Ok(())
63
}
64
}
···
71
let record = serde_json::from_value::<
72
types::fm::teal::alpha::actor::status::RecordData,
73
>(record.clone())?;
74
-
75
if let Some(ref commit) = message.commit {
76
if let Some(ref cid) = commit.cid {
77
self.insert_status(
···
98
}
99
Ok(())
100
}
101
-
}
···
23
status: &types::fm::teal::alpha::actor::status::RecordData,
24
) -> anyhow::Result<()> {
25
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
26
+
27
let record_json = serde_json::to_value(status)?;
28
+
29
sqlx::query!(
30
r#"
31
INSERT INTO statii (uri, did, rkey, cid, record)
···
43
)
44
.execute(&self.sql)
45
.await?;
46
+
47
Ok(())
48
}
49
50
pub async fn remove_status(&self, did: Did, rkey: &str) -> anyhow::Result<()> {
51
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
52
+
53
sqlx::query!(
54
r#"
55
DELETE FROM statii WHERE uri = $1
···
58
)
59
.execute(&self.sql)
60
.await?;
61
+
62
Ok(())
63
}
64
}
···
71
let record = serde_json::from_value::<
72
types::fm::teal::alpha::actor::status::RecordData,
73
>(record.clone())?;
74
+
75
if let Some(ref commit) = message.commit {
76
if let Some(ref cid) = commit.cid {
77
self.insert_status(
···
98
}
99
Ok(())
100
}
101
+
}
+1132
-62
services/cadet/src/ingestors/teal/feed_play.rs
···
7
8
use super::assemble_at_uri;
9
10
pub struct PlayIngestor {
11
sql: PgPool,
12
}
···
58
Self { sql }
59
}
60
61
-
/// Inserts or updates an artist in the database.
62
-
/// Returns the Uuid of the artist.
63
-
async fn insert_artist(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
64
-
let artist_uuid = Uuid::parse_str(mbid)?;
65
-
let res = sqlx::query!(
66
r#"
67
-
INSERT INTO artists (mbid, name) VALUES ($1, $2)
68
-
ON CONFLICT (mbid) DO NOTHING
69
-
RETURNING mbid;
70
"#,
71
-
artist_uuid,
72
-
name
73
)
74
.fetch_all(&self.sql)
75
.await?;
76
77
-
if !res.is_empty() {
78
-
// TODO: send request to async scrape data from local MB instance
79
}
80
81
-
Ok(artist_uuid)
82
}
83
84
/// Inserts or updates a release in the database.
85
/// Returns the Uuid of the release.
86
async fn insert_release(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
87
let release_uuid = Uuid::parse_str(mbid)?;
88
let res = sqlx::query!(
89
r#"
90
-
INSERT INTO releases (mbid, name) VALUES ($1, $2)
91
-
ON CONFLICT (mbid) DO NOTHING
92
RETURNING mbid;
93
"#,
94
release_uuid,
95
-
name
96
)
97
.fetch_all(&self.sql)
98
.await?;
···
108
/// Returns the Uuid of the recording.
109
async fn insert_recording(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
110
let recording_uuid = Uuid::parse_str(mbid)?;
111
let res = sqlx::query!(
112
r#"
113
-
INSERT INTO recordings (mbid, name) VALUES ($1, $2)
114
-
ON CONFLICT (mbid) DO NOTHING
115
RETURNING mbid;
116
"#,
117
recording_uuid,
118
-
name
119
)
120
.fetch_all(&self.sql)
121
.await?;
···
126
127
Ok(recording_uuid)
128
}
129
130
pub async fn insert_play(
131
&self,
···
137
) -> anyhow::Result<()> {
138
dbg!("ingesting", play_record);
139
let play_record = clean(play_record);
140
-
let mut parsed_artists: Vec<(Uuid, String)> = vec![];
141
if let Some(ref artists) = &play_record.artists {
142
for artist in artists {
143
let artist_name = artist.artist_name.clone();
144
-
let artist_mbid = artist.artist_mb_id.clone();
145
-
if let Some(artist_mbid) = artist_mbid {
146
-
let artist_uuid = self.insert_artist(&artist_mbid, &artist_name).await?;
147
-
parsed_artists.push((artist_uuid, artist_name.clone()));
148
} else {
149
-
// Handle case where artist MBID is missing, maybe log a warning
150
-
eprintln!("Warning: Artist MBID missing for '{}'", artist_name);
151
-
}
152
}
153
} else {
154
-
if let Some(artist_names) = &play_record.artist_names {
155
-
for artist_name in artist_names {
156
-
// Assuming artist_mbid is optional, handle missing mbid gracefully
157
-
let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids {
158
-
mbid_list.get(
159
-
artist_names
160
-
.iter()
161
-
.position(|name| name == artist_name)
162
-
.unwrap_or(0),
163
-
)
164
-
} else {
165
-
None
166
-
};
167
168
-
if let Some(artist_mbid) = artist_mbid_opt {
169
-
let artist_uuid = self.insert_artist(artist_mbid, artist_name).await?;
170
-
parsed_artists.push((artist_uuid, artist_name.clone()));
171
-
} else {
172
-
// Handle case where artist MBID is missing, maybe log a warning
173
-
eprintln!("Warning: Artist MBID missing for '{}'", artist_name);
174
-
}
175
-
}
176
-
}
177
}
178
179
// Insert release if missing
···
203
time::OffsetDateTime::from_unix_timestamp(played_time.as_ref().timestamp())
204
.unwrap_or_else(|_| time::OffsetDateTime::now_utc());
205
206
-
// Our main insert into plays
207
sqlx::query!(
208
r#"
209
INSERT INTO plays (
210
uri, cid, did, rkey, isrc, duration, track_name, played_time,
211
processed_time, release_mbid, release_name, recording_mbid,
212
-
submission_client_agent, music_service_base_domain
213
) VALUES (
214
$1, $2, $3, $4, $5, $6, $7, $8,
215
-
NOW(), $9, $10, $11, $12, $13
216
) ON CONFLICT(uri) DO UPDATE SET
217
isrc = EXCLUDED.isrc,
218
duration = EXCLUDED.duration,
···
223
release_name = EXCLUDED.release_name,
224
recording_mbid = EXCLUDED.recording_mbid,
225
submission_client_agent = EXCLUDED.submission_client_agent,
226
-
music_service_base_domain = EXCLUDED.music_service_base_domain;
227
"#,
228
uri,
229
cid,
···
238
recording_mbid_opt,
239
play_record.submission_client_agent,
240
play_record.music_service_base_domain,
241
)
242
.execute(&self.sql)
243
.await?;
244
245
-
// Insert plays into join table
246
-
for (mbid, artist) in &parsed_artists {
247
-
let artist_name = artist.clone(); // Clone to move into the query
248
-
249
sqlx::query!(
250
r#"
251
-
INSERT INTO play_to_artists (play_uri, artist_mbid, artist_name) VALUES
252
-
($1, $2, $3)
253
-
ON CONFLICT (play_uri, artist_mbid) DO NOTHING;
254
-
"#,
255
uri,
256
-
mbid,
257
artist_name
258
)
259
.execute(&self.sql)
···
7
8
use super::assemble_at_uri;
9
10
+
#[derive(Debug, Clone)]
11
+
struct FuzzyMatchCandidate {
12
+
artist_id: i32,
13
+
name: String,
14
+
confidence: f64,
15
+
}
16
+
17
+
struct MusicBrainzCleaner;
18
+
19
+
impl MusicBrainzCleaner {
20
+
/// List of common "guff" words found in parentheses that should be removed
21
+
const GUFF_WORDS: &'static [&'static str] = &[
22
+
"a cappella",
23
+
"acoustic",
24
+
"bonus",
25
+
"censored",
26
+
"clean",
27
+
"club",
28
+
"clubmix",
29
+
"composition",
30
+
"cut",
31
+
"dance",
32
+
"demo",
33
+
"dialogue",
34
+
"dirty",
35
+
"edit",
36
+
"excerpt",
37
+
"explicit",
38
+
"extended",
39
+
"feat",
40
+
"featuring",
41
+
"ft",
42
+
"instrumental",
43
+
"interlude",
44
+
"intro",
45
+
"karaoke",
46
+
"live",
47
+
"long",
48
+
"main",
49
+
"maxi",
50
+
"megamix",
51
+
"mix",
52
+
"mono",
53
+
"official",
54
+
"orchestral",
55
+
"original",
56
+
"outro",
57
+
"outtake",
58
+
"outtakes",
59
+
"piano",
60
+
"quadraphonic",
61
+
"radio",
62
+
"rap",
63
+
"re-edit",
64
+
"reedit",
65
+
"refix",
66
+
"rehearsal",
67
+
"reinterpreted",
68
+
"released",
69
+
"release",
70
+
"remake",
71
+
"remastered",
72
+
"remaster",
73
+
"master",
74
+
"remix",
75
+
"remixed",
76
+
"remode",
77
+
"reprise",
78
+
"rework",
79
+
"reworked",
80
+
"rmx",
81
+
"session",
82
+
"short",
83
+
"single",
84
+
"skit",
85
+
"stereo",
86
+
"studio",
87
+
"take",
88
+
"takes",
89
+
"tape",
90
+
"track",
91
+
"tryout",
92
+
"uncensored",
93
+
"unknown",
94
+
"unplugged",
95
+
"untitled",
96
+
"version",
97
+
"ver",
98
+
"video",
99
+
"vocal",
100
+
"vs",
101
+
"with",
102
+
"without",
103
+
];
104
+
105
+
/// Clean artist name by removing common variations and guff
106
+
fn clean_artist_name(name: &str) -> String {
107
+
let mut cleaned = name.trim().to_string();
108
+
109
+
// Remove common featuring patterns
110
+
if let Some(pos) = cleaned.to_lowercase().find(" feat") {
111
+
cleaned = cleaned[..pos].trim().to_string();
112
+
}
113
+
if let Some(pos) = cleaned.to_lowercase().find(" ft.") {
114
+
cleaned = cleaned[..pos].trim().to_string();
115
+
}
116
+
if let Some(pos) = cleaned.to_lowercase().find(" featuring") {
117
+
cleaned = cleaned[..pos].trim().to_string();
118
+
}
119
+
120
+
// Remove parenthetical content if it looks like guff
121
+
if let Some(start) = cleaned.find('(') {
122
+
if let Some(end) = cleaned.find(')') {
123
+
let paren_content = &cleaned[start + 1..end].to_lowercase();
124
+
if Self::is_likely_guff(paren_content) {
125
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
126
+
.trim()
127
+
.to_string();
128
+
}
129
+
}
130
+
}
131
+
132
+
// Remove brackets with guff
133
+
if let Some(start) = cleaned.find('[') {
134
+
if let Some(end) = cleaned.find(']') {
135
+
let bracket_content = &cleaned[start + 1..end].to_lowercase();
136
+
if Self::is_likely_guff(bracket_content) {
137
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
138
+
.trim()
139
+
.to_string();
140
+
}
141
+
}
142
+
}
143
+
144
+
// Remove common prefixes/suffixes
145
+
if cleaned.to_lowercase().starts_with("the ") && cleaned.len() > 4 {
146
+
let without_the = &cleaned[4..];
147
+
if !without_the.trim().is_empty() {
148
+
return without_the.trim().to_string();
149
+
}
150
+
}
151
+
152
+
cleaned.trim().to_string()
153
+
}
154
+
155
+
/// Clean track name by removing common variations and guff
156
+
fn clean_track_name(name: &str) -> String {
157
+
let mut cleaned = name.trim().to_string();
158
+
159
+
// Remove parenthetical content if it looks like guff
160
+
if let Some(start) = cleaned.find('(') {
161
+
if let Some(end) = cleaned.find(')') {
162
+
let paren_content = &cleaned[start + 1..end].to_lowercase();
163
+
if Self::is_likely_guff(paren_content) {
164
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
165
+
.trim()
166
+
.to_string();
167
+
}
168
+
}
169
+
}
170
+
171
+
// Remove featuring artists from track titles
172
+
if let Some(pos) = cleaned.to_lowercase().find(" feat") {
173
+
cleaned = cleaned[..pos].trim().to_string();
174
+
}
175
+
if let Some(pos) = cleaned.to_lowercase().find(" ft.") {
176
+
cleaned = cleaned[..pos].trim().to_string();
177
+
}
178
+
179
+
cleaned.trim().to_string()
180
+
}
181
+
182
+
/// Check if parenthetical content is likely "guff" that should be removed
183
+
fn is_likely_guff(content: &str) -> bool {
184
+
let content_lower = content.to_lowercase();
185
+
let words: Vec<&str> = content_lower.split_whitespace().collect();
186
+
187
+
// If most words are guff words, consider it guff
188
+
let guff_word_count = words
189
+
.iter()
190
+
.filter(|word| Self::GUFF_WORDS.contains(word))
191
+
.count();
192
+
193
+
// Also check for years (19XX or 20XX)
194
+
let has_year = content_lower.chars().collect::<String>().contains("19")
195
+
|| content_lower.contains("20");
196
+
197
+
// Consider it guff if >50% are guff words, or if it contains years, or if it's short and common
198
+
guff_word_count > words.len() / 2
199
+
|| has_year
200
+
|| (words.len() <= 2
201
+
&& Self::GUFF_WORDS
202
+
.iter()
203
+
.any(|&guff| content_lower.contains(guff)))
204
+
}
205
+
206
+
/// Normalize text for comparison (remove special chars, lowercase, etc.)
207
+
fn normalize_for_comparison(text: &str) -> String {
208
+
text.chars()
209
+
.filter(|c| c.is_alphanumeric() || c.is_whitespace())
210
+
.collect::<String>()
211
+
.to_lowercase()
212
+
.split_whitespace()
213
+
.collect::<Vec<&str>>()
214
+
.join(" ")
215
+
}
216
+
}
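A quick sketch of what the cleaner does to typical titles, written as a nested test module since `MusicBrainzCleaner` is private to this file; exact outputs depend on the guff list above:

```rust
#[cfg(test)]
mod musicbrainz_cleaner_sketch {
    use super::MusicBrainzCleaner;

    #[test]
    fn strips_featuring_and_guff() {
        // " feat" and everything after it is cut from artist names.
        assert_eq!(
            MusicBrainzCleaner::clean_artist_name("Artist feat. Someone"),
            "Artist"
        );
        // Parenthetical guff (remaster/year) is dropped from track names.
        assert_eq!(
            MusicBrainzCleaner::clean_track_name("Song Title (2011 Remaster)"),
            "Song Title"
        );
    }
}
```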
217
+
218
pub struct PlayIngestor {
219
sql: PgPool,
220
}
···
266
Self { sql }
267
}
268
269
+
/// Batch consolidate synthetic artists that match existing MusicBrainz artists
270
+
pub async fn consolidate_synthetic_artists(
271
+
&self,
272
+
min_confidence: f64,
273
+
) -> anyhow::Result<usize> {
274
+
tracing::info!(
275
+
"๐ Starting batch consolidation of synthetic artists with confidence >= {:.2}",
276
+
min_confidence
277
+
);
278
+
279
+
let consolidation_candidates = sqlx::query!(
280
+
r#"
281
+
SELECT DISTINCT
282
+
ae1.id as synthetic_id,
283
+
ae1.name as synthetic_name,
284
+
ae2.id as target_id,
285
+
ae2.name as target_name,
286
+
ae2.mbid as target_mbid,
287
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score
288
+
FROM artists_extended ae1
289
+
CROSS JOIN artists_extended ae2
290
+
WHERE ae1.id != ae2.id
291
+
AND ae1.mbid_type = 'synthetic'
292
+
AND ae2.mbid_type = 'musicbrainz'
293
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1
294
+
ORDER BY similarity_score DESC
295
+
"#,
296
+
min_confidence as f32
297
+
)
298
+
.fetch_all(&self.sql)
299
+
.await?;
300
+
301
+
let mut consolidated_count = 0;
302
+
303
+
for candidate in consolidation_candidates {
304
+
let synthetic_id = candidate.synthetic_id;
305
+
let target_id = candidate.target_id;
306
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
307
+
308
+
// Double-check with our improved similarity calculation
309
+
let calculated_similarity =
310
+
Self::calculate_similarity(&candidate.synthetic_name, &candidate.target_name, true);
311
+
312
+
let final_confidence = similarity.max(calculated_similarity);
313
+
314
+
if final_confidence >= min_confidence {
315
+
// Move all play relationships from synthetic artist to MusicBrainz artist
316
+
let moved_plays = sqlx::query!(
317
+
r#"
318
+
UPDATE play_to_artists_extended
319
+
SET artist_id = $1, artist_name = $2
320
+
WHERE artist_id = $3
321
+
AND NOT EXISTS (
322
+
SELECT 1 FROM play_to_artists_extended existing
323
+
WHERE existing.play_uri = play_to_artists_extended.play_uri
324
+
AND existing.artist_id = $1
325
+
)
326
+
"#,
327
+
target_id,
328
+
candidate.target_name,
329
+
synthetic_id
330
+
)
331
+
.execute(&self.sql)
332
+
.await?;
333
+
334
+
// Remove duplicate relationships that couldn't be moved
335
+
sqlx::query!(
336
+
"DELETE FROM play_to_artists_extended WHERE artist_id = $1",
337
+
synthetic_id
338
+
)
339
+
.execute(&self.sql)
340
+
.await?;
341
+
342
+
// Remove the synthetic artist
343
+
sqlx::query!("DELETE FROM artists_extended WHERE id = $1", synthetic_id)
344
+
.execute(&self.sql)
345
+
.await?;
346
+
347
+
consolidated_count += 1;
348
+
349
+
tracing::info!(
350
+
"โ
Consolidated '{}' โ '{}' (confidence: {:.2}, moved {} plays)",
351
+
candidate.synthetic_name,
352
+
candidate.target_name,
353
+
final_confidence,
354
+
moved_plays.rows_affected()
355
+
);
356
+
}
357
+
}
358
+
359
+
// Refresh materialized views after consolidation
360
+
if consolidated_count > 0 {
361
+
tracing::info!("๐ Refreshing materialized views after consolidation");
362
+
sqlx::query!("REFRESH MATERIALIZED VIEW mv_artist_play_counts;")
363
+
.execute(&self.sql)
364
+
.await?;
365
+
}
366
+
367
+
tracing::info!(
368
+
"๐ Batch consolidation complete: {} artists consolidated",
369
+
consolidated_count
370
+
);
371
+
Ok(consolidated_count)
372
+
}
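The `similarity()` calls in these queries come from PostgreSQL's `pg_trgm` extension (trigram similarity in the 0.0-1.0 range), so the extension must be installed in the target database. A minimal sketch of probing it through sqlx, assuming the extension and a reachable pool:

```rust
// Illustrative: requires `CREATE EXTENSION IF NOT EXISTS pg_trgm;` on the database.
async fn trigram_similarity(
    pool: &sqlx::PgPool,
    a: &str,
    b: &str,
) -> anyhow::Result<f32> {
    let score: Option<f32> = sqlx::query_scalar("SELECT similarity($1, $2)")
        .bind(a)
        .bind(b)
        .fetch_one(pool)
        .await?;
    Ok(score.unwrap_or(0.0))
}
```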
373
+
374
+
/// Find and consolidate duplicate releases/albums (requires matching artist context)
375
+
pub async fn consolidate_duplicate_releases(
376
+
&self,
377
+
min_confidence: f64,
378
+
) -> anyhow::Result<usize> {
379
+
tracing::info!(
380
+
"๐ Starting release consolidation with confidence >= {:.2} (requires artist context)",
381
+
min_confidence
382
+
);
383
+
384
+
// Find releases that have similar names AND share at least one artist
385
+
let release_candidates = sqlx::query!(
386
r#"
387
+
SELECT DISTINCT
388
+
r1.mbid as release1_mbid,
389
+
r1.name as release1_name,
390
+
r2.mbid as release2_mbid,
391
+
r2.name as release2_name,
392
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
393
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists
394
+
FROM releases r1
395
+
CROSS JOIN releases r2
396
+
INNER JOIN plays p1 ON p1.release_mbid = r1.mbid
397
+
INNER JOIN plays p2 ON p2.release_mbid = r2.mbid
398
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
399
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
400
+
WHERE r1.mbid != r2.mbid
401
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
402
+
AND ptae1.artist_id = ptae2.artist_id -- Same artist
403
+
AND (
404
+
(r1.discriminant IS NULL AND r2.discriminant IS NULL) OR
405
+
(LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))
406
+
) -- Same or no discriminants
407
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
408
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist
409
+
ORDER BY similarity_score DESC, shared_artists DESC
410
"#,
411
+
min_confidence as f32
412
)
413
.fetch_all(&self.sql)
414
.await?;
415
416
+
let mut consolidated_count = 0;
417
+
418
+
for candidate in release_candidates {
419
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
420
+
let shared_artists = candidate.shared_artists.unwrap_or(0);
421
+
422
+
// Use MusicBrainz-style cleaning for better matching
423
+
let cleaned_similarity = Self::calculate_similarity(
424
+
&candidate.release1_name,
425
+
&candidate.release2_name,
426
+
false, // is_artist = false for releases
427
+
);
428
+
429
+
let final_confidence = similarity.max(cleaned_similarity);
430
+
431
+
// Require high confidence AND shared artists for album consolidation
432
+
if final_confidence >= min_confidence && shared_artists > 0 {
433
+
// Choose the release with more plays as the canonical one
434
+
let r1_plays: i64 = sqlx::query_scalar!(
435
+
"SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
436
+
candidate.release1_mbid
437
+
)
438
+
.fetch_one(&self.sql)
439
+
.await?
440
+
.unwrap_or(0);
441
+
442
+
let r2_plays: i64 = sqlx::query_scalar!(
443
+
"SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
444
+
candidate.release2_mbid
445
+
)
446
+
.fetch_one(&self.sql)
447
+
.await?
448
+
.unwrap_or(0);
449
+
450
+
let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays {
451
+
(
452
+
candidate.release1_mbid,
453
+
candidate.release2_mbid,
454
+
candidate.release1_name.clone(),
455
+
)
456
+
} else {
457
+
(
458
+
candidate.release2_mbid,
459
+
candidate.release1_mbid,
460
+
candidate.release2_name.clone(),
461
+
)
462
+
};
463
+
464
+
// Update plays to use the canonical release
465
+
let updated_plays = sqlx::query!(
466
+
"UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3",
467
+
keep_mbid,
468
+
keep_name,
469
+
remove_mbid
470
+
)
471
+
.execute(&self.sql)
472
+
.await?;
473
+
474
+
// Remove the duplicate release
475
+
sqlx::query!("DELETE FROM releases WHERE mbid = $1", remove_mbid)
476
+
.execute(&self.sql)
477
+
.await?;
478
+
479
+
consolidated_count += 1;
480
+
481
+
tracing::info!(
482
+
"โ
Consolidated releases: '{}' โ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)",
483
+
if r1_plays >= r2_plays {
484
+
&candidate.release2_name
485
+
} else {
486
+
&candidate.release1_name
487
+
},
488
+
keep_name,
489
+
final_confidence,
490
+
shared_artists,
491
+
updated_plays.rows_affected()
492
+
);
493
+
}
494
}
495
496
+
tracing::info!(
497
+
"๐ Release consolidation complete: {} releases consolidated",
498
+
consolidated_count
499
+
);
500
+
Ok(consolidated_count)
501
+
}
502
+
503
+
/// Find and consolidate duplicate recordings/tracks (requires matching artist context)
504
+
pub async fn consolidate_duplicate_recordings(
505
+
&self,
506
+
min_confidence: f64,
507
+
) -> anyhow::Result<usize> {
508
+
tracing::info!(
509
+
"๐ Starting recording consolidation with confidence >= {:.2} (requires artist context)",
510
+
min_confidence
511
+
);
512
+
513
+
// Find recordings that have similar names AND share at least one artist
514
+
let recording_candidates = sqlx::query!(
515
+
r#"
516
+
SELECT DISTINCT
517
+
r1.mbid as recording1_mbid,
518
+
r1.name as recording1_name,
519
+
r2.mbid as recording2_mbid,
520
+
r2.name as recording2_name,
521
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
522
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists
523
+
FROM recordings r1
524
+
CROSS JOIN recordings r2
525
+
INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid
526
+
INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid
527
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
528
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
529
+
WHERE r1.mbid != r2.mbid
530
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
531
+
AND ptae1.artist_id = ptae2.artist_id -- Same artist
532
+
AND (
533
+
(r1.discriminant IS NULL AND r2.discriminant IS NULL) OR
534
+
(LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))
535
+
) -- Same or no discriminants
536
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
537
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist
538
+
ORDER BY similarity_score DESC, shared_artists DESC
539
+
"#,
540
+
min_confidence as f32
541
+
)
542
+
.fetch_all(&self.sql)
543
+
.await?;
544
+
545
+
let mut consolidated_count = 0;
546
+
547
+
for candidate in recording_candidates {
548
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
549
+
let shared_artists = candidate.shared_artists.unwrap_or(0);
550
+
551
+
// Use MusicBrainz-style cleaning for track names
552
+
let cleaned_similarity = Self::calculate_similarity(
553
+
&candidate.recording1_name,
554
+
&candidate.recording2_name,
555
+
false, // is_artist = false for recordings
556
+
);
557
+
558
+
let final_confidence = similarity.max(cleaned_similarity);
559
+
560
+
// Require high confidence AND shared artists for track consolidation
561
+
if final_confidence >= min_confidence && shared_artists > 0 {
562
+
// Choose the recording with more plays as canonical
563
+
let r1_plays: i64 = sqlx::query_scalar!(
564
+
"SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
565
+
candidate.recording1_mbid
566
+
)
567
+
.fetch_one(&self.sql)
568
+
.await?
569
+
.unwrap_or(0);
570
+
571
+
let r2_plays: i64 = sqlx::query_scalar!(
572
+
"SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
573
+
candidate.recording2_mbid
574
+
)
575
+
.fetch_one(&self.sql)
576
+
.await?
577
+
.unwrap_or(0);
578
+
579
+
let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays {
580
+
(
581
+
candidate.recording1_mbid,
582
+
candidate.recording2_mbid,
583
+
candidate.recording1_name.clone(),
584
+
)
585
+
} else {
586
+
(
587
+
candidate.recording2_mbid,
588
+
candidate.recording1_mbid,
589
+
candidate.recording2_name.clone(),
590
+
)
591
+
};
592
+
593
+
// Update plays to use the canonical recording
594
+
let updated_plays = sqlx::query!(
595
+
"UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2",
596
+
keep_mbid,
597
+
remove_mbid
598
+
)
599
+
.execute(&self.sql)
600
+
.await?;
601
+
602
+
// Remove the duplicate recording
603
+
sqlx::query!("DELETE FROM recordings WHERE mbid = $1", remove_mbid)
604
+
.execute(&self.sql)
605
+
.await?;
606
+
607
+
consolidated_count += 1;
608
+
609
+
tracing::info!(
610
+
"โ
Consolidated recordings: '{}' โ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)",
611
+
if r1_plays >= r2_plays {
612
+
&candidate.recording2_name
613
+
} else {
614
+
&candidate.recording1_name
615
+
},
616
+
keep_name,
617
+
final_confidence,
618
+
shared_artists,
619
+
updated_plays.rows_affected()
620
+
);
621
+
}
622
+
}
623
+
624
+
tracing::info!(
625
+
"๐ Recording consolidation complete: {} recordings consolidated",
626
+
consolidated_count
627
+
);
628
+
Ok(consolidated_count)
629
+
}
630
+
631
+
/// Preview consolidation candidates to show what would be merged
632
+
pub async fn preview_consolidation_candidates(
633
+
&self,
634
+
min_confidence: f64,
635
+
) -> anyhow::Result<()> {
636
+
tracing::info!(
637
+
"๐ Previewing consolidation candidates (confidence >= {:.2})",
638
+
min_confidence
639
+
);
640
+
641
+
// Preview artist consolidations
642
+
let artist_candidates = sqlx::query!(
643
+
r#"
644
+
SELECT DISTINCT
645
+
ae1.name as synthetic_name,
646
+
ae2.name as target_name,
647
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score,
648
+
COUNT(ptae1.play_uri) as synthetic_plays,
649
+
COUNT(ptae2.play_uri) as target_plays
650
+
FROM artists_extended ae1
651
+
CROSS JOIN artists_extended ae2
652
+
LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id
653
+
LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id
654
+
WHERE ae1.id != ae2.id
655
+
AND ae1.mbid_type = 'synthetic'
656
+
AND ae2.mbid_type = 'musicbrainz'
657
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1
658
+
GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score
659
+
ORDER BY similarity_score DESC
660
+
LIMIT 10
661
+
"#,
662
+
min_confidence as f32
663
+
)
664
+
.fetch_all(&self.sql)
665
+
.await?;
666
+
667
+
if !artist_candidates.is_empty() {
668
+
tracing::info!("๐ฏ Artist consolidation candidates:");
669
+
for candidate in artist_candidates {
670
+
tracing::info!(
671
+
" '{}' โ '{}' (confidence: {:.2}, {} + {} plays)",
672
+
candidate.synthetic_name,
673
+
candidate.target_name,
674
+
candidate.similarity_score.unwrap_or(0.0),
675
+
candidate.synthetic_plays.unwrap_or(0),
676
+
candidate.target_plays.unwrap_or(0)
677
+
);
678
+
}
679
+
}
680
+
681
+
// Preview release consolidations (with artist context)
682
+
let release_candidates = sqlx::query!(
683
+
r#"
684
+
SELECT DISTINCT
685
+
r1.name as release1_name,
686
+
r2.name as release2_name,
687
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
688
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists,
689
+
STRING_AGG(DISTINCT ae.name, ', ') as artist_names
690
+
FROM releases r1
691
+
CROSS JOIN releases r2
692
+
INNER JOIN plays p1 ON p1.release_mbid = r1.mbid
693
+
INNER JOIN plays p2 ON p2.release_mbid = r2.mbid
694
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
695
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
696
+
INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id
697
+
WHERE r1.mbid != r2.mbid
698
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
699
+
AND ptae1.artist_id = ptae2.artist_id
700
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
701
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0
702
+
ORDER BY similarity_score DESC
703
+
LIMIT 5
704
+
"#,
705
+
min_confidence as f32
706
+
)
707
+
.fetch_all(&self.sql)
708
+
.await?;
709
+
710
+
if !release_candidates.is_empty() {
711
+
tracing::info!("๐ฟ Release consolidation candidates (with artist context):");
712
+
for candidate in release_candidates {
713
+
tracing::info!(
714
+
" '{}' โ '{}' (confidence: {:.2}, {} shared artists: {})",
715
+
candidate.release1_name,
716
+
candidate.release2_name,
717
+
candidate.similarity_score.unwrap_or(0.0),
718
+
candidate.shared_artists.unwrap_or(0),
719
+
candidate.artist_names.unwrap_or_default()
720
+
);
721
+
}
722
+
}
723
+
724
+
// Preview recording consolidations (with artist context)
725
+
let recording_candidates = sqlx::query!(
726
+
r#"
727
+
SELECT DISTINCT
728
+
r1.name as recording1_name,
729
+
r2.name as recording2_name,
730
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
731
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists,
732
+
STRING_AGG(DISTINCT ae.name, ', ') as artist_names
733
+
FROM recordings r1
734
+
CROSS JOIN recordings r2
735
+
INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid
736
+
INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid
737
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
738
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
739
+
INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id
740
+
WHERE r1.mbid != r2.mbid
741
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
742
+
AND ptae1.artist_id = ptae2.artist_id
743
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
744
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0
745
+
ORDER BY similarity_score DESC
746
+
LIMIT 5
747
+
"#,
748
+
min_confidence as f32
749
+
)
750
+
.fetch_all(&self.sql)
751
+
.await?;
752
+
753
+
if !recording_candidates.is_empty() {
754
+
tracing::info!("๐ต Recording consolidation candidates (with artist context):");
755
+
for candidate in recording_candidates {
756
+
tracing::info!(
757
+
" '{}' โ '{}' (confidence: {:.2}, {} shared artists: {})",
758
+
candidate.recording1_name,
759
+
candidate.recording2_name,
760
+
candidate.similarity_score.unwrap_or(0.0),
761
+
candidate.shared_artists.unwrap_or(0),
762
+
candidate.artist_names.unwrap_or_default()
763
+
);
764
+
}
765
+
}
766
+
767
+
Ok(())
768
+
}
769
+
770
+
/// Run full batch consolidation for all entity types
771
+
pub async fn run_full_consolidation(&self) -> anyhow::Result<()> {
772
+
tracing::info!("๐ Starting full batch consolidation process");
773
+
774
+
// First, preview what we would consolidate
775
+
self.preview_consolidation_candidates(0.92).await?;
776
+
777
+
let artist_count = self.consolidate_synthetic_artists(0.92).await?;
778
+
let release_count = self.consolidate_duplicate_releases(0.92).await?;
779
+
let recording_count = self.consolidate_duplicate_recordings(0.92).await?;
780
+
781
+
tracing::info!(
782
+
"๐ Full consolidation complete! Artists: {}, Releases: {}, Recordings: {}",
783
+
artist_count,
784
+
release_count,
785
+
recording_count
786
+
);
787
+
788
+
Ok(())
789
+
}
790
+
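`run_full_consolidation` hard-codes the 0.92 confidence floor for all three passes. If that ever needs tuning without a rebuild, a tiny helper like the sketch below could read it from the environment; the `CONSOLIDATION_MIN_CONFIDENCE` variable name is an assumption, nothing in this diff reads it today.

```rust
/// Sketch: read the consolidation confidence floor from the environment,
/// falling back to the 0.92 used by run_full_consolidation above.
/// The variable name is hypothetical and not part of the service yet.
fn consolidation_min_confidence() -> f64 {
    std::env::var("CONSOLIDATION_MIN_CONFIDENCE")
        .ok()
        .and_then(|v| v.parse::<f64>().ok())
        // Only accept values that are valid as a similarity threshold.
        .filter(|v| (0.0..=1.0).contains(v))
        .unwrap_or(0.92)
}
```

Each `consolidate_*` call would then take `consolidation_min_confidence()` instead of the literal, keeping the default behaviour identical.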
791
+
/// Generate a synthetic MBID for artists without MusicBrainz data using database function
792
+
async fn generate_synthetic_mbid(&self, artist_name: &str) -> anyhow::Result<Uuid> {
793
+
let result = sqlx::query_scalar!("SELECT generate_synthetic_mbid($1)", artist_name)
794
+
.fetch_one(&self.sql)
795
+
.await?;
796
+
797
+
result.ok_or_else(|| anyhow!("Failed to generate synthetic MBID"))
798
+
}
799
+
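The actual synthetic MBID comes from the Postgres function `generate_synthetic_mbid`, whose definition is not part of this hunk. As a rough illustration of the idea only (not the real implementation), a deterministic synthetic ID can be derived as a name-based UUIDv5 under a project namespace, so the same artist name always maps to the same synthetic MBID; the namespace constant below is an assumption.

```rust
use uuid::Uuid; // requires the uuid crate's "v5" feature

// Assumed namespace, chosen here purely for illustration.
const SYNTHETIC_NAMESPACE: Uuid = Uuid::from_u128(0x7465616c_0000_0000_0000_000000000000);

/// Sketch of a deterministic synthetic MBID: same normalized name,
/// same UUID. The real logic lives in the generate_synthetic_mbid()
/// database function and may differ.
fn synthetic_mbid_sketch(artist_name: &str) -> Uuid {
    let normalized = artist_name.trim().to_lowercase();
    Uuid::new_v5(&SYNTHETIC_NAMESPACE, normalized.as_bytes())
}
```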
800
+
/// Generate a fallback artist name for tracks without any artist information
801
+
fn generate_fallback_artist(track_name: &str) -> String {
802
+
format!(
803
+
"Unknown Artist ({})",
804
+
track_name.chars().take(20).collect::<String>()
805
+
)
806
+
}
807
+
808
+
/// Normalize text for fuzzy matching with MusicBrainz-style cleaning
809
+
fn normalize_text(text: &str, is_artist: bool) -> String {
810
+
let cleaned = if is_artist {
811
+
MusicBrainzCleaner::clean_artist_name(text)
812
+
} else {
813
+
MusicBrainzCleaner::clean_track_name(text)
814
+
};
815
+
816
+
MusicBrainzCleaner::normalize_for_comparison(&cleaned)
817
+
}
818
+
819
+
/// Calculate string similarity with MusicBrainz-style cleaning
820
+
fn calculate_similarity(s1: &str, s2: &str, is_artist: bool) -> f64 {
821
+
let s1_norm = Self::normalize_text(s1, is_artist);
822
+
let s2_norm = Self::normalize_text(s2, is_artist);
823
+
824
+
if s1_norm == s2_norm {
825
+
return 1.0;
826
+
}
827
+
828
+
if s1_norm.is_empty() || s2_norm.is_empty() {
829
+
return 0.0;
830
+
}
831
+
832
+
// Calculate basic similarity
833
+
let max_len = s1_norm.len().max(s2_norm.len()) as f64;
834
+
let min_len = s1_norm.len().min(s2_norm.len()) as f64;
835
+
836
+
// Character-based similarity
837
+
let common_chars = s1_norm
838
+
.chars()
839
+
.zip(s2_norm.chars())
840
+
.filter(|(a, b)| a == b)
841
+
.count() as f64;
842
+
843
+
// Word-based similarity boost
844
+
let s1_words: std::collections::HashSet<&str> = s1_norm.split_whitespace().collect();
845
+
let s2_words: std::collections::HashSet<&str> = s2_norm.split_whitespace().collect();
846
+
let common_words = s1_words.intersection(&s2_words).count() as f64;
847
+
let total_words = s1_words.union(&s2_words).count() as f64;
848
+
849
+
let word_similarity = if total_words > 0.0 {
850
+
common_words / total_words
851
+
} else {
852
+
0.0
853
+
};
854
+
let char_similarity = common_chars / max_len;
855
+
856
+
// Boost for very similar lengths (helps with minor differences)
857
+
let length_factor = if max_len > 0.0 {
858
+
min_len / max_len
859
+
} else {
860
+
0.0
861
+
};
862
+
863
+
// Weighted combination: 50% word similarity, 30% char similarity, 20% length factor
864
+
(word_similarity * 0.5) + (char_similarity * 0.3) + (length_factor * 0.2)
865
+
}
866
+
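To make the 50/30/20 weighting above concrete, here is a self-contained sketch that applies the same combination to two raw strings. Normalization here is just trim + lowercase; the real `calculate_similarity` routes names through `MusicBrainzCleaner` first, so production scores will differ.

```rust
use std::collections::HashSet;

// Self-contained illustration of the word/char/length weighting used above.
fn sketch_similarity(s1: &str, s2: &str) -> f64 {
    let a = s1.trim().to_lowercase();
    let b = s2.trim().to_lowercase();
    if a == b {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }

    let max_len = a.len().max(b.len()) as f64;
    let min_len = a.len().min(b.len()) as f64;

    // Positional character overlap.
    let common_chars = a.chars().zip(b.chars()).filter(|(x, y)| x == y).count() as f64;

    // Jaccard overlap on whitespace-separated words.
    let wa: HashSet<&str> = a.split_whitespace().collect();
    let wb: HashSet<&str> = b.split_whitespace().collect();
    let word_sim = wa.intersection(&wb).count() as f64 / wa.union(&wb).count() as f64;

    (word_sim * 0.5) + (common_chars / max_len * 0.3) + (min_len / max_len * 0.2)
}

fn main() {
    // "the beatles" vs "beatles, the": only 1 of 3 distinct words match
    // exactly, so even near-identical vocabularies score modestly -- which is
    // exactly why the MusicBrainz-style cleaning pass runs before this
    // heuristic in the code above.
    println!("{:.2}", sketch_similarity("The Beatles", "Beatles, The"));
}
```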
867
+
/// Find existing artists that fuzzy match the given name
868
+
async fn find_fuzzy_artist_matches(
869
+
&self,
870
+
artist_name: &str,
871
+
_track_name: &str,
872
+
_album_name: Option<&str>,
873
+
) -> anyhow::Result<Vec<FuzzyMatchCandidate>> {
874
+
let normalized_name = Self::normalize_text(artist_name, true);
875
+
876
+
// Search for artists with similar names using trigram similarity
877
+
let candidates = sqlx::query!(
878
+
r#"
879
+
SELECT
880
+
ae.id,
881
+
ae.name
882
+
FROM artists_extended ae
883
+
WHERE ae.mbid_type = 'musicbrainz'
884
+
AND (
885
+
LOWER(TRIM(ae.name)) = $1
886
+
OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%'
887
+
OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%'
888
+
OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6
889
+
)
890
+
ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC
891
+
LIMIT 10
892
+
"#,
893
+
normalized_name
894
+
)
895
+
.fetch_all(&self.sql)
896
+
.await
897
+
.unwrap_or_default();
898
+
899
+
let mut matches = Vec::new();
900
+
901
+
for candidate in candidates {
902
+
let name_similarity = Self::calculate_similarity(artist_name, &candidate.name, true);
903
+
904
+
// Base confidence from name similarity
905
+
let mut confidence = name_similarity;
906
+
907
+
// Boost confidence for exact matches after normalization
908
+
if Self::normalize_text(artist_name, true)
909
+
== Self::normalize_text(&candidate.name, true)
910
+
{
911
+
confidence = confidence.max(0.95);
912
+
}
913
+
914
+
// Additional boost for cleaned matches
915
+
let cleaned_input = MusicBrainzCleaner::clean_artist_name(artist_name);
916
+
let cleaned_candidate = MusicBrainzCleaner::clean_artist_name(&candidate.name);
917
+
if MusicBrainzCleaner::normalize_for_comparison(&cleaned_input)
918
+
== MusicBrainzCleaner::normalize_for_comparison(&cleaned_candidate)
919
+
{
920
+
confidence = confidence.max(0.9);
921
+
}
922
+
923
+
// Lower threshold since we have better cleaning now
924
+
if confidence >= 0.8 {
925
+
matches.push(FuzzyMatchCandidate {
926
+
artist_id: candidate.id,
927
+
name: candidate.name,
928
+
confidence,
929
+
});
930
+
}
931
+
}
932
+
933
+
// Sort by confidence descending
934
+
matches.sort_by(|a, b| {
935
+
b.confidence
936
+
.partial_cmp(&a.confidence)
937
+
.unwrap_or(std::cmp::Ordering::Equal)
938
+
});
939
+
940
+
Ok(matches)
941
+
}
942
+
943
+
/// Try to match an artist to existing MusicBrainz data using fuzzy matching
944
+
async fn find_or_create_artist_with_fuzzy_matching(
945
+
&self,
946
+
artist_name: &str,
947
+
mbid: Option<&str>,
948
+
track_name: &str,
949
+
album_name: Option<&str>,
950
+
) -> anyhow::Result<i32> {
951
+
// If we already have an MBID, use it directly
952
+
if let Some(mbid) = mbid {
953
+
return self.insert_artist_extended(Some(mbid), artist_name).await;
954
+
}
955
+
956
+
// Try fuzzy matching against existing MusicBrainz artists
957
+
let matches = self
958
+
.find_fuzzy_artist_matches(artist_name, track_name, album_name)
959
+
.await?;
960
+
961
+
if let Some(best_match) = matches.first() {
962
+
// Use high confidence threshold for automatic matching
963
+
if best_match.confidence >= 0.92 {
964
+
tracing::info!(
965
+
"๐ Fuzzy matched '{}' to existing artist '{}' (confidence: {:.2})",
966
+
artist_name,
967
+
best_match.name,
968
+
best_match.confidence
969
+
);
970
+
971
+
// Update the existing artist name if the new one seems more complete
972
+
if artist_name.len() > best_match.name.len() && best_match.confidence >= 0.95 {
973
+
sqlx::query!(
974
+
"UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2",
975
+
artist_name,
976
+
best_match.artist_id
977
+
)
978
+
.execute(&self.sql)
979
+
.await?;
980
+
}
981
+
982
+
return Ok(best_match.artist_id);
983
+
} else if best_match.confidence >= 0.85 {
984
+
tracing::debug!(
985
+
"๐ค Potential match for '{}' -> '{}' (confidence: {:.2}) but below auto-match threshold",
986
+
artist_name,
987
+
best_match.name,
988
+
best_match.confidence
989
+
);
990
+
}
991
+
}
992
+
993
+
// No good match found, create synthetic artist
994
+
self.insert_artist_extended(None, artist_name).await
995
+
}
996
+
997
+
/// Inserts or updates an artist in the database using the extended table.
998
+
/// Returns the internal ID of the artist.
999
+
async fn insert_artist_extended(&self, mbid: Option<&str>, name: &str) -> anyhow::Result<i32> {
1000
+
if let Some(mbid) = mbid {
1001
+
let artist_uuid = Uuid::parse_str(mbid)?;
1002
+
let res = sqlx::query!(
1003
+
r#"
1004
+
INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')
1005
+
ON CONFLICT (mbid) DO UPDATE SET
1006
+
name = EXCLUDED.name,
1007
+
updated_at = NOW()
1008
+
RETURNING id;
1009
+
"#,
1010
+
artist_uuid,
1011
+
name
1012
+
)
1013
+
.fetch_one(&self.sql)
1014
+
.await?;
1015
+
Ok(res.id)
1016
+
} else {
1017
+
// Artist without MBID - generate synthetic MBID
1018
+
let synthetic_uuid = self.generate_synthetic_mbid(name).await?;
1019
+
1020
+
let res = sqlx::query!(
1021
+
r#"
1022
+
INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic')
1023
+
ON CONFLICT (mbid) DO UPDATE SET
1024
+
name = EXCLUDED.name,
1025
+
updated_at = NOW()
1026
+
RETURNING id;
1027
+
"#,
1028
+
synthetic_uuid,
1029
+
name
1030
+
)
1031
+
.fetch_one(&self.sql)
1032
+
.await?;
1033
+
Ok(res.id)
1034
+
}
1035
}
1036
1037
/// Inserts or updates a release in the database.
1038
/// Returns the Uuid of the release.
1039
async fn insert_release(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
1040
let release_uuid = Uuid::parse_str(mbid)?;
1041
+
1042
+
// Extract discriminant from release name for new releases
1043
+
// Prioritize edition-specific patterns for better quality
1044
+
let discriminant = self
1045
+
.extract_edition_discriminant_from_db(name)
1046
+
.await
1047
+
.or_else(|| {
1048
+
futures::executor::block_on(async { self.extract_discriminant_from_db(name).await })
1049
+
});
1050
+
1051
let res = sqlx::query!(
1052
r#"
1053
+
INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3)
1054
+
ON CONFLICT (mbid) DO UPDATE SET
1055
+
name = EXCLUDED.name,
1056
+
discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant)
1057
RETURNING mbid;
1058
"#,
1059
release_uuid,
1060
+
name,
1061
+
discriminant
1062
)
1063
.fetch_all(&self.sql)
1064
.await?;
···
1074
/// Returns the Uuid of the recording.
1075
async fn insert_recording(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
1076
let recording_uuid = Uuid::parse_str(mbid)?;
1077
+
1078
+
// Extract discriminant from recording name for new recordings
1079
+
// Prioritize edition-specific patterns for better quality
1080
+
let discriminant = self
1081
+
.extract_edition_discriminant_from_db(name)
1082
+
.await
1083
+
.or_else(|| {
1084
+
futures::executor::block_on(async { self.extract_discriminant_from_db(name).await })
1085
+
});
1086
+
1087
let res = sqlx::query!(
1088
r#"
1089
+
INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)
1090
+
ON CONFLICT (mbid) DO UPDATE SET
1091
+
name = EXCLUDED.name,
1092
+
discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)
1093
RETURNING mbid;
1094
"#,
1095
recording_uuid,
1096
+
name,
1097
+
discriminant
1098
)
1099
.fetch_all(&self.sql)
1100
.await?;
···
1105
1106
Ok(recording_uuid)
1107
}
1108
+
1109
+
/// Extract discriminant from name using database function
1110
+
async fn extract_discriminant_from_db(&self, name: &str) -> Option<String> {
1111
+
sqlx::query_scalar!("SELECT extract_discriminant($1)", name)
1112
+
.fetch_one(&self.sql)
1113
+
.await
1114
+
.ok()
1115
+
.flatten()
1116
+
}
1117
+
1118
+
/// Extract edition-specific discriminant from name using database function
1119
+
async fn extract_edition_discriminant_from_db(&self, name: &str) -> Option<String> {
1120
+
sqlx::query_scalar!("SELECT extract_edition_discriminant($1)", name)
1121
+
.fetch_one(&self.sql)
1122
+
.await
1123
+
.ok()
1124
+
.flatten()
1125
+
}
1126
+
1127
+
// /// Get base name without discriminant using database function
1128
+
// async fn get_base_name_from_db(&self, name: &str) -> String {
1129
+
// sqlx::query_scalar!("SELECT get_base_name($1)", name)
1130
+
// .fetch_one(&self.sql)
1131
+
// .await
1132
+
// .ok()
1133
+
// .flatten()
1134
+
// .unwrap_or_else(|| name.to_string())
1135
+
// }
1136
1137
pub async fn insert_play(
1138
&self,
···
1144
) -> anyhow::Result<()> {
1145
dbg!("ingesting", play_record);
1146
let play_record = clean(play_record);
1147
+
let mut parsed_artists: Vec<(i32, String)> = vec![];
1148
+
let mut artist_names_raw: Vec<String> = vec![];
1149
+
1150
if let Some(ref artists) = &play_record.artists {
1151
for artist in artists {
1152
let artist_name = artist.artist_name.clone();
1153
+
artist_names_raw.push(artist_name.clone());
1154
+
let artist_mbid = artist.artist_mb_id.as_deref();
1155
+
1156
+
let artist_id = self
1157
+
.find_or_create_artist_with_fuzzy_matching(
1158
+
&artist_name,
1159
+
artist_mbid,
1160
+
&play_record.track_name,
1161
+
play_record.release_name.as_deref(),
1162
+
)
1163
+
.await?;
1164
+
parsed_artists.push((artist_id, artist_name.clone()));
1165
+
}
1166
+
} else if let Some(artist_names) = &play_record.artist_names {
1167
+
for (index, artist_name) in artist_names.iter().enumerate() {
1168
+
artist_names_raw.push(artist_name.clone());
1169
+
1170
+
let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids {
1171
+
mbid_list.get(index)
1172
} else {
1173
+
None
1174
+
};
1175
+
1176
+
let artist_id = self
1177
+
.find_or_create_artist_with_fuzzy_matching(
1178
+
artist_name,
1179
+
artist_mbid_opt.map(|s| s.as_str()),
1180
+
&play_record.track_name,
1181
+
play_record.release_name.as_deref(),
1182
+
)
1183
+
.await?;
1184
+
parsed_artists.push((artist_id, artist_name.clone()));
1185
}
1186
} else {
1187
+
// No artist information provided - create a fallback artist
1188
+
let fallback_artist_name = Self::generate_fallback_artist(&play_record.track_name);
1189
+
artist_names_raw.push(fallback_artist_name.clone());
1190
1191
+
let artist_id = self
1192
+
.find_or_create_artist_with_fuzzy_matching(
1193
+
&fallback_artist_name,
1194
+
None,
1195
+
&play_record.track_name,
1196
+
play_record.release_name.as_deref(),
1197
+
)
1198
+
.await?;
1199
+
parsed_artists.push((artist_id, fallback_artist_name));
1200
}
1201
1202
// Insert release if missing
···
1226
time::OffsetDateTime::from_unix_timestamp(played_time.as_ref().timestamp())
1227
.unwrap_or_else(|_| time::OffsetDateTime::now_utc());
1228
1229
+
// Extract discriminants from lexicon fields or infer from names
1230
+
// First try lexicon fields, then extract from names with preference for edition-specific patterns
1231
+
// TODO: Enable when types are updated with discriminant fields
1232
+
// let track_discriminant = play_record.track_discriminant.clone().or_else(|| {
1233
+
let track_discriminant = {
1234
+
// Try edition-specific patterns first, then general patterns
1235
+
futures::executor::block_on(async {
1236
+
self.extract_edition_discriminant_from_db(&play_record.track_name)
1237
+
.await
1238
+
.or_else(|| {
1239
+
futures::executor::block_on(async {
1240
+
self.extract_discriminant_from_db(&play_record.track_name)
1241
+
.await
1242
+
})
1243
+
})
1244
+
})
1245
+
};
1246
+
1247
+
// let release_discriminant = play_record.release_discriminant.clone().or_else(|| {
1248
+
let release_discriminant = {
1249
+
if let Some(ref release_name) = play_record.release_name {
1250
+
futures::executor::block_on(async {
1251
+
// Try edition-specific patterns first, then general patterns
1252
+
self.extract_edition_discriminant_from_db(release_name)
1253
+
.await
1254
+
.or_else(|| {
1255
+
futures::executor::block_on(async {
1256
+
self.extract_discriminant_from_db(release_name).await
1257
+
})
1258
+
})
1259
+
})
1260
+
} else {
1261
+
None
1262
+
}
1263
+
};
1264
+
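The two discriminant blocks above call `futures::executor::block_on` from inside an async function (and once more inside an `or_else` closure), which parks the Tokio worker thread while the query runs. Since both helpers are already `async`, the same "edition-specific first, then general" fallback can be written with plain awaits; the snippet below is a drop-in sketch for those two blocks inside `insert_play`, assuming nothing else depends on the closure form.

```rust
// Same fallback chain, fully async: no executor::block_on, so the Tokio
// worker thread is never blocked while the discriminant queries run.
let track_discriminant = match self
    .extract_edition_discriminant_from_db(&play_record.track_name)
    .await
{
    Some(d) => Some(d),
    None => {
        self.extract_discriminant_from_db(&play_record.track_name)
            .await
    }
};

let release_discriminant = match play_record.release_name.as_deref() {
    Some(release_name) => match self
        .extract_edition_discriminant_from_db(release_name)
        .await
    {
        Some(d) => Some(d),
        None => self.extract_discriminant_from_db(release_name).await,
    },
    None => None,
};
```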
1265
+
// Our main insert into plays with raw artist names and discriminants
1266
+
let artist_names_json = if !artist_names_raw.is_empty() {
1267
+
Some(serde_json::to_value(&artist_names_raw)?)
1268
+
} else {
1269
+
None
1270
+
};
1271
+
1272
sqlx::query!(
1273
r#"
1274
INSERT INTO plays (
1275
uri, cid, did, rkey, isrc, duration, track_name, played_time,
1276
processed_time, release_mbid, release_name, recording_mbid,
1277
+
submission_client_agent, music_service_base_domain, artist_names_raw,
1278
+
track_discriminant, release_discriminant
1279
) VALUES (
1280
$1, $2, $3, $4, $5, $6, $7, $8,
1281
+
NOW(), $9, $10, $11, $12, $13, $14, $15, $16
1282
) ON CONFLICT(uri) DO UPDATE SET
1283
isrc = EXCLUDED.isrc,
1284
duration = EXCLUDED.duration,
···
1289
release_name = EXCLUDED.release_name,
1290
recording_mbid = EXCLUDED.recording_mbid,
1291
submission_client_agent = EXCLUDED.submission_client_agent,
1292
+
music_service_base_domain = EXCLUDED.music_service_base_domain,
1293
+
artist_names_raw = EXCLUDED.artist_names_raw,
1294
+
track_discriminant = EXCLUDED.track_discriminant,
1295
+
release_discriminant = EXCLUDED.release_discriminant;
1296
"#,
1297
uri,
1298
cid,
···
1307
recording_mbid_opt,
1308
play_record.submission_client_agent,
1309
play_record.music_service_base_domain,
1310
+
artist_names_json,
1311
+
track_discriminant,
1312
+
release_discriminant
1313
)
1314
.execute(&self.sql)
1315
.await?;
1316
1317
+
// Insert plays into the extended join table (supports all artists)
1318
+
for (artist_id, artist_name) in &parsed_artists {
1319
sqlx::query!(
1320
r#"
1321
+
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES
1322
+
($1, $2, $3)
1323
+
ON CONFLICT (play_uri, artist_id) DO NOTHING;
1324
+
"#,
1325
uri,
1326
+
artist_id,
1327
artist_name
1328
)
1329
.execute(&self.sql)
+51
-24
services/cadet/src/main.rs
+51
-24
services/cadet/src/main.rs
···
17
mod cursor;
18
mod db;
19
mod ingestors;
20
-
mod resolve;
21
mod redis_client;
22
23
fn setup_tracing() {
24
tracing_subscriber::fmt()
···
96
97
// CAR import job worker
98
let car_ingestor = ingestors::car::CarImportIngestor::new(pool.clone());
99
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
100
-
101
match redis_client::RedisClient::new(&redis_url) {
102
Ok(redis_client) => {
103
// Spawn CAR import job processing task
104
tokio::spawn(async move {
105
-
use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, JobProgress, queue_keys};
106
-
use tracing::{info, error};
107
use chrono::Utc;
108
-
109
info!("Starting CAR import job worker, polling Redis queue...");
110
-
111
loop {
112
// Block for up to 10 seconds waiting for jobs
113
match redis_client.pop_job(queue_keys::CAR_IMPORT_JOBS, 10).await {
114
Ok(Some(job_data)) => {
115
info!("Received CAR import job: {}", job_data);
116
-
117
// Parse job
118
match serde_json::from_str::<CarImportJob>(&job_data) {
119
Ok(job) => {
···
132
blocks_processed: None,
133
}),
134
};
135
-
136
let status_key = queue_keys::job_status_key(&job.request_id);
137
-
if let Ok(status_data) = serde_json::to_string(&processing_status) {
138
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
139
}
140
-
141
// Process the job
142
-
match car_ingestor.fetch_and_process_identity_car(&job.identity).await {
143
Ok(import_id) => {
144
-
info!("โ
CAR import job completed successfully: {}", job.request_id);
145
-
146
let completed_status = CarImportJobStatus {
147
status: JobStatus::Completed,
148
created_at: job.created_at,
···
150
completed_at: Some(Utc::now()),
151
error_message: None,
152
progress: Some(JobProgress {
153
-
step: format!("CAR import completed: {}", import_id),
154
user_did: None,
155
pds_host: None,
156
car_size_bytes: None,
157
blocks_processed: None,
158
}),
159
};
160
-
161
-
if let Ok(status_data) = serde_json::to_string(&completed_status) {
162
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
163
}
164
}
165
Err(e) => {
166
-
error!("โ CAR import job failed: {}: {}", job.request_id, e);
167
-
168
let failed_status = CarImportJobStatus {
169
status: JobStatus::Failed,
170
created_at: job.created_at,
···
173
error_message: Some(e.to_string()),
174
progress: None,
175
};
176
-
177
-
if let Ok(status_data) = serde_json::to_string(&failed_status) {
178
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
179
}
180
}
181
}
···
17
mod cursor;
18
mod db;
19
mod ingestors;
20
mod redis_client;
21
+
mod resolve;
22
23
fn setup_tracing() {
24
tracing_subscriber::fmt()
···
96
97
// CAR import job worker
98
let car_ingestor = ingestors::car::CarImportIngestor::new(pool.clone());
99
+
let redis_url =
100
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
101
+
102
match redis_client::RedisClient::new(&redis_url) {
103
Ok(redis_client) => {
104
// Spawn CAR import job processing task
105
tokio::spawn(async move {
106
use chrono::Utc;
107
+
use ingestors::car::jobs::{
108
+
queue_keys, CarImportJob, CarImportJobStatus, JobProgress, JobStatus,
109
+
};
110
+
use tracing::{error, info};
111
+
112
info!("Starting CAR import job worker, polling Redis queue...");
113
+
114
loop {
115
// Block for up to 10 seconds waiting for jobs
116
match redis_client.pop_job(queue_keys::CAR_IMPORT_JOBS, 10).await {
117
Ok(Some(job_data)) => {
118
info!("Received CAR import job: {}", job_data);
119
+
120
// Parse job
121
match serde_json::from_str::<CarImportJob>(&job_data) {
122
Ok(job) => {
···
135
blocks_processed: None,
136
}),
137
};
138
+
139
let status_key = queue_keys::job_status_key(&job.request_id);
140
+
if let Ok(status_data) =
141
+
serde_json::to_string(&processing_status)
142
+
{
143
+
let _ = redis_client
144
+
.update_job_status(&status_key, &status_data)
145
+
.await;
146
}
147
+
148
// Process the job
149
+
match car_ingestor
150
+
.fetch_and_process_identity_car(&job.identity)
151
+
.await
152
+
{
153
Ok(import_id) => {
154
+
info!(
155
+
"โ
CAR import job completed successfully: {}",
156
+
job.request_id
157
+
);
158
+
159
let completed_status = CarImportJobStatus {
160
status: JobStatus::Completed,
161
created_at: job.created_at,
···
163
completed_at: Some(Utc::now()),
164
error_message: None,
165
progress: Some(JobProgress {
166
+
step: format!(
167
+
"CAR import completed: {}",
168
+
import_id
169
+
),
170
user_did: None,
171
pds_host: None,
172
car_size_bytes: None,
173
blocks_processed: None,
174
}),
175
};
176
+
177
+
if let Ok(status_data) =
178
+
serde_json::to_string(&completed_status)
179
+
{
180
+
let _ = redis_client
181
+
.update_job_status(&status_key, &status_data)
182
+
.await;
183
}
184
}
185
Err(e) => {
186
+
error!(
187
+
"โ CAR import job failed: {}: {}",
188
+
job.request_id, e
189
+
);
190
+
191
let failed_status = CarImportJobStatus {
192
status: JobStatus::Failed,
193
created_at: job.created_at,
···
196
error_message: Some(e.to_string()),
197
progress: None,
198
};
199
+
200
+
if let Ok(status_data) =
201
+
serde_json::to_string(&failed_status)
202
+
{
203
+
let _ = redis_client
204
+
.update_job_status(&status_key, &status_data)
205
+
.await;
206
}
207
}
208
}
+3
-3
services/cadet/src/redis_client.rs
+3
-3
services/cadet/src/redis_client.rs
···
20
pub async fn pop_job(&self, queue_key: &str, timeout_seconds: u64) -> Result<Option<String>> {
21
let mut conn = self.get_connection().await?;
22
let result: Option<Vec<String>> = conn.brpop(queue_key, timeout_seconds as f64).await?;
23
-
24
match result {
25
Some(mut items) if items.len() >= 2 => {
26
// brpop returns [queue_name, item], we want the item
27
Ok(Some(items.remove(1)))
28
}
29
-
_ => Ok(None)
30
}
31
}
32
···
36
let _: () = conn.set(status_key, status_data).await?;
37
Ok(())
38
}
39
-
}
···
20
pub async fn pop_job(&self, queue_key: &str, timeout_seconds: u64) -> Result<Option<String>> {
21
let mut conn = self.get_connection().await?;
22
let result: Option<Vec<String>> = conn.brpop(queue_key, timeout_seconds as f64).await?;
23
+
24
match result {
25
Some(mut items) if items.len() >= 2 => {
26
// brpop returns [queue_name, item], we want the item
27
Ok(Some(items.remove(1)))
28
}
29
+
_ => Ok(None),
30
}
31
}
32
···
36
let _: () = conn.set(status_key, status_data).await?;
37
Ok(())
38
}
39
+
}
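For context on the queue contract `pop_job` relies on: Redis `BRPOP` returns a `[queue_name, payload]` pair, which is why the consumer keeps `items.remove(1)`. A producer only needs to `LPUSH` the serialized job onto the same list. Below is a minimal sketch using the same `redis` crate; the function name is illustrative, the queue key would be `queue_keys::CAR_IMPORT_JOBS`, and the payload is taken as a pre-serialized JSON string so no `CarImportJob` fields are invented here.

```rust
use redis::AsyncCommands;

/// Sketch of the producer side of the CAR import queue: LPUSH a serialized
/// job onto the list that RedisClient::pop_job BRPOPs from.
async fn enqueue_car_import(
    client: &redis::Client,
    queue_key: &str,
    payload_json: &str,
) -> anyhow::Result<()> {
    let mut conn = client.get_multiplexed_async_connection().await?;
    // LPUSH returns the new list length; together with BRPOP on the consumer
    // this forms a simple FIFO work queue.
    let _: i64 = conn.lpush(queue_key, payload_json).await?;
    Ok(())
}
```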
+55
services/cadet/target.sh
+55
services/cadet/target.sh
···
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Debug: Print all available build variables
5
+
echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM"
6
+
echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM"
7
+
echo "DEBUG: TARGETARCH=$TARGETARCH"
8
+
echo "DEBUG: TARGETOS=$TARGETOS"
9
+
10
+
# Use TARGETARCH directly (more reliable than TARGETPLATFORM)
11
+
TARGET_ARCH_VAR="${TARGETARCH:-}"
12
+
13
+
# If TARGETARCH is not set, try to extract from TARGETPLATFORM
14
+
if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then
15
+
TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2)
16
+
echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM"
17
+
fi
18
+
19
+
# Final fallback: detect from uname
20
+
if [ -z "$TARGET_ARCH_VAR" ]; then
21
+
ARCH=$(uname -m)
22
+
case "$ARCH" in
23
+
"x86_64")
24
+
TARGET_ARCH_VAR="amd64"
25
+
;;
26
+
"aarch64")
27
+
TARGET_ARCH_VAR="arm64"
28
+
;;
29
+
*)
30
+
echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH"
31
+
echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM"
32
+
exit 1
33
+
;;
34
+
esac
35
+
echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname"
36
+
fi
37
+
38
+
# Map architecture to Rust target
39
+
case "$TARGET_ARCH_VAR" in
40
+
"amd64")
41
+
export RUST_TARGET="x86_64-unknown-linux-gnu"
42
+
export TARGET_ARCH="amd64"
43
+
;;
44
+
"arm64")
45
+
export RUST_TARGET="aarch64-unknown-linux-gnu"
46
+
export TARGET_ARCH="arm64"
47
+
;;
48
+
*)
49
+
echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR"
50
+
echo "Supported architectures: amd64, arm64"
51
+
exit 1
52
+
;;
53
+
esac
54
+
55
+
echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
-226
services/migrations/20241220000001_initial_schema.sql
-226
services/migrations/20241220000001_initial_schema.sql
···
1
-
-- Initial comprehensive schema for Teal music platform
2
-
-- Based on services/cadet/sql/base.sql
3
-
4
-
CREATE TABLE artists (
5
-
mbid UUID PRIMARY KEY,
6
-
name TEXT NOT NULL,
7
-
play_count INTEGER DEFAULT 0
8
-
);
9
-
10
-
-- releases are synologous to 'albums'
11
-
CREATE TABLE releases (
12
-
mbid UUID PRIMARY KEY,
13
-
name TEXT NOT NULL,
14
-
play_count INTEGER DEFAULT 0
15
-
);
16
-
17
-
-- recordings are synologous to 'tracks' BUT tracks can be in multiple releases!
18
-
CREATE TABLE recordings (
19
-
mbid UUID PRIMARY KEY,
20
-
name TEXT NOT NULL,
21
-
play_count INTEGER DEFAULT 0
22
-
);
23
-
24
-
CREATE TABLE plays (
25
-
uri TEXT PRIMARY KEY,
26
-
did TEXT NOT NULL,
27
-
rkey TEXT NOT NULL,
28
-
cid TEXT NOT NULL,
29
-
isrc TEXT,
30
-
duration INTEGER,
31
-
track_name TEXT NOT NULL,
32
-
played_time TIMESTAMP WITH TIME ZONE,
33
-
processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
34
-
release_mbid UUID,
35
-
release_name TEXT,
36
-
recording_mbid UUID,
37
-
submission_client_agent TEXT,
38
-
music_service_base_domain TEXT,
39
-
origin_url TEXT,
40
-
FOREIGN KEY (release_mbid) REFERENCES releases (mbid),
41
-
FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid)
42
-
);
43
-
44
-
CREATE INDEX idx_plays_release_mbid ON plays (release_mbid);
45
-
CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid);
46
-
CREATE INDEX idx_plays_played_time ON plays (played_time);
47
-
CREATE INDEX idx_plays_did ON plays (did);
48
-
49
-
CREATE TABLE play_to_artists (
50
-
play_uri TEXT, -- references plays(uri)
51
-
artist_mbid UUID REFERENCES artists (mbid),
52
-
artist_name TEXT, -- storing here for ease of use when joining
53
-
PRIMARY KEY (play_uri, artist_mbid),
54
-
FOREIGN KEY (play_uri) REFERENCES plays (uri)
55
-
);
56
-
57
-
CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid);
58
-
59
-
-- Profiles table
60
-
CREATE TABLE profiles (
61
-
did TEXT PRIMARY KEY,
62
-
handle TEXT,
63
-
display_name TEXT,
64
-
description TEXT,
65
-
description_facets JSONB,
66
-
avatar TEXT, -- IPLD of the image, bafy...
67
-
banner TEXT,
68
-
created_at TIMESTAMP WITH TIME ZONE
69
-
);
70
-
71
-
-- User featured items table
72
-
CREATE TABLE featured_items (
73
-
did TEXT PRIMARY KEY,
74
-
mbid TEXT NOT NULL,
75
-
type TEXT NOT NULL
76
-
);
77
-
78
-
-- Statii table (status records)
79
-
CREATE TABLE statii (
80
-
uri TEXT PRIMARY KEY,
81
-
did TEXT NOT NULL,
82
-
rkey TEXT NOT NULL,
83
-
cid TEXT NOT NULL,
84
-
record JSONB NOT NULL,
85
-
indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
86
-
);
87
-
88
-
CREATE INDEX idx_statii_did_rkey ON statii (did, rkey);
89
-
90
-
-- Materialized view for artists' play counts
91
-
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
92
-
SELECT
93
-
a.mbid AS artist_mbid,
94
-
a.name AS artist_name,
95
-
COUNT(p.uri) AS play_count
96
-
FROM
97
-
artists a
98
-
LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
99
-
LEFT JOIN plays p ON p.uri = pta.play_uri
100
-
GROUP BY
101
-
a.mbid,
102
-
a.name;
103
-
104
-
CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid);
105
-
106
-
-- Materialized view for releases' play counts
107
-
CREATE MATERIALIZED VIEW mv_release_play_counts AS
108
-
SELECT
109
-
r.mbid AS release_mbid,
110
-
r.name AS release_name,
111
-
COUNT(p.uri) AS play_count
112
-
FROM
113
-
releases r
114
-
LEFT JOIN plays p ON p.release_mbid = r.mbid
115
-
GROUP BY
116
-
r.mbid,
117
-
r.name;
118
-
119
-
CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid);
120
-
121
-
-- Materialized view for recordings' play counts
122
-
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
123
-
SELECT
124
-
rec.mbid AS recording_mbid,
125
-
rec.name AS recording_name,
126
-
COUNT(p.uri) AS play_count
127
-
FROM
128
-
recordings rec
129
-
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
130
-
GROUP BY
131
-
rec.mbid,
132
-
rec.name;
133
-
134
-
CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid);
135
-
136
-
-- Global play count materialized view
137
-
CREATE MATERIALIZED VIEW mv_global_play_count AS
138
-
SELECT
139
-
COUNT(uri) AS total_plays,
140
-
COUNT(DISTINCT did) AS unique_listeners
141
-
FROM plays;
142
-
143
-
CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays);
144
-
145
-
-- Top artists in the last 30 days
146
-
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
147
-
SELECT
148
-
a.mbid AS artist_mbid,
149
-
a.name AS artist_name,
150
-
COUNT(p.uri) AS play_count
151
-
FROM artists a
152
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
153
-
INNER JOIN plays p ON p.uri = pta.play_uri
154
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
155
-
GROUP BY a.mbid, a.name
156
-
ORDER BY COUNT(p.uri) DESC;
157
-
158
-
-- Top releases in the last 30 days
159
-
CREATE MATERIALIZED VIEW mv_top_releases_30days AS
160
-
SELECT
161
-
r.mbid AS release_mbid,
162
-
r.name AS release_name,
163
-
COUNT(p.uri) AS play_count
164
-
FROM releases r
165
-
INNER JOIN plays p ON p.release_mbid = r.mbid
166
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
167
-
GROUP BY r.mbid, r.name
168
-
ORDER BY COUNT(p.uri) DESC;
169
-
170
-
-- Top artists for user in the last 30 days
171
-
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
172
-
SELECT
173
-
prof.did,
174
-
a.mbid AS artist_mbid,
175
-
a.name AS artist_name,
176
-
COUNT(p.uri) AS play_count
177
-
FROM artists a
178
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
179
-
INNER JOIN plays p ON p.uri = pta.play_uri
180
-
INNER JOIN profiles prof ON prof.did = p.did
181
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
182
-
GROUP BY prof.did, a.mbid, a.name
183
-
ORDER BY COUNT(p.uri) DESC;
184
-
185
-
-- Top artists for user in the last 7 days
186
-
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
187
-
SELECT
188
-
prof.did,
189
-
a.mbid AS artist_mbid,
190
-
a.name AS artist_name,
191
-
COUNT(p.uri) AS play_count
192
-
FROM artists a
193
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
194
-
INNER JOIN plays p ON p.uri = pta.play_uri
195
-
INNER JOIN profiles prof ON prof.did = p.did
196
-
WHERE p.played_time >= NOW() - INTERVAL '7 days'
197
-
GROUP BY prof.did, a.mbid, a.name
198
-
ORDER BY COUNT(p.uri) DESC;
199
-
200
-
-- Top releases for user in the last 30 days
201
-
CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS
202
-
SELECT
203
-
prof.did,
204
-
r.mbid AS release_mbid,
205
-
r.name AS release_name,
206
-
COUNT(p.uri) AS play_count
207
-
FROM releases r
208
-
INNER JOIN plays p ON p.release_mbid = r.mbid
209
-
INNER JOIN profiles prof ON prof.did = p.did
210
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
211
-
GROUP BY prof.did, r.mbid, r.name
212
-
ORDER BY COUNT(p.uri) DESC;
213
-
214
-
-- Top releases for user in the last 7 days
215
-
CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS
216
-
SELECT
217
-
prof.did,
218
-
r.mbid AS release_mbid,
219
-
r.name AS release_name,
220
-
COUNT(p.uri) AS play_count
221
-
FROM releases r
222
-
INNER JOIN plays p ON p.release_mbid = r.mbid
223
-
INNER JOIN profiles prof ON prof.did = p.did
224
-
WHERE p.played_time >= NOW() - INTERVAL '7 days'
225
-
GROUP BY prof.did, r.mbid, r.name
226
-
ORDER BY COUNT(p.uri) DESC;
···
-59
services/migrations/20241220000002_car_import_tables.sql
-59
services/migrations/20241220000002_car_import_tables.sql
···
1
-
-- CAR import functionality tables
2
-
-- For handling AT Protocol CAR file imports and processing
3
-
4
-
-- Tracks uploaded CAR files that are queued for processing
5
-
CREATE TABLE IF NOT EXISTS car_import_requests (
6
-
import_id TEXT PRIMARY KEY,
7
-
car_data_base64 TEXT NOT NULL,
8
-
status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed
9
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
10
-
processed_at TIMESTAMP WITH TIME ZONE,
11
-
error_message TEXT,
12
-
file_size_bytes INTEGER,
13
-
block_count INTEGER,
14
-
extracted_records_count INTEGER DEFAULT 0
15
-
);
16
-
17
-
CREATE INDEX idx_car_import_requests_status ON car_import_requests (status);
18
-
CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at);
19
-
20
-
-- Tracks raw IPLD blocks extracted from CAR files
21
-
CREATE TABLE IF NOT EXISTS car_blocks (
22
-
cid TEXT PRIMARY KEY,
23
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
24
-
block_data BYTEA NOT NULL,
25
-
decoded_successfully BOOLEAN DEFAULT FALSE,
26
-
collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc.
27
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
28
-
);
29
-
30
-
CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id);
31
-
CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type);
32
-
33
-
-- Tracks records extracted from CAR imports that were successfully processed
34
-
CREATE TABLE IF NOT EXISTS car_extracted_records (
35
-
id SERIAL PRIMARY KEY,
36
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
37
-
cid TEXT NOT NULL REFERENCES car_blocks(cid),
38
-
collection_type TEXT NOT NULL,
39
-
record_uri TEXT, -- AT URI if applicable (e.g., for play records)
40
-
synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123')
41
-
rkey TEXT,
42
-
extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
43
-
processing_notes TEXT
44
-
);
45
-
46
-
CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id);
47
-
CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type);
48
-
CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri);
49
-
50
-
-- Tracks import metadata and commit information
51
-
CREATE TABLE IF NOT EXISTS car_import_metadata (
52
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
53
-
metadata_key TEXT NOT NULL,
54
-
metadata_value JSONB NOT NULL,
55
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
56
-
PRIMARY KEY (import_id, metadata_key)
57
-
);
58
-
59
-
CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
···
-34
services/rocketman/Cargo.toml
-34
services/rocketman/Cargo.toml
···
1
-
[package]
2
-
name = "rocketman"
3
-
version = "0.2.3"
4
-
edition = "2021"
5
-
6
-
license = "MIT"
7
-
authors = ["Natalie B. <nat@natalie.sh>"]
8
-
repository = "https://github.com/espeon/cadet"
9
-
10
-
readme = "readme.md"
11
-
12
-
description = "A modular(ish) jetstream consumer."
13
-
14
-
[dependencies]
15
-
tokio.workspace = true
16
-
tokio-tungstenite.workspace = true
17
-
futures-util = "0.3"
18
-
url.workspace = true
19
-
rand.workspace = true
20
-
tracing.workspace = true
21
-
tracing-subscriber.workspace = true
22
-
metrics.workspace = true
23
-
derive_builder = "0.20.2"
24
-
bon = "3.3.2"
25
-
serde = { workspace = true, features = ["derive"] }
26
-
serde_json.workspace = true
27
-
flume.workspace = true
28
-
anyhow.workspace = true
29
-
async-trait.workspace = true
30
-
zstd = { version = "0.13.3", optional = true }
31
-
32
-
[features]
33
-
default = ["zstd"]
34
-
zstd = ["dep:zstd"]
···
-77
services/rocketman/examples/spew-bsky-posts.rs
-77
services/rocketman/examples/spew-bsky-posts.rs
···
1
-
use rocketman::{
2
-
connection::JetstreamConnection,
3
-
handler,
4
-
ingestion::LexiconIngestor,
5
-
options::JetstreamOptions,
6
-
types::event::{ Event, Commit },
7
-
};
8
-
use serde_json::Value;
9
-
use std::{
10
-
collections::HashMap,
11
-
sync::Arc,
12
-
sync::Mutex,
13
-
};
14
-
use async_trait::async_trait;
15
-
16
-
#[tokio::main]
17
-
async fn main() {
18
-
// init the builder
19
-
let opts = JetstreamOptions::builder()
20
-
// your EXACT nsids
21
-
.wanted_collections(vec!["app.bsky.feed.post".to_string()])
22
-
.build();
23
-
// create the jetstream connector
24
-
let jetstream = JetstreamConnection::new(opts);
25
-
26
-
// create your ingestors
27
-
let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new();
28
-
ingestors.insert(
29
-
// your EXACT nsid
30
-
"app.bsky.feed.post".to_string(),
31
-
Box::new(MyCoolIngestor),
32
-
);
33
-
34
-
35
-
// tracks the last message we've processed
36
-
let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
37
-
38
-
// get channels
39
-
let msg_rx = jetstream.get_msg_rx();
40
-
let reconnect_tx = jetstream.get_reconnect_tx();
41
-
42
-
// spawn a task to process messages from the queue.
43
-
// this is a simple implementation, you can use a more complex one based on needs.
44
-
let c_cursor = cursor.clone();
45
-
tokio::spawn(async move {
46
-
while let Ok(message) = msg_rx.recv_async().await {
47
-
if let Err(e) =
48
-
handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone())
49
-
.await
50
-
{
51
-
eprintln!("Error processing message: {}", e);
52
-
};
53
-
}
54
-
});
55
-
56
-
// connect to jetstream
57
-
// retries internally, but may fail if there is an extreme error.
58
-
if let Err(e) = jetstream.connect(cursor.clone()).await {
59
-
eprintln!("Failed to connect to Jetstream: {}", e);
60
-
std::process::exit(1);
61
-
}
62
-
}
63
-
64
-
pub struct MyCoolIngestor;
65
-
66
-
/// A cool ingestor implementation. Will just print the message. Does not do verification.
67
-
#[async_trait]
68
-
impl LexiconIngestor for MyCoolIngestor {
69
-
async fn ingest(&self, message: Event<Value>) -> anyhow::Result<()> {
70
-
if let Some(Commit { record: Some(record), .. }) = message.commit {
71
-
if let Some(Value::String(text)) = record.get("text") {
72
-
println!("{text:?}");
73
-
}
74
-
}
75
-
Ok(())
76
-
}
77
-
}
···
-11
services/rocketman/package.json
-11
services/rocketman/package.json
-74
services/rocketman/readme.md
-74
services/rocketman/readme.md
···
1
-
## Rocketman
2
-
3
-
A modular(ish) jetstream consumer. Backed by Tungstenite.
4
-
5
-
6
-
### Installation
7
-
```toml
8
-
[dependencies]
9
-
rocketman = "latest" # pyt the latest version here
10
-
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
11
-
```
12
-
### Usage
13
-
```rs
14
-
#[tokio::main]
15
-
async fn main() {
16
-
// init the builder
17
-
let opts = JetstreamOptions::builder()
18
-
// your EXACT nsids
19
-
.wanted_collections(vec!["com.example.cool.nsid".to_string()])
20
-
.build();
21
-
// create the jetstream connector
22
-
let jetstream = JetstreamConnection::new(opts);
23
-
24
-
// create your ingestors
25
-
let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new();
26
-
ingestors.insert(
27
-
// your EXACT nsid
28
-
"com.example.cool.nsid".to_string(),
29
-
Box::new(MyCoolIngestor),
30
-
);
31
-
32
-
33
-
// tracks the last message we've processed
34
-
let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
35
-
36
-
// get channels
37
-
let msg_rx = jetstream.get_msg_rx();
38
-
let reconnect_tx = jetstream.get_reconnect_tx();
39
-
40
-
// spawn a task to process messages from the queue.
41
-
// this is a simple implementation, you can use a more complex one based on needs.
42
-
let c_cursor = cursor.clone();
43
-
tokio::spawn(async move {
44
-
while let Ok(message) = msg_rx.recv_async().await {
45
-
if let Err(e) =
46
-
handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone())
47
-
.await
48
-
{
49
-
error!("Error processing message: {}", e);
50
-
};
51
-
}
52
-
});
53
-
54
-
// connect to jetstream
55
-
// retries internally, but may fail if there is an extreme error.
56
-
if let Err(e) = jetstream.connect(cursor.clone()).await {
57
-
error!("Failed to connect to Jetstream: {}", e);
58
-
std::process::exit(1);
59
-
}
60
-
}
61
-
62
-
pub struct MyCoolIngestor;
63
-
64
-
/// A cool ingestor implementation. Will just print the message. Does not do verification.
65
-
impl LexiconIngestor for MyCoolIngestor {
66
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
67
-
info!("{:?}", message);
68
-
// Process message for default lexicon.
69
-
Ok(())
70
-
}
71
-
}
72
-
```
73
-
### gratz
74
-
Based heavily on [phil's jetstream consumer on atcosm constellation.](https://github.com/atcosm/links/blob/main/constellation/src/consumer/jetstream.rs)
···
-335
services/rocketman/src/connection.rs
-335
services/rocketman/src/connection.rs
···
1
-
use flume::{Receiver, Sender};
2
-
use futures_util::StreamExt;
3
-
use metrics::{counter, describe_counter, describe_histogram, histogram, Unit};
4
-
use std::cmp::{max, min};
5
-
use std::sync::{Arc, Mutex};
6
-
use std::time::Instant;
7
-
use tokio::time::{sleep, Duration};
8
-
use tokio_tungstenite::{connect_async, tungstenite::Message};
9
-
use tracing::{error, info};
10
-
use url::Url;
11
-
12
-
use crate::options::JetstreamOptions;
13
-
use crate::time::system_time::SystemTimeProvider;
14
-
use crate::time::TimeProvider;
15
-
16
-
pub struct JetstreamConnection {
17
-
pub opts: JetstreamOptions,
18
-
reconnect_tx: flume::Sender<()>,
19
-
reconnect_rx: flume::Receiver<()>,
20
-
msg_tx: flume::Sender<Message>,
21
-
msg_rx: flume::Receiver<Message>,
22
-
}
23
-
24
-
impl JetstreamConnection {
25
-
pub fn new(opts: JetstreamOptions) -> Self {
26
-
let (reconnect_tx, reconnect_rx) = flume::bounded(opts.bound);
27
-
let (msg_tx, msg_rx) = flume::bounded(opts.bound);
28
-
Self {
29
-
opts,
30
-
reconnect_tx,
31
-
reconnect_rx,
32
-
msg_tx,
33
-
msg_rx,
34
-
}
35
-
}
36
-
37
-
pub fn get_reconnect_tx(&self) -> Sender<()> {
38
-
self.reconnect_tx.clone()
39
-
}
40
-
41
-
pub fn get_msg_rx(&self) -> Receiver<Message> {
42
-
self.msg_rx.clone()
43
-
}
44
-
45
-
fn build_ws_url(&self, cursor: Arc<Mutex<Option<u64>>>) -> String {
46
-
let mut url = Url::parse(&self.opts.ws_url.to_string()).unwrap();
47
-
48
-
// Append query params
49
-
if let Some(ref cols) = self.opts.wanted_collections {
50
-
for col in cols {
51
-
url.query_pairs_mut().append_pair("wantedCollections", col);
52
-
}
53
-
}
54
-
if let Some(ref dids) = self.opts.wanted_dids {
55
-
for did in dids {
56
-
url.query_pairs_mut().append_pair("wantedDids", did);
57
-
}
58
-
}
59
-
if let Some(cursor) = cursor.lock().unwrap().as_ref() {
60
-
url.query_pairs_mut()
61
-
.append_pair("cursor", &cursor.to_string());
62
-
}
63
-
#[cfg(feature = "zstd")]
64
-
if self.opts.compress {
65
-
url.query_pairs_mut().append_pair("compress", "true");
66
-
}
67
-
68
-
url.to_string()
69
-
}
70
-
71
-
pub async fn connect(
72
-
&self,
73
-
cursor: Arc<Mutex<Option<u64>>>,
74
-
) -> Result<(), Box<dyn std::error::Error>> {
75
-
describe_counter!(
76
-
"jetstream.connection.attempt",
77
-
Unit::Count,
78
-
"attempts to connect to jetstream service"
79
-
);
80
-
describe_counter!(
81
-
"jetstream.connection.error",
82
-
Unit::Count,
83
-
"errors connecting to jetstream service"
84
-
);
85
-
describe_histogram!(
86
-
"jetstream.connection.duration",
87
-
Unit::Seconds,
88
-
"Time connected to jetstream service"
89
-
);
90
-
describe_counter!(
91
-
"jetstream.connection.reconnect",
92
-
Unit::Count,
93
-
"reconnects to jetstream service"
94
-
);
95
-
let mut retry_interval = 1;
96
-
97
-
let time_provider = SystemTimeProvider::new();
98
-
99
-
let mut start_time = time_provider.now();
100
-
101
-
loop {
102
-
counter!("jetstream.connection.attempt").increment(1);
103
-
info!("Connecting to {}", self.opts.ws_url);
104
-
let start = Instant::now();
105
-
106
-
let ws_url = self.build_ws_url(cursor.clone());
107
-
108
-
match connect_async(ws_url).await {
109
-
Ok((ws_stream, response)) => {
110
-
let elapsed = start.elapsed();
111
-
info!("Connected. HTTP status: {}", response.status());
112
-
113
-
let (_, mut read) = ws_stream.split();
114
-
115
-
loop {
116
-
// Inner loop to handle messages, reconnect signals, and receive timeout
117
-
let receive_timeout =
118
-
sleep(Duration::from_secs(self.opts.timeout_time_sec as u64));
119
-
tokio::pin!(receive_timeout);
120
-
121
-
loop {
122
-
tokio::select! {
123
-
message_result = read.next() => {
124
-
match message_result {
125
-
Some(message) => {
126
-
// Reset timeout on message received
127
-
receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64));
128
-
129
-
histogram!("jetstream.connection.duration").record(elapsed.as_secs_f64());
130
-
match message {
131
-
Ok(message) => {
132
-
if let Err(err) = self.msg_tx.send_async(message).await {
133
-
counter!("jetstream.error").increment(1);
134
-
error!("Failed to queue message: {}", err);
135
-
}
136
-
}
137
-
Err(e) => {
138
-
counter!("jetstream.error").increment(1);
139
-
error!("Error: {}", e);
140
-
}
141
-
}
142
-
}
143
-
None => {
144
-
info!("Stream closed by server.");
145
-
counter!("jetstream.connection.reconnect").increment(1);
146
-
break; // Stream ended, break inner loop to reconnect
147
-
}
148
-
}
149
-
}
150
-
_ = self.reconnect_rx.recv_async() => {
151
-
info!("Reconnect signal received.");
152
-
counter!("jetstream.connection.reconnect").increment(1);
153
-
break;
154
-
}
155
-
_ = &mut receive_timeout => {
156
-
// last final poll, just in case
157
-
match read.next().await {
158
-
Some(Ok(message)) => {
159
-
if let Err(err) = self.msg_tx.send_async(message).await {
160
-
counter!("jetstream.error").increment(1);
161
-
error!("Failed to queue message: {}", err);
162
-
}
163
-
// Reset timeout to continue
164
-
receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64));
165
-
}
166
-
Some(Err(e)) => {
167
-
counter!("jetstream.error").increment(1);
168
-
error!("Error receiving message during final poll: {}", e);
169
-
counter!("jetstream.connection.reconnect").increment(1);
170
-
break;
171
-
}
172
-
None => {
173
-
info!("No commits received in {} seconds, reconnecting.", self.opts.timeout_time_sec);
174
-
counter!("jetstream.connection.reconnect").increment(1);
175
-
break;
176
-
}
177
-
}
178
-
}
179
-
}
180
-
}
181
-
}
182
-
}
183
-
Err(e) => {
184
-
let elapsed_time = time_provider.elapsed(start_time);
185
-
// reset if time connected > the time we set
186
-
if elapsed_time.as_secs() > self.opts.max_retry_interval_seconds {
187
-
retry_interval = 0;
188
-
start_time = time_provider.now();
189
-
}
190
-
counter!("jetstream.connection.error").increment(1);
191
-
error!("Connection error: {}", e);
192
-
}
193
-
}
194
-
195
-
let sleep_time = max(1, min(self.opts.max_retry_interval_seconds, retry_interval));
196
-
info!("Reconnecting in {} seconds...", sleep_time);
197
-
sleep(Duration::from_secs(sleep_time)).await;
198
-
199
-
if retry_interval > self.opts.max_retry_interval_seconds {
200
-
retry_interval = self.opts.max_retry_interval_seconds;
201
-
} else {
202
-
retry_interval *= 2;
203
-
}
204
-
}
205
-
}
206
-
207
-
pub fn force_reconnect(&self) -> Result<(), flume::SendError<()>> {
208
-
info!("Force reconnect requested.");
209
-
self.reconnect_tx.send(()) // Send a reconnect signal
210
-
}
211
-
}
212
-
213
-
#[cfg(test)]
214
-
mod tests {
215
-
use super::*;
216
-
use std::sync::{Arc, Mutex};
217
-
use tokio::task;
218
-
use tokio::time::{timeout, Duration};
219
-
use tokio_tungstenite::tungstenite::Message;
220
-
221
-
#[test]
222
-
fn test_build_ws_url() {
223
-
let opts = JetstreamOptions {
224
-
wanted_collections: Some(vec!["col1".to_string(), "col2".to_string()]),
225
-
wanted_dids: Some(vec!["did1".to_string()]),
226
-
..Default::default()
227
-
};
228
-
let connection = JetstreamConnection::new(opts);
229
-
230
-
let test = Arc::new(Mutex::new(Some(8373)));
231
-
232
-
let url = connection.build_ws_url(test);
233
-
234
-
assert!(url.starts_with("wss://"));
235
-
assert!(url.contains("cursor=8373"));
236
-
assert!(url.contains("wantedCollections=col1"));
237
-
assert!(url.contains("wantedCollections=col2"));
238
-
assert!(url.contains("wantedDids=did1"));
239
-
}
240
-
241
-
#[tokio::test]
242
-
async fn test_force_reconnect() {
243
-
let opts = JetstreamOptions::default();
244
-
let connection = JetstreamConnection::new(opts);
245
-
246
-
// Spawn a task to listen for the reconnect signal
247
-
let reconnect_rx = connection.reconnect_rx.clone();
248
-
let recv_task = task::spawn(async move {
249
-
reconnect_rx
250
-
.recv_async()
251
-
.await
252
-
.expect("Failed to receive reconnect signal");
253
-
});
254
-
255
-
connection
256
-
.force_reconnect()
257
-
.expect("Failed to send reconnect signal");
258
-
259
-
// Ensure reconnect signal was received
260
-
assert!(recv_task.await.is_ok());
261
-
}
262
-
263
-
#[tokio::test]
264
-
async fn test_message_queue() {
265
-
let opts = JetstreamOptions::default();
266
-
let connection = JetstreamConnection::new(opts);
267
-
268
-
let msg_rx = connection.get_msg_rx();
269
-
let msg = Message::Text("test message".into());
270
-
271
-
// Send a message to the queue
272
-
connection
273
-
.msg_tx
274
-
.send_async(msg.clone())
275
-
.await
276
-
.expect("Failed to send message");
277
-
278
-
// Receive and verify the message
279
-
let received = msg_rx
280
-
.recv_async()
281
-
.await
282
-
.expect("Failed to receive message");
283
-
assert_eq!(received, msg);
284
-
}
285
-
286
-
#[tokio::test]
287
-
async fn test_connection_retries_on_failure() {
288
-
let opts = JetstreamOptions::default();
289
-
let connection = Arc::new(JetstreamConnection::new(opts));
290
-
291
-
let cursor = Arc::new(Mutex::new(None));
292
-
293
-
// Timeout to prevent infinite loop
294
-
let result = timeout(Duration::from_secs(3), connection.connect(cursor)).await;
295
-
296
-
assert!(result.is_err(), "Expected timeout due to retry logic");
297
-
}
298
-
299
-
#[tokio::test]
300
-
async fn test_reconnect_after_receive_timeout() {
301
-
use tokio::net::TcpListener;
302
-
use tokio_tungstenite::accept_async;
303
-
304
-
let opts = JetstreamOptions {
305
-
ws_url: crate::endpoints::JetstreamEndpoints::Custom("ws://127.0.0.1:9001".to_string()),
306
-
bound: 5,
307
-
max_retry_interval_seconds: 1,
308
-
..Default::default()
309
-
};
310
-
let connection = JetstreamConnection::new(opts);
311
-
let cursor = Arc::new(Mutex::new(None));
312
-
313
-
// set up dummy "websocket"
314
-
let listener = TcpListener::bind("127.0.0.1:9001")
315
-
.await
316
-
.expect("Failed to bind");
317
-
let server_handle = tokio::spawn(async move {
318
-
if let Ok((stream, _)) = listener.accept().await {
319
-
let ws_stream = accept_async(stream).await.expect("Failed to accept");
320
-
// send nothing
321
-
tokio::time::sleep(Duration::from_secs(6)).await;
322
-
drop(ws_stream);
323
-
}
324
-
});
325
-
326
-
// spawn, then run for >30 seconds to trigger reconnect
327
-
let connect_handle = tokio::spawn(async move {
328
-
tokio::time::timeout(Duration::from_secs(5), connection.connect(cursor))
329
-
.await
330
-
.ok();
331
-
});
332
-
333
-
let _ = tokio::join!(server_handle, connect_handle);
334
-
}
335
-
}
···
-65
services/rocketman/src/endpoints.rs
-65
services/rocketman/src/endpoints.rs
···
1
-
use std::fmt::{Display, Formatter, Result};
2
-
3
-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
4
-
pub enum JetstreamEndpointLocations {
5
-
UsEast,
6
-
UsWest,
7
-
}
8
-
9
-
impl Display for JetstreamEndpointLocations {
10
-
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
11
-
write!(
12
-
f,
13
-
"{}",
14
-
match self {
15
-
Self::UsEast => "us-east",
16
-
Self::UsWest => "us-west",
17
-
}
18
-
)
19
-
}
20
-
}
21
-
22
-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
23
-
pub enum JetstreamEndpoints {
24
-
Public(JetstreamEndpointLocations, i8),
25
-
Custom(String),
26
-
}
27
-
28
-
impl Display for JetstreamEndpoints {
29
-
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
30
-
match self {
31
-
Self::Public(location, id) => write!(
32
-
f,
33
-
"wss://jetstream{}.{}.bsky.network/subscribe",
34
-
id, location
35
-
),
36
-
Self::Custom(url) => write!(f, "{}", url),
37
-
}
38
-
}
39
-
}
40
-
41
-
impl Default for JetstreamEndpoints {
42
-
fn default() -> Self {
43
-
Self::Public(JetstreamEndpointLocations::UsEast, 2)
44
-
}
45
-
}
46
-
47
-
#[cfg(test)]
48
-
mod tests {
49
-
use super::*;
50
-
51
-
#[test]
52
-
fn test_display_public() {
53
-
let endpoint = JetstreamEndpoints::Public(JetstreamEndpointLocations::UsEast, 2);
54
-
assert_eq!(
55
-
endpoint.to_string(),
56
-
"wss://jetstream2.us-east.bsky.network/subscribe"
57
-
);
58
-
}
59
-
60
-
#[test]
61
-
fn test_display_custom() {
62
-
let endpoint = JetstreamEndpoints::Custom("wss://custom.bsky.network/subscribe".into());
63
-
assert_eq!(endpoint.to_string(), "wss://custom.bsky.network/subscribe");
64
-
}
65
-
}
···
-1
services/rocketman/src/err.rs
-1
services/rocketman/src/err.rs
···
1
-
// TODO: error types instead of using anyhow
···
-452
services/rocketman/src/handler.rs
-452
services/rocketman/src/handler.rs
···
1
-
use anyhow::Result;
2
-
use flume::Sender;
3
-
use metrics::{counter, describe_counter, Unit};
4
-
use serde_json::Value;
5
-
use std::{
6
-
collections::HashMap,
7
-
sync::{Arc, Mutex},
8
-
};
9
-
use tokio_tungstenite::tungstenite::{Error, Message};
10
-
use tracing::{debug, error};
11
-
12
-
#[cfg(feature = "zstd")]
13
-
use std::io::Cursor as IoCursor;
14
-
#[cfg(feature = "zstd")]
15
-
use std::sync::LazyLock;
16
-
#[cfg(feature = "zstd")]
17
-
use zstd::dict::DecoderDictionary;
18
-
19
-
use crate::{
20
-
ingestion::LexiconIngestor,
21
-
types::event::{Event, Kind},
22
-
};
23
-
24
-
/// The custom `zstd` dictionary used for decoding compressed Jetstream messages.
25
-
///
26
-
/// Sourced from the [official Bluesky Jetstream repo.](https://github.com/bluesky-social/jetstream/tree/main/pkg/models)
27
-
#[cfg(feature = "zstd")]
28
-
static ZSTD_DICTIONARY: LazyLock<DecoderDictionary> =
29
-
LazyLock::new(|| DecoderDictionary::copy(include_bytes!("../zstd/dictionary")));
30
-
31
-
pub async fn handle_message(
32
-
message: Message,
33
-
ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>,
34
-
reconnect_tx: Sender<()>,
35
-
cursor: Arc<Mutex<Option<u64>>>,
36
-
) -> Result<()> {
37
-
describe_counter!(
38
-
"jetstream.event",
39
-
Unit::Count,
40
-
"number of event ingest attempts"
41
-
);
42
-
describe_counter!(
43
-
"jetstream.event.parse",
44
-
Unit::Count,
45
-
"events that were successfully processed"
46
-
);
47
-
describe_counter!(
48
-
"jetstream.event.fail",
49
-
Unit::Count,
50
-
"events that could not be read"
51
-
);
52
-
describe_counter!("jetstream.error", Unit::Count, "errors encountered");
53
-
match message {
54
-
Message::Text(text) => {
55
-
debug!("Text message received");
56
-
counter!("jetstream.event").increment(1);
57
-
let envelope: Event<Value> = serde_json::from_str(&text).map_err(|e| {
58
-
anyhow::anyhow!("Failed to parse message: {} with json string {}", e, text)
59
-
})?;
60
-
debug!("envelope: {:?}", envelope);
61
-
handle_envelope(envelope, cursor, ingestors).await?;
62
-
Ok(())
63
-
}
64
-
#[cfg(feature = "zstd")]
65
-
Message::Binary(bytes) => {
66
-
debug!("Binary message received");
67
-
counter!("jetstream.event").increment(1);
68
-
let decoder = zstd::stream::Decoder::with_prepared_dictionary(
69
-
IoCursor::new(bytes),
70
-
&*ZSTD_DICTIONARY,
71
-
)?;
72
-
let envelope: Event<Value> = serde_json::from_reader(decoder)
73
-
.map_err(|e| anyhow::anyhow!("Failed to parse binary message: {}", e))?;
74
-
debug!("envelope: {:?}", envelope);
75
-
handle_envelope(envelope, cursor, ingestors).await?;
76
-
Ok(())
77
-
}
78
-
#[cfg(not(feature = "zstd"))]
79
-
Message::Binary(_) => {
80
-
debug!("Binary message received");
81
-
Err(anyhow::anyhow!(
82
-
"binary message received but zstd feature is not enabled"
83
-
))
84
-
}
85
-
Message::Close(_) => {
86
-
debug!("Server closed connection");
87
-
if let Err(e) = reconnect_tx.send(()) {
88
-
counter!("jetstream.event.parse.error", "error" => "failed_to_send_reconnect_signal").increment(1);
89
-
error!("Failed to send reconnect signal: {}", e);
90
-
}
91
-
Err(Error::ConnectionClosed.into())
92
-
}
93
-
_ => Ok(()),
94
-
}
95
-
}
96
-
97
-
async fn handle_envelope(
98
-
envelope: Event<Value>,
99
-
cursor: Arc<Mutex<Option<u64>>>,
100
-
ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>,
101
-
) -> Result<()> {
102
-
if let Some(ref time_us) = envelope.time_us {
103
-
debug!("Time: {}", time_us);
104
-
if let Some(cursor) = cursor.lock().unwrap().as_mut() {
105
-
debug!("Cursor: {}", cursor);
106
-
if time_us > cursor {
107
-
debug!("Cursor is behind, resetting");
108
-
*cursor = *time_us;
109
-
}
110
-
}
111
-
}
112
-
113
-
match envelope.kind {
114
-
Kind::Commit => match extract_commit_nsid(&envelope) {
115
-
Ok(nsid) => {
116
-
if let Some(fun) = ingestors.get(&nsid) {
117
-
match fun.ingest(envelope).await {
118
-
Ok(_) => {
119
-
counter!("jetstream.event.parse.commit", "nsid" => nsid).increment(1)
120
-
}
121
-
Err(e) => {
122
-
error!("Error ingesting commit with nsid {}: {}", nsid, e);
123
-
counter!("jetstream.error").increment(1);
124
-
counter!("jetstream.event.fail").increment(1);
125
-
}
126
-
}
127
-
}
128
-
}
129
-
Err(e) => error!("Error parsing commit: {}", e),
130
-
},
131
-
Kind::Identity => {
132
-
counter!("jetstream.event.parse.identity").increment(1);
133
-
}
134
-
Kind::Account => {
135
-
counter!("jetstream.event.parse.account").increment(1);
136
-
}
137
-
Kind::Unknown(kind) => {
138
-
counter!("jetstream.event.parse.unknown", "kind" => kind).increment(1);
139
-
}
140
-
}
141
-
Ok(())
142
-
}
143
-
144
-
fn extract_commit_nsid(envelope: &Event<Value>) -> anyhow::Result<String> {
145
-
// if the type is not a commit
146
-
if envelope.commit.is_none() {
147
-
return Err(anyhow::anyhow!(
148
-
"Message has no commit, so there is no nsid attached."
149
-
));
150
-
} else if let Some(ref commit) = envelope.commit {
151
-
return Ok(commit.collection.clone());
152
-
}
153
-
154
-
Err(anyhow::anyhow!("Failed to extract nsid: unknown error"))
155
-
}
156
-
157
-
#[cfg(test)]
158
-
mod tests {
159
-
use super::*;
160
-
use crate::types::event::Event;
161
-
use anyhow::Result;
162
-
use async_trait::async_trait;
163
-
use flume::{Receiver, Sender};
164
-
use serde_json::json;
165
-
use std::{
166
-
collections::HashMap,
167
-
sync::{Arc, Mutex},
168
-
};
169
-
use tokio_tungstenite::tungstenite::Message;
170
-
171
-
// Dummy ingestor that records if it was called.
172
-
struct DummyIngestor {
173
-
pub called: Arc<Mutex<bool>>,
174
-
}
175
-
176
-
#[async_trait]
177
-
impl crate::ingestion::LexiconIngestor for DummyIngestor {
178
-
async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> {
179
-
let mut called = self.called.lock().unwrap();
180
-
*called = true;
181
-
Ok(())
182
-
}
183
-
}
184
-
185
-
// Dummy ingestor that always returns an error.
186
-
struct ErrorIngestor;
187
-
188
-
#[async_trait]
189
-
impl crate::ingestion::LexiconIngestor for ErrorIngestor {
190
-
async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> {
191
-
Err(anyhow::anyhow!("Ingest error"))
192
-
}
193
-
}
194
-
195
-
// Helper to create a reconnect channel.
196
-
fn setup_reconnect_channel() -> (Sender<()>, Receiver<()>) {
197
-
flume::unbounded()
198
-
}
199
-
200
-
#[tokio::test]
201
-
async fn test_valid_commit_success() {
202
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
203
-
let cursor = Arc::new(Mutex::new(Some(100)));
204
-
let called_flag = Arc::new(Mutex::new(false));
205
-
206
-
// Create a valid commit event JSON.
207
-
let event_json = json!({
208
-
"did": "did:example:123",
209
-
"time_us": 200,
210
-
"kind": "commit",
211
-
"commit": {
212
-
"rev": "1",
213
-
"operation": "create",
214
-
"collection": "ns1",
215
-
"rkey": "rkey1",
216
-
"record": { "foo": "bar" },
217
-
"cid": "cid123"
218
-
},
219
-
})
220
-
.to_string();
221
-
222
-
let mut ingestors: HashMap<
223
-
String,
224
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
225
-
> = HashMap::new();
226
-
ingestors.insert(
227
-
"ns1".to_string(),
228
-
Box::new(DummyIngestor {
229
-
called: called_flag.clone(),
230
-
}),
231
-
);
232
-
233
-
let result = handle_message(
234
-
Message::Text(event_json),
235
-
&ingestors,
236
-
reconnect_tx,
237
-
cursor.clone(),
238
-
)
239
-
.await;
240
-
assert!(result.is_ok());
241
-
// Check that the ingestor was called.
242
-
assert!(*called_flag.lock().unwrap());
243
-
// Verify that the cursor got updated.
244
-
assert_eq!(*cursor.lock().unwrap(), Some(200));
245
-
}
246
-
247
-
#[cfg(feature = "zstd")]
248
-
#[tokio::test]
249
-
async fn test_binary_valid_commit() {
250
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
251
-
let cursor = Arc::new(Mutex::new(Some(100)));
252
-
let called_flag = Arc::new(Mutex::new(false));
253
-
254
-
let uncompressed_json = json!({
255
-
"did": "did:example:123",
256
-
"time_us": 200,
257
-
"kind": "commit",
258
-
"commit": {
259
-
"rev": "1",
260
-
"operation": "create",
261
-
"collection": "ns1",
262
-
"rkey": "rkey1",
263
-
"record": { "foo": "bar" },
264
-
"cid": "cid123"
265
-
},
266
-
})
267
-
.to_string();
268
-
269
-
let compressed_dest: IoCursor<Vec<u8>> = IoCursor::new(vec![]);
270
-
let mut encoder = zstd::Encoder::with_prepared_dictionary(
271
-
compressed_dest,
272
-
&zstd::dict::EncoderDictionary::copy(include_bytes!("../zstd/dictionary"), 0),
273
-
)
274
-
.unwrap();
275
-
std::io::copy(
276
-
&mut IoCursor::new(uncompressed_json.as_bytes()),
277
-
&mut encoder,
278
-
)
279
-
.unwrap();
280
-
let compressed_dest = encoder.finish().unwrap();
281
-
282
-
let mut ingestors: HashMap<
283
-
String,
284
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
285
-
> = HashMap::new();
286
-
ingestors.insert(
287
-
"ns1".to_string(),
288
-
Box::new(DummyIngestor {
289
-
called: called_flag.clone(),
290
-
}),
291
-
);
292
-
293
-
let result = handle_message(
294
-
Message::Binary(compressed_dest.into_inner()),
295
-
&ingestors,
296
-
reconnect_tx,
297
-
cursor.clone(),
298
-
)
299
-
.await;
300
-
301
-
assert!(result.is_ok());
302
-
// Check that the ingestor was called.
303
-
assert!(*called_flag.lock().unwrap());
304
-
// Verify that the cursor got updated.
305
-
assert_eq!(*cursor.lock().unwrap(), Some(200));
306
-
}
307
-
308
-
#[tokio::test]
309
-
async fn test_commit_ingest_failure() {
310
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
311
-
let cursor = Arc::new(Mutex::new(Some(100)));
312
-
313
-
// Valid commit event with an ingestor that fails.
314
-
let event_json = json!({
315
-
"did": "did:example:123",
316
-
"time_us": 300,
317
-
"kind": "commit",
318
-
"commit": {
319
-
"rev": "1",
320
-
"operation": "create",
321
-
"collection": "ns_error",
322
-
"rkey": "rkey1",
323
-
"record": { "foo": "bar" },
324
-
"cid": "cid123"
325
-
},
326
-
"identity": null
327
-
})
328
-
.to_string();
329
-
330
-
let mut ingestors: HashMap<
331
-
String,
332
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
333
-
> = HashMap::new();
334
-
ingestors.insert("ns_error".to_string(), Box::new(ErrorIngestor));
335
-
336
-
// Even though ingestion fails, handle_message returns Ok(()).
337
-
let result = handle_message(
338
-
Message::Text(event_json),
339
-
&ingestors,
340
-
reconnect_tx,
341
-
cursor.clone(),
342
-
)
343
-
.await;
344
-
assert!(result.is_ok());
345
-
// Cursor should still update because it comes before the ingest call.
346
-
assert_eq!(*cursor.lock().unwrap(), Some(300));
347
-
}
348
-
349
-
#[tokio::test]
350
-
async fn test_identity_message() {
351
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
352
-
let cursor = Arc::new(Mutex::new(None));
353
-
// Valid identity event.
354
-
let event_json = json!({
355
-
"did": "did:example:123",
356
-
"time_us": 150,
357
-
"kind": "identity",
358
-
"commit": null,
359
-
"identity": {
360
-
"did": "did:example:123",
361
-
"handle": "user",
362
-
"seq": 1,
363
-
"time": "2025-01-01T00:00:00Z"
364
-
}
365
-
})
366
-
.to_string();
367
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
368
-
HashMap::new();
369
-
370
-
let result =
371
-
handle_message(Message::Text(event_json), &ingestors, reconnect_tx, cursor).await;
372
-
assert!(result.is_ok());
373
-
}
374
-
375
-
#[tokio::test]
376
-
async fn test_close_message() {
377
-
let (reconnect_tx, reconnect_rx) = setup_reconnect_channel();
378
-
let cursor = Arc::new(Mutex::new(None));
379
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
380
-
HashMap::new();
381
-
382
-
let result = handle_message(Message::Close(None), &ingestors, reconnect_tx, cursor).await;
383
-
// Should return an error due to connection close.
384
-
assert!(result.is_err());
385
-
// Verify that a reconnect signal was sent.
386
-
let signal = reconnect_rx.recv_async().await;
387
-
assert!(signal.is_ok());
388
-
}
389
-
390
-
#[tokio::test]
391
-
async fn test_invalid_json() {
392
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
393
-
let cursor = Arc::new(Mutex::new(None));
394
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
395
-
HashMap::new();
396
-
397
-
let invalid_json = "this is not json".to_string();
398
-
let result = handle_message(
399
-
Message::Text(invalid_json),
400
-
&ingestors,
401
-
reconnect_tx,
402
-
cursor,
403
-
)
404
-
.await;
405
-
assert!(result.is_err());
406
-
}
407
-
408
-
#[tokio::test]
409
-
async fn test_cursor_not_updated_if_lower() {
410
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
411
-
// Set an initial cursor value.
412
-
let cursor = Arc::new(Mutex::new(Some(300)));
413
-
let event_json = json!({
414
-
"did": "did:example:123",
415
-
"time_us": 200,
416
-
"kind": "commit",
417
-
"commit": {
418
-
"rev": "1",
419
-
"operation": "create",
420
-
"collection": "ns1",
421
-
"rkey": "rkey1",
422
-
"record": { "foo": "bar" },
423
-
"cid": "cid123"
424
-
},
425
-
"identity": null
426
-
})
427
-
.to_string();
428
-
429
-
// Use a dummy ingestor that does nothing.
430
-
let mut ingestors: HashMap<
431
-
String,
432
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
433
-
> = HashMap::new();
434
-
ingestors.insert(
435
-
"ns1".to_string(),
436
-
Box::new(DummyIngestor {
437
-
called: Arc::new(Mutex::new(false)),
438
-
}),
439
-
);
440
-
441
-
let result = handle_message(
442
-
Message::Text(event_json),
443
-
&ingestors,
444
-
reconnect_tx,
445
-
cursor.clone(),
446
-
)
447
-
.await;
448
-
assert!(result.is_ok());
449
-
// Cursor should remain unchanged.
450
-
assert_eq!(*cursor.lock().unwrap(), Some(300));
451
-
}
452
-
}
···
-22
services/rocketman/src/ingestion.rs
-22
services/rocketman/src/ingestion.rs
···
1
-
use anyhow::Result;
2
-
use async_trait::async_trait;
3
-
use serde_json::Value;
4
-
use tracing::info;
5
-
6
-
use crate::types::event::Event;
7
-
8
-
#[async_trait]
9
-
pub trait LexiconIngestor {
10
-
async fn ingest(&self, message: Event<Value>) -> Result<()>;
11
-
}
12
-
13
-
pub struct DefaultLexiconIngestor;
14
-
15
-
#[async_trait]
16
-
impl LexiconIngestor for DefaultLexiconIngestor {
17
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
18
-
info!("Default lexicon processing: {:?}", message);
19
-
// Process message for default lexicon.
20
-
Ok(())
21
-
}
22
-
}
···
-8
services/rocketman/src/lib.rs
-8
services/rocketman/src/lib.rs
-40
services/rocketman/src/options.rs
-40
services/rocketman/src/options.rs
···
1
-
use bon::Builder;
2
-
3
-
use crate::endpoints::JetstreamEndpoints;
4
-
5
-
#[derive(Builder, Debug)]
6
-
pub struct JetstreamOptions {
7
-
#[builder(default)]
8
-
pub ws_url: JetstreamEndpoints,
9
-
#[builder(default)]
10
-
pub max_retry_interval_seconds: u64,
11
-
#[builder(default)]
12
-
pub connection_success_time_seconds: u64,
13
-
#[builder(default)]
14
-
pub bound: usize,
15
-
#[builder(default)]
16
-
pub timeout_time_sec: usize,
17
-
#[cfg(feature = "zstd")]
18
-
#[builder(default = true)]
19
-
pub compress: bool,
20
-
pub wanted_collections: Option<Vec<String>>,
21
-
pub wanted_dids: Option<Vec<String>>,
22
-
pub cursor: Option<String>,
23
-
}
24
-
25
-
impl Default for JetstreamOptions {
26
-
fn default() -> Self {
27
-
Self {
28
-
ws_url: JetstreamEndpoints::default(),
29
-
max_retry_interval_seconds: 120,
30
-
connection_success_time_seconds: 60,
31
-
bound: 65536,
32
-
timeout_time_sec: 40,
33
-
#[cfg(feature = "zstd")]
34
-
compress: true,
35
-
wanted_collections: None,
36
-
wanted_dids: None,
37
-
cursor: None,
38
-
}
39
-
}
40
-
}
···
-11
services/rocketman/src/time/mod.rs
-11
services/rocketman/src/time/mod.rs
···
1
-
use std::time::{Duration, Instant, SystemTime};
2
-
3
-
pub mod system_time;
4
-
5
-
pub trait TimeProvider {
6
-
fn new() -> Self;
7
-
fn now(&self) -> SystemTime; // Get the current time
8
-
fn elapsed(&self, earlier: SystemTime) -> Duration; // Calculate the elapsed time.
9
-
fn instant_now(&self) -> Instant; // For compatibility with your existing code (if needed)
10
-
fn instant_elapsed(&self, earlier: Instant) -> Duration;
11
-
}
···
-28
services/rocketman/src/time/system_time.rs
-28
services/rocketman/src/time/system_time.rs
···
1
-
use std::time::{Duration, Instant, SystemTime};
2
-
3
-
use super::TimeProvider;
4
-
5
-
#[derive(Default, Clone, Copy)] // Add these derives for ease of use
6
-
pub struct SystemTimeProvider; // No fields needed, just a marker type
7
-
8
-
impl TimeProvider for SystemTimeProvider {
9
-
fn new() -> Self {
10
-
Self
11
-
}
12
-
13
-
fn now(&self) -> SystemTime {
14
-
SystemTime::now()
15
-
}
16
-
17
-
fn elapsed(&self, earlier: SystemTime) -> Duration {
18
-
earlier.elapsed().unwrap_or_else(|_| Duration::from_secs(0))
19
-
}
20
-
21
-
fn instant_now(&self) -> Instant {
22
-
Instant::now()
23
-
}
24
-
25
-
fn instant_elapsed(&self, earlier: Instant) -> Duration {
26
-
earlier.elapsed()
27
-
}
28
-
}
···
-116
services/rocketman/src/types/event.rs
-116
services/rocketman/src/types/event.rs
···
1
-
use serde::{Deserialize, Deserializer, Serialize};
2
-
3
-
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
4
-
#[serde(rename_all = "lowercase")]
5
-
pub enum Kind {
6
-
Account,
7
-
Identity,
8
-
Commit,
9
-
Unknown(String),
10
-
}
11
-
12
-
#[derive(Debug, Serialize, Deserialize)]
13
-
#[serde(rename_all = "snake_case")]
14
-
pub struct Event<T> {
15
-
pub did: String,
16
-
pub time_us: Option<u64>,
17
-
pub kind: Kind,
18
-
pub commit: Option<Commit<T>>,
19
-
pub identity: Option<Identity>,
20
-
}
21
-
22
-
#[derive(Debug, Serialize, Deserialize)]
23
-
pub struct Identity {
24
-
did: String,
25
-
handle: Option<String>,
26
-
seq: u64,
27
-
time: String,
28
-
}
29
-
30
-
#[derive(Debug, Serialize, Deserialize)]
31
-
#[serde(rename_all = "lowercase")]
32
-
enum AccountStatus {
33
-
TakenDown,
34
-
Suspended,
35
-
Deleted,
36
-
Activated,
37
-
}
38
-
39
-
#[derive(Debug, Serialize, Deserialize)]
40
-
pub struct Account {
41
-
did: String,
42
-
handle: String,
43
-
seq: u64,
44
-
time: String,
45
-
status: AccountStatus,
46
-
}
47
-
48
-
#[derive(Debug, Serialize)]
49
-
#[serde(rename_all = "camelCase")]
50
-
pub struct Commit<T> {
51
-
pub rev: String,
52
-
pub operation: Operation,
53
-
pub collection: String,
54
-
pub rkey: String,
55
-
pub record: Option<T>,
56
-
pub cid: Option<String>,
57
-
}
58
-
59
-
#[derive(Debug, Serialize, Deserialize)]
60
-
#[serde(rename_all = "lowercase")]
61
-
pub enum Operation {
62
-
Create,
63
-
Update,
64
-
Delete,
65
-
}
66
-
67
-
/// Enforce that record is None only when operation is 'delete'
68
-
impl<'de, T> Deserialize<'de> for Commit<T>
69
-
where
70
-
T: Deserialize<'de>,
71
-
{
72
-
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
73
-
where
74
-
D: Deserializer<'de>,
75
-
{
76
-
// Helper struct to perform the deserialization.
77
-
#[derive(Deserialize)]
78
-
#[serde(rename_all = "camelCase")]
79
-
struct Helper<T> {
80
-
rev: String,
81
-
operation: Operation,
82
-
collection: String,
83
-
rkey: String,
84
-
record: Option<T>,
85
-
cid: Option<String>,
86
-
}
87
-
88
-
let helper = Helper::deserialize(deserializer)?;
89
-
90
-
match helper.operation {
91
-
Operation::Delete => {
92
-
if helper.record.is_some() || helper.cid.is_some() {
93
-
return Err(<D::Error as serde::de::Error>::custom(
94
-
"record and cid must be null when operation is delete",
95
-
));
96
-
}
97
-
}
98
-
_ => {
99
-
if helper.record.is_none() || helper.cid.is_none() {
100
-
return Err(<D::Error as serde::de::Error>::custom(
101
-
"record and cid must be present unless operation is delete",
102
-
));
103
-
}
104
-
}
105
-
}
106
-
107
-
Ok(Commit {
108
-
rev: helper.rev,
109
-
operation: helper.operation,
110
-
collection: helper.collection,
111
-
rkey: helper.rkey,
112
-
record: helper.record,
113
-
cid: helper.cid,
114
-
})
115
-
}
116
-
}
···
-1
services/rocketman/src/types/mod.rs
-1
services/rocketman/src/types/mod.rs
···
1
-
pub mod event;
···
services/rocketman/zstd/dictionary
services/rocketman/zstd/dictionary
This is a binary file and will not be displayed.
+1
-6
services/satellite/src/counts.rs
+1
-6
services/satellite/src/counts.rs
···
3
http::StatusCode,
4
Json,
5
};
6
-
use chrono::{DateTime, Utc};
7
use serde::{Deserialize, Serialize};
8
use sqlx::FromRow;
9
use uuid::Uuid;
···
43
pub limit: i64,
44
}
45
46
-
#[derive(FromRow, Debug, Deserialize, Serialize)]
47
pub struct Play {
48
pub did: String,
49
pub track_name: String,
···
51
pub release_name: Option<String>,
52
pub release_mbid: Option<Uuid>,
53
pub duration: Option<i32>,
54
-
pub played_time: Option<DateTime<Utc>>,
55
pub uri: Option<String>,
56
// MASSIVE HUGE HACK
57
pub artists: Option<String>,
···
65
pub release_name: Option<String>,
66
pub release_mbid: Option<Uuid>,
67
pub duration: Option<i32>,
68
-
pub played_time: Option<DateTime<Utc>>,
69
pub uri: Option<String>,
70
pub artists: Vec<Artist>,
71
}
···
92
-- TODO: replace with actual
93
STRING_AGG(pa.artist_name || '|' || TEXT(pa.artist_mbid), ',') AS artists,
94
p.release_name,
95
-
p.played_time,
96
p.duration,
97
p.uri,
98
p.recording_mbid,
···
138
release_name: play.release_name,
139
release_mbid: play.release_mbid,
140
duration: play.duration,
141
-
played_time: play.played_time,
142
uri: play.uri,
143
artists,
144
}
···
3
http::StatusCode,
4
Json,
5
};
6
use serde::{Deserialize, Serialize};
7
use sqlx::FromRow;
8
use uuid::Uuid;
···
42
pub limit: i64,
43
}
44
45
+
#[derive(FromRow, Debug)]
46
pub struct Play {
47
pub did: String,
48
pub track_name: String,
···
50
pub release_name: Option<String>,
51
pub release_mbid: Option<Uuid>,
52
pub duration: Option<i32>,
53
pub uri: Option<String>,
54
// MASSIVE HUGE HACK
55
pub artists: Option<String>,
···
63
pub release_name: Option<String>,
64
pub release_mbid: Option<Uuid>,
65
pub duration: Option<i32>,
66
pub uri: Option<String>,
67
pub artists: Vec<Artist>,
68
}
···
89
-- TODO: replace with actual
90
STRING_AGG(pa.artist_name || '|' || TEXT(pa.artist_mbid), ',') AS artists,
91
p.release_name,
92
p.duration,
93
p.uri,
94
p.recording_mbid,
···
134
release_name: play.release_name,
135
release_mbid: play.release_mbid,
136
duration: play.duration,
137
uri: play.uri,
138
artists,
139
}
+31
services/types/Cargo.toml
+31
services/types/Cargo.toml
···
···
1
+
[package]
2
+
name = "types"
3
+
version = "0.1.0"
4
+
edition = "2021"
5
+
6
+
[dependencies]
7
+
atrium-api.workspace = true
8
+
atrium-xrpc = "0.12.1"
9
+
chrono = "0.4.39"
10
+
http = "1.2.0"
11
+
ipld-core = { version = "0.4.2", features = ["serde"] }
12
+
langtag = { version = "0.3", features = ["serde"] }
13
+
regex = "1.11.1"
14
+
serde = { workspace = true, features = ["derive"] }
15
+
serde_bytes = "0.11.15"
16
+
serde_ipld_dagcbor = "0.6.2"
17
+
serde_json.workspace = true
18
+
thiserror = "2.0.11"
19
+
20
+
# features
21
+
[features]
22
+
default = [
23
+
"namespace-fmteal",
24
+
"namespace-appbsky",
25
+
"namespace-toolsozone",
26
+
"namespace-chatbsky",
27
+
]
28
+
namespace-fmteal = []
29
+
namespace-appbsky = []
30
+
namespace-toolsozone = []
31
+
namespace-chatbsky = []
+10
services/types/readme.md
+10
services/types/readme.md
···
···
1
+
## Types
2
+
Rust lexicons for teal.fm and others.
3
+
4
+
### Generate lexicons
5
+
You will need to install [esquema-cli](https://github.com/fatfingers23/esquema), a fork of the [atrium codegen tool](https://github.com/sugyan/atrium).
6
+
7
+
Currently it can be installed directly from the repo:
8
+
`cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git`
9
+
10
+
The lexicons can then be regenerated with `esquema-cli generate local --lexdir ./lexicons --outdir ./src` from this directory.
+55
target.sh
+55
target.sh
···
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Debug: Print all available build variables
5
+
echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM"
6
+
echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM"
7
+
echo "DEBUG: TARGETARCH=$TARGETARCH"
8
+
echo "DEBUG: TARGETOS=$TARGETOS"
9
+
10
+
# Use TARGETARCH directly (more reliable than TARGETPLATFORM)
11
+
TARGET_ARCH_VAR="${TARGETARCH:-}"
12
+
13
+
# If TARGETARCH is not set, try to extract from TARGETPLATFORM
14
+
if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then
15
+
TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2)
16
+
echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM"
17
+
fi
18
+
19
+
# Final fallback: detect from uname
20
+
if [ -z "$TARGET_ARCH_VAR" ]; then
21
+
ARCH=$(uname -m)
22
+
case "$ARCH" in
23
+
"x86_64")
24
+
TARGET_ARCH_VAR="amd64"
25
+
;;
26
+
"aarch64")
27
+
TARGET_ARCH_VAR="arm64"
28
+
;;
29
+
*)
30
+
echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH"
31
+
echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM"
32
+
exit 1
33
+
;;
34
+
esac
35
+
echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname"
36
+
fi
37
+
38
+
# Map architecture to Rust target
39
+
case "$TARGET_ARCH_VAR" in
40
+
"amd64")
41
+
export RUST_TARGET="x86_64-unknown-linux-gnu"
42
+
export TARGET_ARCH="amd64"
43
+
;;
44
+
"arm64")
45
+
export RUST_TARGET="aarch64-unknown-linux-gnu"
46
+
export TARGET_ARCH="arm64"
47
+
;;
48
+
*)
49
+
echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR"
50
+
echo "Supported architectures: amd64, arm64"
51
+
exit 1
52
+
;;
53
+
esac
54
+
55
+
echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
+92
-62
tools/lexicon-cli/src/commands/generate.ts
+92
-62
tools/lexicon-cli/src/commands/generate.ts
···
1
-
import { execa } from 'execa';
2
-
import { existsSync } from 'fs';
3
-
import { join } from 'path';
4
-
import pc from 'picocolors';
5
-
import { findWorkspaceRoot } from '../utils/workspace.js';
6
7
interface GenerateOptions {
8
tsOnly?: boolean;
···
12
13
export async function generate(options: GenerateOptions = {}) {
14
const workspaceRoot = findWorkspaceRoot();
15
-
16
-
console.log(pc.blue('๐ง Generating lexicon types...'));
17
-
18
try {
19
if (!options.rustOnly) {
20
await generateTypeScript(workspaceRoot, options.force);
21
}
22
-
23
if (!options.tsOnly) {
24
await generateRust(workspaceRoot, options.force);
25
}
26
-
27
-
console.log(pc.green('✅ Lexicon generation complete!'));
28
} catch (error) {
29
-
console.error(pc.red('โ Generation failed:'), error instanceof Error ? error.message : String(error));
30
process.exit(1);
31
}
32
}
33
34
async function generateTypeScript(workspaceRoot: string, force?: boolean) {
35
-
const lexiconsPath = join(workspaceRoot, 'lexicons');
36
-
37
if (!existsSync(lexiconsPath)) {
38
-
throw new Error('Lexicons directory not found at workspace root');
39
}
40
-
41
// Check if packages/lexicons exists for TypeScript generation
42
-
const packagesLexiconsPath = join(workspaceRoot, 'packages/lexicons');
43
if (!existsSync(packagesLexiconsPath)) {
44
-
console.log(pc.yellow(' โ ๏ธ TypeScript lexicons package not found, skipping TypeScript generation'));
45
return;
46
}
47
-
48
-
console.log(pc.cyan(' ๐ฆ Generating TypeScript types...'));
49
-
50
try {
51
-
await execa('pnpm', ['lex:gen-server'], {
52
cwd: packagesLexiconsPath,
53
-
stdio: 'inherit'
54
});
55
-
console.log(pc.green(' โ TypeScript types generated'));
56
} catch (error) {
57
-
throw new Error(`TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`);
58
}
59
}
60
61
async function generateRust(workspaceRoot: string, force?: boolean) {
62
-
const typesPath = join(workspaceRoot, 'services/types');
63
-
const lexiconsPath = join(workspaceRoot, 'lexicons');
64
-
65
if (!existsSync(typesPath)) {
66
-
throw new Error('Rust types service not found');
67
}
68
-
69
if (!existsSync(lexiconsPath)) {
70
-
throw new Error('Lexicons directory not found at workspace root');
71
}
72
-
73
-
console.log(pc.cyan(' ๐ฆ Generating Rust types...'));
74
-
75
try {
76
// Check if esquema-cli is available
77
try {
78
-
await execa('esquema-cli', ['--version'], { stdio: 'pipe' });
79
} catch {
80
-
console.log(pc.yellow(' โ ๏ธ esquema-cli not found. Installing...'));
81
try {
82
-
await execa('cargo', [
83
-
'install',
84
-
'esquema-cli',
85
-
'--git',
86
-
'https://github.com/fatfingers23/esquema.git'
87
-
], {
88
-
stdio: 'inherit'
89
-
});
90
-
console.log(pc.green(' โ esquema-cli installed successfully'));
91
} catch (installError) {
92
-
throw new Error('Failed to install esquema-cli. Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git');
93
}
94
}
95
-
96
-
await execa('esquema-cli', [
97
-
'generate',
98
-
'local',
99
-
'--lexdir',
100
-
lexiconsPath,
101
-
'--outdir',
102
-
join(typesPath, 'src')
103
-
], {
104
-
cwd: typesPath,
105
-
stdio: 'inherit'
106
-
});
107
-
108
-
console.log(pc.green(' โ Rust types generated'));
109
} catch (error) {
110
-
throw new Error(`Rust generation failed: ${error instanceof Error ? error.message : String(error)}`);
111
}
112
-
}
···
1
+
import { existsSync } from "fs";
2
+
import { join } from "path";
3
+
import { execa } from "execa";
4
+
import pc from "picocolors";
5
+
6
+
import { findWorkspaceRoot } from "../utils/workspace.js";
7
8
interface GenerateOptions {
9
tsOnly?: boolean;
···
13
14
export async function generate(options: GenerateOptions = {}) {
15
const workspaceRoot = findWorkspaceRoot();
16
+
17
+
console.log(pc.blue("๐ง Generating lexicon types..."));
18
+
19
try {
20
if (!options.rustOnly) {
21
await generateTypeScript(workspaceRoot, options.force);
22
}
23
+
24
if (!options.tsOnly) {
25
await generateRust(workspaceRoot, options.force);
26
}
27
+
28
+
console.log(pc.green("โ
Lexicon generation complete!"));
29
} catch (error) {
30
+
console.error(
31
+
pc.red("โ Generation failed:"),
32
+
error instanceof Error ? error.message : String(error),
33
+
);
34
process.exit(1);
35
}
36
}
37
38
async function generateTypeScript(workspaceRoot: string, force?: boolean) {
39
+
const lexiconsPath = join(workspaceRoot, "lexicons");
40
+
41
if (!existsSync(lexiconsPath)) {
42
+
throw new Error("Lexicons directory not found at workspace root");
43
}
44
+
45
// Check if packages/lexicons exists for TypeScript generation
46
+
const packagesLexiconsPath = join(workspaceRoot, "packages/lexicons");
47
if (!existsSync(packagesLexiconsPath)) {
48
+
console.log(
49
+
pc.yellow(
50
+
" โ ๏ธ TypeScript lexicons package not found, skipping TypeScript generation",
51
+
),
52
+
);
53
return;
54
}
55
+
56
+
console.log(pc.cyan(" ๐ฆ Generating TypeScript types..."));
57
+
58
try {
59
+
await execa("pnpm", ["lex:gen-server"], {
60
cwd: packagesLexiconsPath,
61
+
stdio: "inherit",
62
});
63
+
console.log(pc.green(" โ TypeScript types generated"));
64
} catch (error) {
65
+
throw new Error(
66
+
`TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`,
67
+
);
68
}
69
}
70
71
async function generateRust(workspaceRoot: string, force?: boolean) {
72
+
const typesPath = join(workspaceRoot, "services/types");
73
+
const lexiconsPath = join(workspaceRoot, "lexicons");
74
+
75
if (!existsSync(typesPath)) {
76
+
throw new Error("Rust types service not found");
77
}
78
+
79
if (!existsSync(lexiconsPath)) {
80
+
throw new Error("Lexicons directory not found at workspace root");
81
}
82
+
83
+
console.log(pc.cyan(" ๐ฆ Generating Rust types..."));
84
+
85
try {
86
// Check if esquema-cli is available
87
try {
88
+
await execa("esquema-cli", ["--version"], { stdio: "pipe" });
89
} catch {
90
+
console.log(pc.yellow(" โ ๏ธ esquema-cli not found. Installing..."));
91
try {
92
+
await execa(
93
+
"cargo",
94
+
[
95
+
"install",
96
+
"esquema-cli",
97
+
"--git",
98
+
"https://github.com/fatfingers23/esquema.git",
99
+
],
100
+
{
101
+
stdio: "inherit",
102
+
},
103
+
);
104
+
console.log(pc.green(" โ esquema-cli installed successfully"));
105
} catch (installError) {
106
+
throw new Error(
107
+
"Failed to install esquema-cli. Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git",
108
+
);
109
}
110
}
111
+
112
+
// create typespath/src if it doesn't exist
113
+
if (!existsSync(join(typesPath, "src"))) {
114
+
console.log(pc.yellow(" Creating src directory for Rust types..."));
115
+
await execa("mkdir", ["-p", join(typesPath, "src")], {
116
+
stdio: "inherit",
117
+
});
118
+
}
119
+
120
+
await execa(
121
+
"esquema-cli",
122
+
[
123
+
"generate",
124
+
"local",
125
+
"--lexdir",
126
+
lexiconsPath,
127
+
"--outdir",
128
+
join(typesPath, "src"),
129
+
],
130
+
{
131
+
cwd: typesPath,
132
+
stdio: "inherit",
133
+
},
134
+
);
135
+
136
+
console.log(pc.green(" โ Rust types generated"));
137
} catch (error) {
138
+
throw new Error(
139
+
`Rust generation failed: ${error instanceof Error ? error.message : String(error)}`,
140
+
);
141
}
142
+
}
+44
tools/teal-cli/Cargo.toml
+44
tools/teal-cli/Cargo.toml
···
···
1
+
[package]
2
+
name = "teal-cli"
3
+
version = "0.1.0"
4
+
edition = "2021"
5
+
description = "A simple management tool for teal.fm AT Protocol services"
6
+
7
+
[[bin]]
8
+
name = "teal"
9
+
path = "src/main.rs"
10
+
11
+
[dependencies]
12
+
# CLI framework
13
+
clap = { version = "4.0", features = ["derive"] }
14
+
anyhow = "1.0"
15
+
serde = { version = "1.0", features = ["derive"] }
16
+
serde_json = "1.0"
17
+
18
+
# Async runtime
19
+
tokio = { version = "1.0", features = [
20
+
"rt",
21
+
"macros",
22
+
"fs",
23
+
"rt-multi-thread",
24
+
] }
25
+
26
+
# Cryptography
27
+
k256 = { version = "0.13", features = ["ecdsa"] }
28
+
multibase = "0.9"
29
+
hex = "0.4"
30
+
rand = "0.8"
31
+
32
+
# File system and paths
33
+
dirs = "5.0"
34
+
35
+
# Utilities
36
+
chrono = { version = "0.4", features = ["serde"] }
37
+
colored = "2.0"
38
+
39
+
40
+
[features]
41
+
default = []
42
+
43
+
[dev-dependencies]
44
+
tempfile = "3.0"
+257
tools/teal-cli/README.md
+257
tools/teal-cli/README.md
···
···
1
+
# Teal CLI
2
+
3
+
A comprehensive management tool for Teal AT Protocol services, featuring cryptographic key management and CAR (Content Addressable aRchive) file exploration.
4
+
5
+
## Installation
6
+
7
+
From the project root:
8
+
9
+
```bash
10
+
cargo build --release --bin teal
11
+
```
12
+
13
+
The binary will be available at `target/release/teal`.
14
+
15
+
## Usage
16
+
17
+
### CAR File Explorer
18
+
19
+
Explore and analyze CAR files containing AT Protocol and Teal records.
20
+
21
+
#### Fetch CAR file from the internet
22
+
23
+
```bash
24
+
# Fetch from AT Protocol handle
25
+
teal car fetch --identity alice.bsky.social
26
+
27
+
# Fetch from DID
28
+
teal car fetch --identity did:plc:vdjlpwlhbnug4fnjodwr3vzh
29
+
30
+
# Fetch and save to specific file
31
+
teal car fetch --identity mmatt.net --output mmatt.car
32
+
33
+
# Fetch and immediately explore
34
+
teal car fetch --identity mmatt.net --explore
35
+
```
36
+
37
+
#### Explore a CAR file
38
+
39
+
```bash
40
+
# Basic exploration
41
+
teal car explore --file path/to/archive.car
42
+
43
+
# Verbose output with detailed information
44
+
teal car explore --file path/to/archive.car --verbose
45
+
```
46
+
47
+
#### Search for specific content
48
+
49
+
```bash
50
+
# Search for records containing "play"
51
+
teal car search --file path/to/archive.car --query "play"
52
+
53
+
# Search with verbose JSON output
54
+
teal car search --file path/to/archive.car --query "queen" --verbose
55
+
```
56
+
57
+
#### Export Teal records to JSON
58
+
59
+
```bash
60
+
# Export to default directory (./teal_exports)
61
+
teal car export --file path/to/archive.car
62
+
63
+
# Export to custom directory
64
+
teal car export --file path/to/archive.car --output ./my_exports
65
+
```
66
+
67
+
### Generate a new K256 key pair
68
+
69
+
```bash
70
+
# Generate with default settings (saves to ~/.teal/keys/)
71
+
teal gen-key
72
+
73
+
# Generate with custom name
74
+
teal gen-key --name production
75
+
76
+
# Generate with custom output directory
77
+
teal gen-key --output ./keys
78
+
79
+
# Overwrite existing keys
80
+
teal gen-key --force
81
+
82
+
# Output only the multibase (useful for scripts)
83
+
teal gen-key --format multibase
84
+
85
+
# Output as JSON
86
+
teal gen-key --format json
87
+
```
88
+
89
+
### Extract public key from existing private key
90
+
91
+
```bash
92
+
# Extract as multibase (default)
93
+
teal extract-pubkey --private-key ./keys/repo.key
94
+
95
+
# Extract as hex
96
+
teal extract-pubkey --private-key ./keys/repo.key --format hex
97
+
98
+
# Extract as JSON with both formats
99
+
teal extract-pubkey --private-key ./keys/repo.key --format json
100
+
```
101
+
102
+
### List available keys
103
+
104
+
```bash
105
+
# List keys in default directory
106
+
teal list
107
+
108
+
# List keys in custom directory
109
+
teal list --directory ./keys
110
+
```
111
+
112
+
### Rotate keys (backup old, generate new)
113
+
114
+
```bash
115
+
# Rotate the default 'repo' key
116
+
teal rotate --name repo
117
+
118
+
# Rotate with custom backup directory
119
+
teal rotate --name repo --backup-dir ./backups
120
+
```
121
+
122
+
## CAR File Analysis
123
+
124
+
The CAR explorer can analyze AT Protocol archives and identify:
125
+
126
+
- **Teal Records**: Music plays (`fm.teal.alpha.feed.play`), profiles (`fm.teal.alpha.actor.profile`), and status updates
127
+
- **AT Protocol Records**: Bluesky posts, likes, follows, and other social data
128
+
- **Commit Operations**: Repository changes and metadata
129
+
- **IPLD Structure**: Content addressing and linking
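
Under the hood this split is just a prefix check on each record's `$type` NSID. A minimal, hypothetical sketch (not the CLI's actual implementation) over already-decoded JSON blocks:

```rust
// Hypothetical helper: classify a decoded block by its `$type` NSID.
// Anything under the `fm.teal.` namespace is treated as a Teal record.
use serde_json::{json, Value};

fn classify_record(record: &Value) -> Option<(&str, bool)> {
    let nsid = record.get("$type")?.as_str()?;
    Some((nsid, nsid.starts_with("fm.teal.")))
}

fn main() {
    let play = json!({ "$type": "fm.teal.alpha.feed.play", "track_name": "Bohemian Rhapsody" });
    assert_eq!(classify_record(&play), Some(("fm.teal.alpha.feed.play", true)));
}
```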
130
+
131
+
### Example Output
132
+
133
+
```
134
+
๐ CAR Analysis Results
135
+
==================================================
136
+
137
+
๐ File Overview:
138
+
File size: 10267026 bytes
139
+
Total blocks: 30195
140
+
Root CIDs: 1
141
+
142
+
๐ Record Types:
143
+
app.bsky.feed.like: 11034
144
+
app.bsky.feed.post: 7510
145
+
fm.teal.alpha.feed.play: 2605
146
+
fm.teal.alpha.actor.profile: 1
147
+
148
+
🎵 Teal Records Found:
149
+
fm.teal.alpha.feed.play: 2605
150
+
fm.teal.alpha.actor.profile: 1
151
+
152
+
๐ Sample Teal Records:
153
+
1. fm.teal.alpha.feed.play (bafyreigmu...)
154
+
🎵 Track: Bohemian Rhapsody
155
+
🎤 Artists: Queen
156
+
⏱️ Duration: 355000ms
157
+
```
158
+
159
+
### Exported JSON Structure
160
+
161
+
```json
162
+
[
163
+
{
164
+
"cid": "bafyreigmuwliezhxczoxgxq5hjtsdzaj3jl54kg...",
165
+
"data": {
166
+
"$type": "fm.teal.alpha.feed.play",
167
+
"track_name": "Bohemian Rhapsody",
168
+
"artist_names": ["Queen"],
169
+
"duration": 355000,
170
+
"played_time": "2024-01-15T14:30:00Z"
171
+
}
172
+
}
173
+
]
174
+
```
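
The export is plain JSON, so it can be consumed with serde; a small illustrative reader (the struct name and file path here are examples, not part of the CLI):

```rust
// Illustrative reader for an exported records file; `ExportedRecord` just mirrors
// the JSON shape shown above and is not a real type in the CLI.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ExportedRecord {
    cid: String,
    data: serde_json::Value, // keep the record itself schemaless
}

fn main() -> anyhow::Result<()> {
    let json = std::fs::read_to_string("./teal_exports/fm_teal_alpha_feed_play.json")?;
    let records: Vec<ExportedRecord> = serde_json::from_str(&json)?;
    for record in records.iter().take(3) {
        println!("{} -> {}", record.cid, record.data["track_name"]);
    }
    Ok(())
}
```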
175
+
176
+
## Key Management
177
+
178
+
The tool generates K256 (secp256k1) keys compatible with AT Protocol:
179
+
180
+
- **Private Key**: 32-byte secp256k1 private key stored as binary
181
+
- **Public Key**: Base58-encoded multibase of the compressed public key
182
+
- **Default Location**: `~/.teal/keys/`
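
The multibase is derived from the private key as described above. A condensed sketch using the same crates the CLI itself depends on (k256, multibase, rand):

```rust
// Sketch of the private key -> publicKeyMultibase relationship described above.
use k256::ecdsa::SigningKey;
use multibase::Base;
use rand::rngs::OsRng;

fn main() {
    // 32-byte secp256k1 private key
    let private_key = SigningKey::random(&mut OsRng);

    // Compressed public key (33 bytes), encoded as base58btc multibase ("z..." prefix)
    let compressed = private_key.verifying_key().to_encoded_point(true);
    let public_key_multibase = multibase::encode(Base::Base58Btc, compressed.as_bytes());

    println!("publicKeyMultibase: {}", public_key_multibase);
}
```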
183
+
184
+
### File Structure
185
+
186
+
```
187
+
~/.teal/keys/
188
+
├── repo.key # Private key (32 bytes, binary)
189
+
├── repo.pub # Public key multibase (text)
190
+
├── production.key # Another private key
191
+
└── production.pub # Another public key multibase
192
+
```
193
+
194
+
## Integration
195
+
196
+
Replace the hardcoded multibase in your DID document:
197
+
198
+
```rust
199
+
// Before (hardcoded)
200
+
"publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK"
201
+
202
+
// After (using generated key)
203
+
let pubkey = std::fs::read_to_string("~/.teal/keys/repo.pub")?;
204
+
// Use pubkey in your DID document
205
+
```
206
+
207
+
## Examples
208
+
209
+
### CAR File Analysis
210
+
211
+
```bash
212
+
# Fetch CAR file from a user's handle
213
+
teal car fetch --identity mmatt.net --output mmatt.car
214
+
215
+
# Fetch and immediately explore
216
+
teal car fetch --identity alice.bsky.social --explore
217
+
218
+
# Analyze a local CAR export
219
+
teal car explore --file nat.car
220
+
221
+
# Search for specific tracks
222
+
teal car search --file nat.car --query "bohemian rhapsody"
223
+
224
+
# Export all Teal records for data analysis
225
+
teal car export --file nat.car --output ./music_data
226
+
227
+
# View exported play records
228
+
cat ./music_data/fm_teal_alpha_feed_play.json | jq '.[0]'
229
+
```
230
+
231
+
### Quick setup
232
+
233
+
```bash
234
+
# Generate a key for development
235
+
teal gen-key --name dev
236
+
237
+
# Get the multibase for your DID document
238
+
teal extract-pubkey --private-key ~/.teal/keys/dev.key
239
+
```
240
+
241
+
### Production deployment
242
+
243
+
```bash
244
+
# Generate production keys in a secure location
245
+
teal gen-key --name production --output /secure/keys
246
+
247
+
# Extract multibase for configuration
248
+
PUBKEY=$(teal extract-pubkey --private-key /secure/keys/production.key)
249
+
echo "Public key: $PUBKEY"
250
+
```
251
+
252
+
## Security Notes
253
+
254
+
- Private keys are stored as raw 32-byte files with restrictive permissions (600 on Unix)
255
+
- Keys are generated using cryptographically secure random number generation
256
+
- Never commit private keys to version control
257
+
- Consider using secure key management systems in production
+104
tools/teal-cli/rkey_example.md
+104
tools/teal-cli/rkey_example.md
···
···
1
+
# How to Extract rkey from AT Protocol CAR Files
2
+
3
+
The **rkey** (record key) is not stored inside the IPLD record data itself. Instead, it's found in **commit operations** that map collection paths to record CIDs.
4
+
5
+
## AT Protocol Structure
6
+
7
+
```
8
+
Repository Structure:
9
+
├── Records (IPLD blocks)
10
+
│   ├── bafyrei123... (actual play record data)
11
+
│   ├── bafyrei456... (actual profile record data)
12
+
│   └── bafyrei789... (actual post record data)
13
+
└── Commits (IPLD blocks)
14
+
    ├── bafycommit1... (operations mapping paths to CIDs)
15
+
    └── bafycommit2... (more operations)
16
+
```
17
+
18
+
## Example: Record IPLD (without rkey)
19
+
20
+
```json
21
+
{
22
+
"$type": "fm.teal.alpha.feed.play",
23
+
"track_name": "Bohemian Rhapsody",
24
+
"artist_names": ["Queen"],
25
+
"duration": 355000,
26
+
"played_time": "2024-01-15T14:30:00Z"
27
+
}
28
+
```
29
+
30
+
**❌ No rkey here!** The record contains the data but not its key.
31
+
32
+
## Example: Commit IPLD (with rkey mappings)
33
+
34
+
```json
35
+
{
36
+
"ops": [
37
+
{
38
+
"action": "create",
39
+
"path": "fm.teal.alpha.feed.play/3k2akjdlkjsf", // โ collection/rkey
40
+
"cid": "bafyrei123..." // โ points to the record above
41
+
},
42
+
{
43
+
"action": "create",
44
+
"path": "fm.teal.alpha.actor.profile/self",
45
+
"cid": "bafyrei456..."
46
+
}
47
+
],
48
+
"prev": "bafyrei...",
49
+
"rev": "3k2bkl...",
50
+
"time": "2024-01-15T14:35:00Z"
51
+
}
52
+
```
53
+
54
+
**✅ rkey is here!** Extract it from the `path` field: `"3k2akjdlkjsf"`
55
+
56
+
## Extraction Algorithm
57
+
58
+
```rust
59
+
fn extract_rkeys_from_commits(commits: &[CommitInfo]) -> HashMap<String, String> {
60
+
let mut cid_to_rkey = HashMap::new();
61
+
62
+
for commit in commits {
63
+
for operation in &commit.operations {
64
+
// Path format: "collection/rkey"
65
+
if let Some(rkey) = operation.path.split('/').last() {
66
+
if let Some(ref record_cid) = operation.record_cid {
67
+
cid_to_rkey.insert(record_cid.clone(), rkey.to_string());
68
+
}
69
+
}
70
+
}
71
+
}
72
+
73
+
cid_to_rkey
74
+
}
75
+
```
76
+
77
+
## Complete Example
78
+
79
+
1. **Find commit blocks** in CAR file
80
+
2. **Extract operations** from commit IPLD
81
+
3. **Parse paths** like `"fm.teal.alpha.feed.play/3k2akjdlkjsf"`
82
+
4. **Map CID → rkey**: `bafyrei123... → 3k2akjdlkjsf`
83
+
5. **Use rkey** when processing records
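
As a tiny illustration (not part of the CLI), the same split-and-join in Rust, going from a commit op path plus the repo DID to the AT URI:

```rust
// Illustrative only: derive collection, rkey, and the AT URI from a commit op path.
fn at_uri_from_op(did: &str, path: &str) -> Option<String> {
    // Path format: "collection/rkey"
    let (collection, rkey) = path.split_once('/')?;
    Some(format!("at://{}/{}/{}", did, collection, rkey))
}

fn main() {
    let uri = at_uri_from_op("did:plc:user123", "fm.teal.alpha.feed.play/3k2akjdlkjsf");
    assert_eq!(
        uri.as_deref(),
        Some("at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf")
    );
}
```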
84
+
85
+
## Why This Matters
86
+
87
+
The rkey is essential for:
88
+
- **AT URI construction**: `at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf`
89
+
- **Record identity**: Uniquely identifies the record within the collection
90
+
- **Data integrity**: Maintains proper AT Protocol addressing
91
+
92
+
## CLI Usage
93
+
94
+
```bash
95
+
# Explore CAR file and show rkey extraction
96
+
teal car explore --file archive.car --verbose
97
+
98
+
# The verbose output will show:
99
+
# ๐ rkey Extraction Examples:
100
+
# 1. bafyrei123... → rkey: 3k2akjdlkjsf
101
+
# 2. bafyrei456... → rkey: self
102
+
```
103
+
104
+
**Note**: Some CAR files may not contain commit operations with rkey mappings, especially if they're partial exports or contain only raw records without repository structure.
+116
tools/teal-cli/src/commands/dev.rs
+116
tools/teal-cli/src/commands/dev.rs
···
···
1
+
use anyhow::Result;
2
+
use colored::*;
3
+
4
+
use crate::config::TealConfig;
5
+
use crate::DevCommands;
6
+
7
+
pub async fn run(cmd: DevCommands, config: &TealConfig) -> Result<()> {
8
+
match cmd {
9
+
DevCommands::Setup {
10
+
skip_docker,
11
+
skip_db,
12
+
} => setup_dev_environment(skip_docker, skip_db, config).await,
13
+
DevCommands::Clean { all } => clean_dev_artifacts(all).await,
14
+
DevCommands::Dev { port, watch } => run_dev_server(port, watch, config).await,
15
+
DevCommands::Seed { count, data_type } => generate_seed_data(count, data_type, config).await,
16
+
}
17
+
}
18
+
19
+
async fn setup_dev_environment(
20
+
skip_docker: bool,
21
+
skip_db: bool,
22
+
config: &TealConfig,
23
+
) -> Result<()> {
24
+
println!("{} Setting up development environment...", "๐ ๏ธ".blue());
25
+
println!();
26
+
27
+
if !skip_docker {
28
+
println!("{} Docker Setup:", "๐ณ".blue());
29
+
println!(" {} Checking Docker...", "โข".bold());
30
+
31
+
// TODO: Check if Docker is installed and running
32
+
println!(" {} Docker check not implemented", "โ ๏ธ".yellow());
33
+
println!(" {} Manually ensure Docker is running", "๐ก".blue());
34
+
println!();
35
+
}
36
+
37
+
if !skip_db {
38
+
println!("{} Database Setup:", "๐๏ธ".blue());
39
+
println!(" {} Database URL: {}", "โข".bold(), mask_db_url(&config.database.url));
40
+
41
+
// TODO: Run database initialization and migrations
42
+
println!(" {} Database setup not implemented", "โ ๏ธ".yellow());
43
+
println!(" {} Run: teal database init", "๐ก".blue());
44
+
println!(" {} Run: teal database migrate", "๐ก".blue());
45
+
println!();
46
+
}
47
+
48
+
println!("{} Keys Setup:", "๐".blue());
49
+
let key_path = config.get_key_path(&config.crypto.default_key_name);
50
+
if key_path.exists() {
51
+
println!(" {} Default key already exists", "โ
".green());
52
+
} else {
53
+
println!(" {} Generating default key...", "โข".bold());
54
+
// TODO: Auto-generate key
55
+
println!(" {} Run: teal crypto gen-key", "๐ก".blue());
56
+
}
57
+
println!();
58
+
59
+
println!("{} Development environment setup complete!", "โ
".green());
60
+
println!();
61
+
println!("{} Next steps:", "๐ก".yellow());
62
+
println!(" 1. teal crypto gen-key --name dev");
63
+
println!(" 2. teal database init");
64
+
println!(" 3. teal dev dev --watch");
65
+
66
+
Ok(())
67
+
}
68
+
69
+
async fn clean_dev_artifacts(all: bool) -> Result<()> {
70
+
println!("{} Cleaning development artifacts...", "๐งน".blue());
71
+
println!();
72
+
73
+
let mut cleaned_items = Vec::new();
74
+
75
+
// Clean logs
76
+
if let Ok(entries) = std::fs::read_dir("logs") {
77
+
let mut log_count = 0;
78
+
for entry in entries.flatten() {
79
+
if entry.path().extension().map_or(false, |ext| ext == "log") {
80
+
// TODO: Actually delete log files
81
+
log_count += 1;
82
+
}
83
+
}
84
+
if log_count > 0 {
85
+
cleaned_items.push(format!("{} log files", log_count));
86
+
}
87
+
}
88
+
89
+
// Clean temporary files
90
+
if let Ok(entries) = std::fs::read_dir(".") {
91
+
let mut temp_count = 0;
92
+
for entry in entries.flatten() {
93
+
let path = entry.path();
94
+
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
95
+
if name.starts_with("tmp_") || name.ends_with(".tmp") {
96
+
temp_count += 1;
97
+
}
98
+
}
99
+
}
100
+
if temp_count > 0 {
101
+
cleaned_items.push(format!("{} temporary files", temp_count));
102
+
}
103
+
}
104
+
105
+
if all {
106
+
// Clean build artifacts
107
+
cleaned_items.push("build artifacts".to_string());
108
+
println!(" {} Would clean: target/ directory", "โข".bold());
109
+
110
+
// Clean Docker artifacts
111
+
cleaned_items.push("Docker artifacts".to_string());
112
+
println!(" {} Would clean: Docker images and containers", "โข".bold());
113
+
}
114
+
115
+
if cleaned_items.is_empty() {
116
+
println!("{} No artifacts to clean", "โน๏ธ".blue
+349
tools/teal-cli/src/crypto.rs
+349
tools/teal-cli/src/crypto.rs
···
···
1
+
use anyhow::{Context, Result};
2
+
use colored::*;
3
+
use k256::ecdsa::{SigningKey, VerifyingKey};
4
+
use k256::SecretKey;
5
+
use multibase::Base;
6
+
use rand::rngs::OsRng;
7
+
use serde_json::json;
8
+
use std::path::PathBuf;
9
+
use tokio::fs;
10
+
11
+
/// Generate a new K256 private key
12
+
pub fn generate_private_key() -> SigningKey {
13
+
SigningKey::random(&mut OsRng)
14
+
}
15
+
16
+
/// Load a private key from a file
pub async fn load_private_key(path: &PathBuf) -> Result<SigningKey> {
    let key_bytes = fs::read(path)
        .await
        .with_context(|| format!("Failed to read private key from {:?}", path))?;

    if key_bytes.len() != 32 {
        anyhow::bail!(
            "Invalid private key length. Expected 32 bytes, got {}",
            key_bytes.len()
        );
    }

    let secret_key = SecretKey::from_slice(&key_bytes).context("Failed to parse private key")?;

    Ok(SigningKey::from(secret_key))
}

/// Save a private key to a file
pub async fn save_private_key(key: &SigningKey, path: &PathBuf) -> Result<()> {
    let key_bytes = key.as_nonzero_scalar().to_bytes();

    // Create parent directory if it doesn't exist
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)
            .await
            .with_context(|| format!("Failed to create key directory: {:?}", parent))?;
    }

    fs::write(path, key_bytes)
        .await
        .with_context(|| format!("Failed to write private key to {:?}", path))?;

    // Set restrictive permissions on Unix systems
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(path).await?.permissions();
        perms.set_mode(0o600); // rw-------
        fs::set_permissions(path, perms).await?;
    }

    Ok(())
}

/// Convert a public key to AT Protocol compatible multibase format
pub fn public_key_to_multibase(public_key: &VerifyingKey) -> Result<String> {
    // Get the compressed public key bytes (33 bytes)
    let public_key_bytes = public_key.to_encoded_point(true).as_bytes().to_vec();

    // Encode as multibase with base58btc (z prefix)
    let multibase_string = multibase::encode(Base::Base58Btc, &public_key_bytes);

    Ok(multibase_string)
}

/// Generate a new key pair and save to files
pub async fn generate_key(
    name: String,
    keys_dir: PathBuf,
    force: bool,
    format: String,
) -> Result<()> {
    let private_key_path = keys_dir.join(format!("{}.key", name));
    let public_key_path = keys_dir.join(format!("{}.pub", name));

    // Check if files already exist
    if !force && (private_key_path.exists() || public_key_path.exists()) {
        anyhow::bail!(
            "Key files already exist for '{}'. Use --force to overwrite.\n Private: {:?}\n Public: {:?}",
            name,
            private_key_path,
            public_key_path
        );
    }

    println!(
        "{} Generating K256 key pair for '{}'...",
        "🔑".blue(),
        name.bold()
    );

    // Generate new private key
    let private_key = generate_private_key();
    let public_key = private_key.verifying_key();

    // Save private key
    save_private_key(&private_key, &private_key_path)
        .await
        .with_context(|| format!("Failed to save private key to {:?}", private_key_path))?;

    // Generate public key multibase
    let public_key_multibase =
        public_key_to_multibase(public_key).context("Failed to generate public key multibase")?;

    // Output based on format
    match format.as_str() {
        "json" => {
            let output = json!({
                "keyName": name,
                "privateKeyPath": private_key_path,
                "publicKeyPath": public_key_path,
                "publicKeyMultibase": public_key_multibase,
                "publicKeyHex": hex::encode(public_key.to_encoded_point(false).as_bytes()),
            });
            println!("{}", serde_json::to_string_pretty(&output)?);
        }
        "multibase" => {
            println!("{}", public_key_multibase);
        }
        _ => {
            // includes "files"
            // Save public key multibase to file
            fs::write(&public_key_path, &public_key_multibase)
                .await
                .with_context(|| format!("Failed to write public key to {:?}", public_key_path))?;

            println!("{} Key pair generated successfully!", "✅".green());
            println!(" {} {}", "Name:".bold(), name);
            println!(" {} {:?}", "Private key:".bold(), private_key_path);
            println!(" {} {:?}", "Public key:".bold(), public_key_path);
            println!(
                " {} {}",
                "Multibase:".bold(),
                public_key_multibase.bright_blue()
            );
            println!();
            println!("{} Add this to your DID document:", "💡".yellow());
            println!(" \"publicKeyMultibase\": \"{}\"", public_key_multibase);
        }
    }

    Ok(())
}

/// Extract public key from private key file
pub async fn extract_pubkey(private_key_path: PathBuf, format: String) -> Result<()> {
    println!(
        "{} Extracting public key from {:?}...",
        "🔍".blue(),
        private_key_path
    );

    let private_key = load_private_key(&private_key_path)
        .await
        .with_context(|| format!("Failed to load private key from {:?}", private_key_path))?;

    let public_key = private_key.verifying_key();

    match format.as_str() {
        "multibase" => {
            let multibase = public_key_to_multibase(public_key)?;
            println!("{}", multibase);
        }
        "hex" => {
            let hex = hex::encode(public_key.to_encoded_point(false).as_bytes());
            println!("{}", hex);
        }
        "compressed-hex" => {
            let hex = hex::encode(public_key.to_encoded_point(true).as_bytes());
            println!("{}", hex);
        }
        "json" => {
            let multibase = public_key_to_multibase(public_key)?;
            let hex_uncompressed = hex::encode(public_key.to_encoded_point(false).as_bytes());
            let hex_compressed = hex::encode(public_key.to_encoded_point(true).as_bytes());

            let output = json!({
                "publicKeyMultibase": multibase,
                "publicKeyHex": hex_uncompressed,
                "publicKeyHexCompressed": hex_compressed,
            });
            println!("{}", serde_json::to_string_pretty(&output)?);
        }
        _ => {
            anyhow::bail!(
                "Invalid format '{}'. Use: multibase, hex, compressed-hex, or json",
                format
            );
        }
    }

    Ok(())
}

/// List available keys in directory
pub async fn list_keys(keys_dir: PathBuf) -> Result<()> {
    if !keys_dir.exists() {
        println!("{} No keys directory found at {:?}", "ℹ️".blue(), keys_dir);
        println!("Run 'teal gen-key' to create your first key.");
        return Ok(());
    }

    let mut keys = Vec::new();
    let mut entries = fs::read_dir(&keys_dir).await?;

    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if let Some(extension) = path.extension() {
            if extension == "key" {
                if let Some(stem) = path.file_stem() {
                    if let Some(name) = stem.to_str() {
                        keys.push(name.to_string());
                    }
                }
            }
        }
    }

    if keys.is_empty() {
        println!("{} No keys found in {:?}", "ℹ️".blue(), keys_dir);
        println!("Run 'teal gen-key' to create your first key.");
        return Ok(());
    }

    keys.sort();

    println!("{} Available keys in {:?}:", "🔑".blue(), keys_dir);
    println!();

    let keys_count = keys.len();

    for key_name in keys {
        let private_path = keys_dir.join(format!("{}.key", key_name));
        let public_path = keys_dir.join(format!("{}.pub", key_name));

        let mut status_parts = Vec::new();

        if private_path.exists() {
            status_parts.push("private".green().to_string());
        }

        if public_path.exists() {
            status_parts.push("public".cyan().to_string());

            // Try to read and display the multibase
            if let Ok(multibase) = fs::read_to_string(&public_path).await {
                let multibase = multibase.trim();
                println!(
                    " {} {} ({})",
                    "•".bold(),
                    key_name.bold(),
                    status_parts.join(", ")
                );
                println!(" {}: {}", "Multibase".dimmed(), multibase.bright_blue());
            } else {
                println!(
                    " {} {} ({})",
                    "•".bold(),
                    key_name.bold(),
                    status_parts.join(", ")
                );
            }
        } else {
            println!(
                " {} {} ({})",
                "•".bold(),
                key_name.bold(),
                status_parts.join(", ")
            );
        }

        // Show file modification times
        if let Ok(metadata) = fs::metadata(&private_path).await {
            if let Ok(modified) = metadata.modified() {
                let datetime = chrono::DateTime::<chrono::Local>::from(modified);
                println!(
                    " {}: {}",
                    "Created".dimmed(),
                    datetime.format("%Y-%m-%d %H:%M:%S").to_string().dimmed()
                );
            }
        }
        println!();
    }

    println!(
        "{} Total: {} key(s)",
        "📊".blue(),
        keys_count.to_string().bold()
    );

    Ok(())
}

/// Rotate a key (backup old, generate new)
pub async fn rotate_key(
    keys_dir: PathBuf,
    name: String,
    backup_dir: Option<PathBuf>,
) -> Result<()> {
    let private_key_path = keys_dir.join(format!("{}.key", name));

    if !private_key_path.exists() {
        anyhow::bail!("Key '{}' does not exist in {:?}", name, keys_dir);
    }

    println!("{} Rotating key '{}'...", "🔄".blue(), name.bold());

    // Backup existing key
    let backup_location = backup_dir.unwrap_or_else(|| keys_dir.join("backups"));

    fs::create_dir_all(&backup_location).await?;

    let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
    let backup_private = backup_location.join(format!("{}_{}.key", name, timestamp));
    let backup_public = backup_location.join(format!("{}_{}.pub", name, timestamp));

    fs::copy(&private_key_path, &backup_private).await?;

    let public_key_path = keys_dir.join(format!("{}.pub", name));
    if public_key_path.exists() {
        fs::copy(&public_key_path, &backup_public).await?;
    }

    println!("Backed up existing key to: {:?}", backup_private);

    // Generate new key
    let new_key = generate_private_key();
    save_private_key(&new_key, &private_key_path).await?;

    // Save new public key multibase
    let public_key = new_key.verifying_key();
    let multibase = public_key_to_multibase(public_key)?;
    fs::write(&public_key_path, &multibase).await?;

    println!("{} Key rotation completed!", "✅".green());
    println!(" {} {}", "New multibase:".bold(), multibase.bright_blue());
    println!();
    println!("{} Update your DID document with:", "💡".yellow());
    println!(" \"publicKeyMultibase\": \"{}\"", multibase);

    Ok(())
}
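
For reference, a minimal round-trip sketch of the helpers above (not part of the diff): it assumes the generate_private_key() helper defined earlier in crypto.rs and only exercises functions shown here, writing a throwaway key under the OS temp dir.

// Hypothetical unit test; illustrative only, not part of the PR.
#[cfg(test)]
mod round_trip_tests {
    use super::*;

    #[tokio::test]
    async fn key_round_trip_preserves_multibase() -> anyhow::Result<()> {
        // Throwaway path under the OS temp dir.
        let path = std::env::temp_dir().join("teal-roundtrip-example.key");

        // Generate, persist, and reload a key using the helpers above.
        let original = generate_private_key();
        save_private_key(&original, &path).await?;
        let reloaded = load_private_key(&path).await?;

        // The AT Protocol multibase form should survive the round trip.
        assert_eq!(
            public_key_to_multibase(original.verifying_key())?,
            public_key_to_multibase(reloaded.verifying_key())?
        );

        tokio::fs::remove_file(&path).await?;
        Ok(())
    }
}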
+102 tools/teal-cli/src/main.rs
use anyhow::Result;
use clap::{Parser, Subcommand};

use std::path::PathBuf;

mod crypto;

#[derive(Parser)]
#[command(name = "teal")]
#[command(about = "Teal management utilities")]
#[command(version = "0.1.0")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    /// Generate a new K256 key pair
    GenKey {
        /// Key name/identifier
        #[arg(short, long, default_value = "repo")]
        name: String,

        /// Output directory (defaults to ~/.teal/keys)
        #[arg(short, long)]
        output: Option<PathBuf>,

        /// Overwrite existing keys
        #[arg(short, long)]
        force: bool,

        /// Output format: json, multibase, or files
        #[arg(long, default_value = "files")]
        format: String,
    },

    /// Extract public key multibase from private key
    ExtractPubkey {
        /// Path to private key file
        #[arg(short, long)]
        private_key: PathBuf,

        /// Output format
        #[arg(short, long, default_value = "multibase")]
        format: String,
    },

    /// List available keys
    List {
        /// Keys directory (defaults to ~/.teal/keys)
        #[arg(short, long)]
        directory: Option<PathBuf>,
    },

    /// Rotate keys (generate new, backup old)
    Rotate {
        /// Key name to rotate
        #[arg(short, long)]
        name: String,

        /// Backup directory
        #[arg(short, long)]
        backup_dir: Option<PathBuf>,
    },
}

fn get_default_keys_dir() -> PathBuf {
    dirs::home_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join(".teal")
        .join("keys")
}

#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();

    match cli.command {
        Commands::GenKey {
            name,
            output,
            force,
            format,
        } => {
            let keys_dir = output.unwrap_or_else(get_default_keys_dir);
            crypto::generate_key(name, keys_dir, force, format).await
        }
        Commands::ExtractPubkey {
            private_key,
            format,
        } => crypto::extract_pubkey(private_key, format).await,
        Commands::List { directory } => {
            let keys_dir = directory.unwrap_or_else(get_default_keys_dir);
            crypto::list_keys(keys_dir).await
        }
        Commands::Rotate { name, backup_dir } => {
            let keys_dir = get_default_keys_dir();
            crypto::rotate_key(keys_dir, name, backup_dir).await
        }
    }
}
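
And a hedged sanity check of the CLI surface above (also not part of the diff), using clap's try_parse_from from the derive API to confirm the gen-key flags map onto the GenKey variant as declared:

// Hypothetical unit test; illustrative only, not part of the PR.
#[cfg(test)]
mod cli_parse_tests {
    use super::*;

    #[test]
    fn gen_key_arguments_parse_as_declared() {
        // Synthetic argv; flag names mirror the #[arg] attributes above.
        let cli = Cli::try_parse_from([
            "teal", "gen-key", "--name", "repo", "--format", "multibase", "--force",
        ])
        .expect("gen-key arguments should parse");

        match cli.command {
            Commands::GenKey { name, output, force, format } => {
                assert_eq!(name, "repo");
                assert!(output.is_none());
                assert!(force);
                assert_eq!(format, "multibase");
            }
            _ => panic!("expected the GenKey subcommand"),
        }
    }
}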
+13 -1 turbo.json
···
    },
    "lex:gen-server": {
      "dependsOn": [],
+     "outputs": ["./src/**"]
    },
    "lex:gen": {
      "dependsOn": [],
···
    },
    "db:migrate": {
      "cache": false
+   },
+   "@teal/amethyst#build": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
+   },
+   "@teal/amethyst#build:web": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
+   },
+   "@teal/amethyst#build:ios": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
+   }
    }
  }