.dockerignore (+51)
···
+# Rust build artifacts
+target/
+**/target/
+services/target/
+apps/*/target/
+
+# Node.js dependencies and build artifacts
+node_modules/
+**/node_modules/
+.turbo/
+**/.turbo/
+build/
+dist/
+.next/
+
+# Development and cache files
+.gitignore
+**/.DS_Store
+*.log
+*.tmp
+*.temp
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Environment and config files
+.env
+.env.local
+.env.*.local
+
+# Database files
+*.db
+*.sqlite
+*.sqlite3
+
+# Test coverage
+coverage/
+**/coverage/
+
+# Temporary files
+tmp/
+temp/
+
+# SQLx offline query cache
+# Include workspace-level cache for monorepo builds
+# Uncomment the line below if you want to force online compilation
+# .sqlx/
.env.development (+22)
···
+# Test Database Environment Configuration
+# This file provides database credentials for testing discriminant improvements
+
+# Database Configuration
+DB_USER=postgres
+DB_PASSWORD=testpass123
+DB_NAME=teal_test
+
+# Docker Database URL (used by services in compose)
+DOCKER_DB_URL=postgres://postgres:testpass123@postgres:5432/teal_test
+
+# Local Database URL (used by migration tools and local testing)
+DATABASE_URL=postgres://postgres:testpass123@localhost:5433/teal_test
+
+# Redis Configuration (if needed)
+REDIS_URL=redis://garnet:6379
+
+# AT Protocol Configuration (placeholder for testing)
+AT_PROTOCOL_JWT_SECRET=test-jwt-secret-for-development-only
+
+# Client Configuration
+CLIENT_ADDRESS=localhost
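A quick sanity check for the credentials above, assuming the test compose stack is running and exposes Postgres on host port 5433 as `DATABASE_URL` implies (hypothetical invocation, not part of this change):

```bash
# Assumes psql is installed locally; the URL is copied verbatim from DATABASE_URL above.
psql "postgres://postgres:testpass123@localhost:5433/teal_test" -c "SELECT 1;"
```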
.env.template (+10 -7)
···
 NODE_ENV=development
 PORT=3000
 HOST=0.0.0.0
-PUBLIC_URL=A publicly accessible url for aqua
+PUBLIC_URL= # A publicly accessible url for aqua
 DB_USER=postgres
 DB_PASSWORD=supersecurepassword123987
 DB_NAME=teal
 DATABASE_URL="postgresql://${DB_USER}:${DB_PASSWORD}@localhost:5432/${DB_NAME}"
 DOCKER_DB_URL="postgresql://${DB_USER}:${DB_PASSWORD}@host.docker.internal:5432/${DB_NAME}"
-#This is not currently being used fully so can just use this default pubkey for now
+# `cargo run --bin teal gen-key` to generate a new pubkey
 DID_WEB_PUBKEY=zQ3sheEnMKhEK87PSu4P2mjAevViqHcjKmgxBWsDQPjLRM9wP
-CLIENT_ADDRESS=A publicly accessible host for amethyst like amethyst.teal.fm
-PUBLIC_DID_WEB=did:web:{aqua's PUBLIC_URL goes here after did:web:}
+CLIENT_ADDRESS= # A publicly accessible host for amethyst like amethyst.teal.fm
+PUBLIC_DID_WEB= # did:web:{aqua's PUBLIC_URL goes here after did:web:}
+
+# amethyst
+EXPO_PUBLIC_DID_WEB= # same as PUBLIC_DID_WEB
+EXPO_PUBLIC_BASE_URL= # same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm

-#amethyst
-EXPO_PUBLIC_DID_WEB=same as PUBLIC_DID_WEB
-EXPO_PUBLIC_BASE_URL=same as CLIENT_ADDRESS but with http scheme like https://amethyst.teal.fm
+SQLX_OFFLINE=true
+SQLX_OFFLINE_DIR="./.sqlx"
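A minimal sketch of filling in this template, assuming aqua is served at `aqua.example.com` and amethyst at `amethyst.example.com` (hypothetical hosts); the key-generation command is the one referenced in the comment added above:

```bash
# Generate a fresh DID pubkey for DID_WEB_PUBKEY (command from the template comment)
cargo run --bin teal gen-key

# Hypothetical example values for a deployment at the hosts named above
PUBLIC_URL=aqua.example.com
PUBLIC_DID_WEB=did:web:aqua.example.com
CLIENT_ADDRESS=amethyst.example.com
EXPO_PUBLIC_DID_WEB=did:web:aqua.example.com
EXPO_PUBLIC_BASE_URL=https://amethyst.example.com
```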
.github/WORKFLOWS.md (+201)
···
+# GitHub Actions Workflows Documentation
+
+This document describes the CI/CD workflows configured for the Teal project.
+
+## Overview
+
+The project uses GitHub Actions for continuous integration, deployment, and security scanning. The workflows are designed to handle a polyglot codebase with Rust services, Node.js packages, and a React Native application.
+
+## Workflows
+
+### CI (`ci.yml`)
+
+**Triggers:** Push/PR to `main` or `develop` branches
+
+**Purpose:** Primary continuous integration workflow that runs tests, linting, and type checking.
+
+**Jobs:**
+- **rust-check**: Formats, lints (clippy), and tests all Rust code in both `services/` and `apps/`
+- **node-check**: Type checking, linting, building, and testing Node.js packages
+- **lexicon-check**: Validates lexicon files and ensures generated code is up to date
+
+**Key Features:**
+- Caches Rust and Node.js dependencies for faster builds
+- Runs in parallel for optimal performance
+- Fails fast if any check fails
+
+### Aqua (`aqua.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `apps/aqua/**`
+
+**Purpose:** Builds and pushes the Aqua Rust application Docker image.
+
+**Features:**
+- Multi-platform builds (linux/amd64, linux/arm64)
+- Pushes to GitHub Container Registry (ghcr.io)
+- Only pushes on main branch (not PRs)
+- Uses GitHub Actions cache for Docker layers
+
+### Cadet (`cadet.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `services/cadet/**`
+
+**Purpose:** Builds and pushes the Cadet Rust service Docker image.
+
+**Features:**
+- Multi-platform builds (linux/amd64, linux/arm64)
+- Pushes to GitHub Container Registry (ghcr.io)
+- Only pushes on main branch (not PRs)
+- Uses GitHub Actions cache for Docker layers
+
+### Amethyst (`amethyst.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `apps/amethyst/**`
+
+**Purpose:** Builds the React Native/Expo application for different platforms.
+
+**Jobs:**
+- **build-web**: Builds web version and uploads artifacts
+- **build-ios**: Builds iOS version (only on main branch pushes, requires macOS runner)
+- **lint-and-test**: Type checking and testing
+
+**Features:**
+- Generates lexicons before building
+- Platform-specific builds
+- Artifact uploads for build assets
+
+### Services (`services.yml`)
+
+**Triggers:** Push/PR to `main` with changes to `services/**`
+
+**Purpose:** Dynamically detects and builds all services with Dockerfiles.
+
+**Jobs:**
+- **detect-services**: Scans for services with Dockerfiles
+- **build-service**: Matrix build for each detected service
+- **test-services**: Runs tests for all services
+
+**Features:**
+- Dynamic service detection
+- Skips special directories (target, migrations, types, .sqlx)
+- Per-service Docker caching
+- Multi-platform builds
+
+### Release (`release.yml`)
+
+**Triggers:**
+- Push to tags matching `v*`
+- Manual workflow dispatch
+
+**Purpose:** Creates GitHub releases and builds production Docker images.
+
+**Jobs:**
+- **create-release**: Creates GitHub release with changelog
+- **build-and-release-aqua**: Builds and tags Aqua for release
+- **build-and-release-cadet**: Builds and tags Cadet for release
+- **release-other-services**: Builds other services (rocketman, satellite)
+- **build-and-release-amethyst**: Builds Amethyst and uploads to release
+
+**Features:**
+- Automatic changelog extraction
+- Production Docker tags (latest + version)
+- Release artifact uploads
+- Support for pre-releases (tags with `-`)
+
+### Security (`security.yml`)
+
+**Triggers:**
+- Push/PR to `main` or `develop`
+- Daily at 2 AM UTC
+- Manual dispatch
+
+**Purpose:** Comprehensive security scanning and vulnerability detection.
+
+**Jobs:**
+- **rust-security-audit**: Uses `cargo audit` for Rust dependencies
+- **node-security-audit**: Uses `pnpm audit` for Node.js dependencies
+- **codeql-analysis**: GitHub's semantic code analysis
+- **docker-security-scan**: Trivy vulnerability scanning for Docker images
+- **secrets-scan**: TruffleHog for secrets detection
+
+**Features:**
+- Fails on high/critical vulnerabilities
+- SARIF upload for security tab integration
+- Historical scanning with git history
+
+
- Historical scanning with git history
125
+
126
+
## Configuration Files
127
+
128
+
### Dependabot (`dependabot.yml`)
129
+
130
+
Automated dependency updates for:
131
+
- **npm**: Weekly updates for Node.js dependencies
132
+
- **cargo**: Weekly updates for Rust dependencies (services + apps)
133
+
- **github-actions**: Weekly updates for workflow actions
134
+
- **docker**: Weekly updates for Docker base images
135
+
136
+
**Schedule:** Monday-Tuesday mornings, staggered to avoid conflicts
137
+
138
+
## Container Registry
139
+
140
+
All Docker images are pushed to GitHub Container Registry:
141
+
- `ghcr.io/[owner]/[repo]/aqua`
142
+
- `ghcr.io/[owner]/[repo]/cadet`
143
+
- `ghcr.io/[owner]/[repo]/[service-name]`
144
+
145
+
**Tags:**
146
+
- `latest`: Latest build from main branch
147
+
- `sha-[commit]`: Specific commit builds
148
+
- `v[version]`: Release builds
149
+
- `pr-[number]`: Pull request builds (for testing)
150
+
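For example, pulling a published image by tag might look like this (the `[owner]`/`[repo]` placeholders are left as in the list above):

```bash
# Latest main-branch build of the aqua image
docker pull ghcr.io/[owner]/[repo]/aqua:latest

# A specific release of the cadet image
docker pull ghcr.io/[owner]/[repo]/cadet:v1.0.0
```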
+## Secrets and Permissions
+
+**Required secrets:**
+- `GITHUB_TOKEN`: Automatically provided (for registry access and releases)
+
+**Permissions used:**
+- `contents: read`: Read repository contents
+- `packages: write`: Push to GitHub Container Registry
+- `security-events: write`: Upload security scan results
+- `actions: read`: Access workflow information
+
+## Best Practices
+
+1. **Path-based triggers**: Workflows only run when relevant files change
+2. **Caching**: Aggressive caching for Rust, Node.js, and Docker layers
+3. **Multi-platform**: Docker images built for amd64 and arm64
+4. **Security-first**: Regular vulnerability scanning and secrets detection
+5. **Fail-fast**: Early termination on critical issues
+6. **Artifact preservation**: Build outputs stored for debugging/deployment
+
+## Usage Examples
+
+### Manual Release
+```bash
+# Tag and push for automatic release
+git tag v1.0.0
+git push origin v1.0.0
+
+# Or use workflow dispatch in GitHub UI
+```
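The workflow-dispatch path can also be driven from the GitHub CLI; a sketch, assuming `gh` is authenticated against this repository (the `tag` input matches the one declared in `release.yml`):

```bash
# Trigger the release workflow manually with a tag input
gh workflow run release.yml -f tag=v1.0.0

# Watch the run that was just started
gh run watch
```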
+
+### Local Development
+```bash
+# Run the same checks locally
+pnpm rust:fmt
+pnpm rust:clippy
+pnpm typecheck
+pnpm test
+```
+
+### Debugging Failed Builds
+1. Check the Actions tab for detailed logs
+2. Download artifacts from successful builds
+3. Use the same commands locally with cached dependencies
+
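Steps 1 and 2 are also covered by the GitHub CLI; a sketch, assuming `gh` is installed (artifact names such as `node-builds` come from the workflows in this change):

```bash
# List recent CI runs and inspect the failing one
gh run list --workflow=ci.yml
gh run view <run-id> --log-failed

# Download artifacts from a successful run
gh run download <run-id> -n node-builds
```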
+## Maintenance
+
+- **Weekly**: Review Dependabot PRs
+- **Monthly**: Update action versions if not auto-updated
+- **Quarterly**: Review and update security scanning tools
+- **As needed**: Add new services to release workflow matrix
.github/actions/setup/action.yml (+77)
···
1
+
name: "Setup Teal Environment"
2
+
description: "Sets up the common environment for Teal builds including Node.js, Rust, pnpm, and lexicons"
3
+
4
+
inputs:
5
+
setup-rust:
6
+
description: "Whether to setup Rust toolchain"
7
+
required: false
8
+
default: "false"
9
+
rust-components:
10
+
description: 'Rust components to install (e.g., "rustfmt,clippy")'
11
+
required: false
12
+
default: "rustfmt,clippy"
13
+
setup-node:
14
+
description: "Whether to setup Node.js and pnpm"
15
+
required: false
16
+
default: "true"
17
+
node-version:
18
+
description: "Node.js version to use"
19
+
required: false
20
+
default: "20"
21
+
lexicons-only-rust:
22
+
description: "Generate only Rust lexicons"
23
+
required: false
24
+
default: "false"
25
+
cache-key-suffix:
26
+
description: "Additional suffix for cache keys"
27
+
required: false
28
+
default: ""
29
+
30
+
runs:
31
+
using: "composite"
32
+
steps:
33
+
- name: Setup lexicons
34
+
shell: bash
35
+
run: ./scripts/setup-lexicons.sh
36
+
37
+
- name: Install pnpm
38
+
if: inputs.setup-node == 'true'
39
+
uses: pnpm/action-setup@v4
40
+
41
+
- name: Setup Node.js
42
+
if: inputs.setup-node == 'true'
43
+
uses: actions/setup-node@v4
44
+
with:
45
+
node-version: ${{ inputs.node-version }}
46
+
cache: "pnpm"
47
+
48
+
- name: Install Node dependencies
49
+
if: inputs.setup-node == 'true'
50
+
shell: bash
51
+
run: pnpm install --frozen-lockfile
52
+
53
+
- name: Generate lexicons
54
+
if: inputs.setup-node == 'true'
55
+
shell: bash
56
+
run: |
57
+
cd tools/lexicon-cli && pnpm i && pnpm build && cd ..
58
+
if [ "${{ inputs.lexicons-only-rust }}" = "true" ]; then
59
+
pnpm lex:gen --rust-only
60
+
else
61
+
pnpm lex:gen
62
+
fi
63
+
64
+
- name: Install Rust toolchain
65
+
if: inputs.setup-rust == 'true'
66
+
uses: dtolnay/rust-toolchain@stable
67
+
with:
68
+
components: ${{ inputs.rust-components }}
69
+
70
+
- name: Cache Rust dependencies
71
+
if: inputs.setup-rust == 'true'
72
+
uses: Swatinem/rust-cache@v2
73
+
with:
74
+
workspaces: |
75
+
services
76
+
apps/aqua
77
+
key: ${{ inputs.cache-key-suffix }}
.github/workflows/amethyst.yml (+91)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Build Amethyst
4
+
5
+
on:
6
+
push:
7
+
branches: [main]
8
+
paths:
9
+
- "apps/amethyst/**"
10
+
- "packages/**"
11
+
- "lexicons/**"
12
+
- "package.json"
13
+
- "pnpm-lock.yaml"
14
+
- ".github/workflows/amethyst.yml"
15
+
pull_request:
16
+
branches: [main]
17
+
paths:
18
+
- "apps/amethyst/**"
19
+
- "packages/**"
20
+
- "lexicons/**"
21
+
- "package.json"
22
+
- "pnpm-lock.yaml"
23
+
- ".github/workflows/amethyst.yml"
24
+
25
+
jobs:
26
+
build:
27
+
name: Build Amethyst
28
+
runs-on: ubuntu-latest
29
+
outputs:
30
+
build-cache-key: ${{ steps.cache-key.outputs.key }}
31
+
steps:
32
+
- name: Checkout repository
33
+
uses: actions/checkout@v4
34
+
35
+
- name: Setup environment
36
+
uses: ./.github/actions/setup
37
+
with:
38
+
setup-node: "true"
39
+
40
+
- name: Generate cache key
41
+
id: cache-key
42
+
run: echo "key=amethyst-build-${{ hashFiles('apps/amethyst/**', 'packages/**', 'lexicons/**') }}" >> $GITHUB_OUTPUT
43
+
44
+
- name: Build lex tool
45
+
run: cd tools/lexicon-cli && pnpm i && pnpm build
46
+
47
+
- name: Build web
48
+
run: pnpm turbo build:web --filter=@teal/amethyst
49
+
50
+
- name: Type check
51
+
run: pnpm turbo check-types --filter=@teal/amethyst
52
+
53
+
- name: Run tests
54
+
run: pnpm turbo test --filter=@teal/amethyst
55
+
56
+
- name: Upload web build artifacts
57
+
uses: actions/upload-artifact@v4
58
+
with:
59
+
name: amethyst-web-build
60
+
path: apps/amethyst/build/
61
+
retention-days: 7
62
+
63
+
build-ios:
64
+
name: Build iOS
65
+
runs-on: macos-latest
66
+
needs: build
67
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
68
+
steps:
69
+
- name: Checkout repository
70
+
uses: actions/checkout@v4
71
+
72
+
- name: Setup environment
73
+
uses: ./.github/actions/setup
74
+
with:
75
+
setup-node: "true"
76
+
77
+
- name: Build lex tool
78
+
run: cd tools/lexicon-cli && pnpm i && pnpm build
79
+
80
+
- name: Setup Expo CLI
81
+
run: npm install -g @expo/cli
82
+
83
+
- name: Build iOS
84
+
run: pnpm turbo build:ios --filter=@teal/amethyst
85
+
86
+
- name: Upload iOS build artifacts
87
+
uses: actions/upload-artifact@v4
88
+
with:
89
+
name: amethyst-ios-build
90
+
path: apps/amethyst/build/
91
+
retention-days: 7
.github/workflows/aqua.yml (+70)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Build and Push Aqua
4
+
5
+
on:
6
+
push:
7
+
branches:
8
+
- main
9
+
paths:
10
+
- "apps/aqua/**"
11
+
- "Cargo.toml"
12
+
- "Cargo.lock"
13
+
- ".github/workflows/aqua.yml"
14
+
pull_request:
15
+
branches:
16
+
- main
17
+
paths:
18
+
- "apps/aqua/**"
19
+
- "Cargo.toml"
20
+
- "Cargo.lock"
21
+
- ".github/workflows/aqua.yml"
22
+
23
+
env:
24
+
REGISTRY: ghcr.io
25
+
IMAGE_NAME: ${{ github.repository }}/aqua
26
+
27
+
jobs:
28
+
build-and-push:
29
+
runs-on: ubuntu-latest
30
+
permissions:
31
+
contents: read
32
+
packages: write
33
+
34
+
steps:
35
+
- name: Checkout repository
36
+
uses: actions/checkout@v4
37
+
38
+
- name: Log in to Container Registry
39
+
if: github.event_name != 'pull_request'
40
+
uses: docker/login-action@v3
41
+
with:
42
+
registry: ${{ env.REGISTRY }}
43
+
username: ${{ github.actor }}
44
+
password: ${{ secrets.GITHUB_TOKEN }}
45
+
46
+
- name: Extract metadata
47
+
id: meta
48
+
uses: docker/metadata-action@v5
49
+
with:
50
+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
51
+
tags: |
52
+
type=ref,event=branch
53
+
type=ref,event=pr
54
+
type=sha,prefix=sha-
55
+
type=raw,value=latest,enable={{is_default_branch}}
56
+
57
+
- name: Set up Docker Buildx
58
+
uses: docker/setup-buildx-action@v3
59
+
60
+
- name: Build and push Docker image
61
+
uses: docker/build-push-action@v5
62
+
with:
63
+
context: .
64
+
file: ./apps/aqua/Dockerfile
65
+
push: ${{ github.event_name != 'pull_request' }}
66
+
tags: ${{ steps.meta.outputs.tags }}
67
+
labels: ${{ steps.meta.outputs.labels }}
68
+
platforms: linux/amd64,linux/arm64
69
+
cache-from: type=gha
70
+
cache-to: type=gha,mode=max
.github/workflows/cadet.yml (+68)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Build and Push Cadet
4
+
5
+
on:
6
+
push:
7
+
branches: [main]
8
+
paths:
9
+
- "services/cadet/**"
10
+
- "Cargo.toml"
11
+
- "Cargo.lock"
12
+
- ".github/workflows/cadet.yml"
13
+
pull_request:
14
+
branches: [main]
15
+
paths:
16
+
- "services/cadet/**"
17
+
- "Cargo.toml"
18
+
- "Cargo.lock"
19
+
- ".github/workflows/cadet.yml"
20
+
21
+
env:
22
+
REGISTRY: ghcr.io
23
+
IMAGE_NAME: ${{ github.repository }}/cadet
24
+
25
+
jobs:
26
+
build-and-push:
27
+
runs-on: ubuntu-latest
28
+
permissions:
29
+
contents: read
30
+
packages: write
31
+
32
+
steps:
33
+
- name: Checkout repository
34
+
uses: actions/checkout@v4
35
+
36
+
- name: Log in to Container Registry
37
+
if: github.event_name != 'pull_request'
38
+
uses: docker/login-action@v3
39
+
with:
40
+
registry: ${{ env.REGISTRY }}
41
+
username: ${{ github.actor }}
42
+
password: ${{ secrets.GITHUB_TOKEN }}
43
+
44
+
- name: Extract metadata
45
+
id: meta
46
+
uses: docker/metadata-action@v5
47
+
with:
48
+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
49
+
tags: |
50
+
type=ref,event=branch
51
+
type=ref,event=pr
52
+
type=sha,prefix=sha-
53
+
type=raw,value=latest,enable={{is_default_branch}}
54
+
55
+
- name: Set up Docker Buildx
56
+
uses: docker/setup-buildx-action@v3
57
+
58
+
- name: Build and push Docker image
59
+
uses: docker/build-push-action@v5
60
+
with:
61
+
context: .
62
+
file: ./services/cadet/Dockerfile
63
+
push: ${{ github.event_name != 'pull_request' }}
64
+
tags: ${{ steps.meta.outputs.tags }}
65
+
labels: ${{ steps.meta.outputs.labels }}
66
+
platforms: linux/amd64,linux/arm64
67
+
cache-from: type=gha
68
+
cache-to: type=gha,mode=max
.github/workflows/ci.yml (+217)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: CI
4
+
5
+
on:
6
+
push:
7
+
branches: [main, develop]
8
+
pull_request:
9
+
branches: [main, develop]
10
+
11
+
env:
12
+
CARGO_TERM_COLOR: always
13
+
SQLX_OFFLINE: true
14
+
SQLX_OFFLINE_DIR: "./.sqlx"
15
+
16
+
jobs:
17
+
setup-and-build:
18
+
name: Setup and Build All
19
+
runs-on: ubuntu-latest
20
+
outputs:
21
+
rust-cache-key: ${{ steps.rust-cache.outputs.cache-hit }}
22
+
node-cache-key: ${{ steps.node-cache.outputs.cache-hit }}
23
+
steps:
24
+
- name: Checkout repository
25
+
uses: actions/checkout@v4
26
+
27
+
- name: Setup environment
28
+
uses: ./.github/actions/setup
29
+
with:
30
+
setup-rust: "true"
31
+
setup-node: "true"
32
+
cache-key-suffix: "ci-build"
33
+
34
+
- name: Setup SQLx offline files
35
+
run: ./scripts/setup-sqlx-offline.sh
36
+
37
+
- name: Build Node packages
38
+
run: pnpm build
39
+
40
+
- name: Build Rust services (x86_64)
41
+
run: |
42
+
cargo build --release --all-features
43
+
44
+
- name: Build Rust apps (x86_64)
45
+
run: |
46
+
cd apps/aqua
47
+
cargo build --release --all-features
48
+
49
+
- name: Collect executables (x86_64)
50
+
run: |
51
+
mkdir -p artifacts/x86_64
52
+
# Copy service executables
53
+
if [ -d "services/target/release" ]; then
54
+
find services/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
55
+
fi
56
+
# Copy app executables
57
+
if [ -d "apps/aqua/target/release" ]; then
58
+
find apps/aqua/target/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/x86_64/ \;
59
+
fi
60
+
echo "x86_64 executables:"
61
+
ls -la artifacts/x86_64/ || echo "No executables found"
62
+
63
+
- name: Upload Node build artifacts
64
+
uses: actions/upload-artifact@v4
65
+
with:
66
+
name: node-builds
67
+
path: |
68
+
packages/*/dist/
69
+
apps/amethyst/build/
70
+
retention-days: 1
71
+
72
+
- name: Upload Rust build artifacts
73
+
uses: actions/upload-artifact@v4
74
+
with:
75
+
name: rust-builds-x86_64
76
+
path: |
77
+
artifacts/x86_64/
78
+
retention-days: 1
79
+
80
+
rust-cross-compile:
81
+
name: Cross-compile Rust
82
+
runs-on: ubuntu-latest
83
+
needs: setup-and-build
84
+
strategy:
85
+
matrix:
86
+
target: [aarch64-unknown-linux-gnu]
87
+
steps:
88
+
- name: Checkout repository
89
+
uses: actions/checkout@v4
90
+
91
+
- name: Setup environment
92
+
uses: ./.github/actions/setup
93
+
with:
94
+
setup-rust: "true"
95
+
setup-node: "true"
96
+
lexicons-only-rust: "true"
97
+
cache-key-suffix: "cross-${{ matrix.target }}"
98
+
99
+
- name: Setup SQLx offline files
100
+
run: ./scripts/setup-sqlx-offline.sh
101
+
102
+
- name: Install cross-compilation tools
103
+
run: |
104
+
cargo install cross --git https://github.com/cross-rs/cross
105
+
rustup target add ${{ matrix.target }}
106
+
# Set up environment for cross-compilation
107
+
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
108
+
echo "CROSS_NO_WARNINGS=0" >> $GITHUB_ENV
109
+
110
+
- name: Cross-compile services
111
+
run: |
112
+
cross build --release --all-features --target ${{ matrix.target }}
113
+
114
+
- name: Collect cross-compiled executables
115
+
run: |
116
+
mkdir -p artifacts/${{ matrix.target }}
117
+
# Copy service executables
118
+
if [ -d "services/target/${{ matrix.target }}/release" ]; then
119
+
find services/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
120
+
fi
121
+
# Copy app executables
122
+
if [ -d "apps/aqua/target/${{ matrix.target }}/release" ]; then
123
+
find apps/aqua/target/${{ matrix.target }}/release -maxdepth 1 -type f -executable ! -name "*.d" ! -name "*-*" -exec cp {} artifacts/${{ matrix.target }}/ \;
124
+
fi
125
+
echo "Cross-compiled executables for ${{ matrix.target }}:"
126
+
ls -la artifacts/${{ matrix.target }}/ || echo "No executables found"
127
+
128
+
- name: Upload cross-compiled artifacts
129
+
uses: actions/upload-artifact@v4
130
+
with:
131
+
name: rust-builds-${{ matrix.target }}
132
+
path: |
133
+
artifacts/${{ matrix.target }}/
134
+
retention-days: 1
135
+
136
+
# disabled b/c it's triggered on autogenerated content
137
+
# and can't find a way around it rn
138
+
139
+
# rust-quality:
140
+
# name: Rust Quality Checks
141
+
# runs-on: ubuntu-latest
142
+
# needs: setup-and-build
143
+
# steps:
144
+
# - name: Checkout repository
145
+
# uses: actions/checkout@v4
146
+
147
+
# - name: Setup environment
148
+
# uses: ./.github/actions/setup
149
+
# with:
150
+
# setup-rust: "true"
151
+
# setup-node: "true"
152
+
# lexicons-only-rust: "true"
153
+
# cache-key-suffix: "ci-build"
154
+
155
+
# - name: Setup SQLx offline files
156
+
# run: ./scripts/setup-sqlx-offline.sh
157
+
158
+
# # - name: Check Rust formatting
159
+
# # run: |
160
+
# # cargo fmt --all -- --check
161
+
162
+
# - name: Run Clippy
163
+
# run: |
164
+
# cargo clippy --all-targets --all-features --workspace --exclude types -- -D warnings
165
+
166
+
# - name: Run Rust tests
167
+
# run: |
168
+
# cargo test --all-features
169
+
170
+
# node-quality:
171
+
# name: Node.js Quality Checks
172
+
# runs-on: ubuntu-latest
173
+
# needs: setup-and-build
174
+
# steps:
175
+
# - name: Checkout repository
176
+
# uses: actions/checkout@v4
177
+
178
+
# - name: Setup environment
179
+
# uses: ./.github/actions/setup
180
+
# with:
181
+
# setup-node: "true"
182
+
# cache-key-suffix: "ci-build"
183
+
184
+
# - name: Download Node build artifacts
185
+
# uses: actions/download-artifact@v4
186
+
# with:
187
+
# name: node-builds
188
+
# path: .
189
+
190
+
# # - name: Type check
191
+
# # run: pnpm typecheck
192
+
193
+
# - name: Lint and format check
194
+
# run: pnpm fix --check
195
+
196
+
# - name: Run tests
197
+
# run: pnpm test
198
+
199
+
lexicon-validation:
200
+
name: Lexicon Validation
201
+
runs-on: ubuntu-latest
202
+
steps:
203
+
- name: Checkout repository
204
+
uses: actions/checkout@v4
205
+
206
+
- name: Setup environment
207
+
uses: ./.github/actions/setup
208
+
with:
209
+
setup-node: "true"
210
+
211
+
- name: Validate lexicons
212
+
run: pnpm lex:validate
213
+
214
+
- name: Check lexicon generation consistency
215
+
run: |
216
+
pnpm lex:gen
217
+
git diff --exit-code || (echo "Lexicon files are out of sync. Run 'pnpm lex:gen' locally." && exit 1)
.github/workflows/release.yml (+236)
···
1
+
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
2
+
3
+
name: Release
4
+
5
+
on:
6
+
push:
7
+
tags:
8
+
- "v*"
9
+
workflow_dispatch:
10
+
inputs:
11
+
tag:
12
+
description: "Release tag"
13
+
required: true
14
+
type: string
15
+
16
+
env:
17
+
REGISTRY: ghcr.io
18
+
CARGO_TERM_COLOR: always
19
+
SQLX_OFFLINE: true
20
+
21
+
jobs:
22
+
create-release:
23
+
name: Create Release
24
+
runs-on: ubuntu-latest
25
+
outputs:
26
+
release_id: ${{ steps.create_release.outputs.id }}
27
+
upload_url: ${{ steps.create_release.outputs.upload_url }}
28
+
tag: ${{ steps.tag.outputs.tag }}
29
+
steps:
30
+
- name: Checkout repository
31
+
uses: actions/checkout@v4
32
+
33
+
- name: Get tag name
34
+
id: tag
35
+
run: |
36
+
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
37
+
echo "tag=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
38
+
else
39
+
echo "tag=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
40
+
fi
41
+
42
+
- name: Generate changelog
43
+
id: changelog
44
+
run: |
45
+
if [ -f "CHANGELOG.md" ]; then
46
+
# Extract changelog for this version
47
+
awk '/^## \[${{ steps.tag.outputs.tag }}\]/{flag=1; next} /^## \[/{flag=0} flag' CHANGELOG.md > release_notes.md
48
+
else
49
+
echo "Release ${{ steps.tag.outputs.tag }}" > release_notes.md
50
+
fi
51
+
52
+
- name: Create Release
53
+
id: create_release
54
+
uses: actions/create-release@v1
55
+
env:
56
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
57
+
with:
58
+
tag_name: ${{ steps.tag.outputs.tag }}
59
+
release_name: Release ${{ steps.tag.outputs.tag }}
60
+
body_path: release_notes.md
61
+
draft: false
62
+
prerelease: ${{ contains(steps.tag.outputs.tag, '-') }}
63
+
64
+
build-all:
65
+
name: Build All Artifacts
66
+
runs-on: ubuntu-latest
67
+
needs: create-release
68
+
outputs:
69
+
rust-artifacts: ${{ steps.upload-rust.outputs.artifact-id }}
70
+
node-artifacts: ${{ steps.upload-node.outputs.artifact-id }}
71
+
steps:
72
+
- name: Checkout repository
73
+
uses: actions/checkout@v4
74
+
75
+
- name: Setup environment
76
+
uses: ./.github/actions/setup
77
+
with:
78
+
setup-rust: "true"
79
+
setup-node: "true"
80
+
cache-key-suffix: "release-${{ needs.create-release.outputs.tag }}"
81
+
82
+
- name: Install cross-compilation tools
83
+
run: |
84
+
cargo install cross
85
+
rustup target add aarch64-unknown-linux-gnu
86
+
87
+
- name: Build Node.js artifacts
88
+
run: |
89
+
pnpm build
90
+
cd apps/amethyst && pnpm build
91
+
92
+
- name: Build Rust services (x86_64)
93
+
run: |
94
+
cd services
95
+
cargo build --release --all-features
96
+
97
+
- name: Build Rust services (aarch64)
98
+
run: |
99
+
cd services
100
+
cross build --release --all-features --target aarch64-unknown-linux-gnu
101
+
102
+
- name: Build Rust apps (x86_64)
103
+
run: |
104
+
cd apps/aqua
105
+
cargo build --release --all-features
106
+
107
+
- name: Build Rust apps (aarch64)
108
+
run: |
109
+
cd apps/aqua
110
+
cross build --release --all-features --target aarch64-unknown-linux-gnu
111
+
112
+
- name: Create Amethyst build archive
113
+
run: |
114
+
cd apps/amethyst
115
+
tar -czf amethyst-${{ needs.create-release.outputs.tag }}.tar.gz build/
116
+
117
+
- name: Upload Rust build artifacts
118
+
id: upload-rust
119
+
uses: actions/upload-artifact@v4
120
+
with:
121
+
name: rust-release-builds
122
+
path: |
123
+
target/release/
124
+
target/aarch64-unknown-linux-gnu/release/
125
+
apps/aqua/target/release/
126
+
apps/aqua/target/aarch64-unknown-linux-gnu/release/
127
+
retention-days: 7
128
+
129
+
- name: Upload Node build artifacts
130
+
id: upload-node
131
+
uses: actions/upload-artifact@v4
132
+
with:
133
+
name: node-release-builds
134
+
path: |
135
+
packages/*/dist/
136
+
apps/amethyst/build/
137
+
apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
138
+
retention-days: 7
139
+
140
+
- name: Upload Amethyst build to release
141
+
uses: actions/upload-release-asset@v1
142
+
env:
143
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
144
+
with:
145
+
upload_url: ${{ needs.create-release.outputs.upload_url }}
146
+
asset_path: ./apps/amethyst/amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
147
+
asset_name: amethyst-${{ needs.create-release.outputs.tag }}.tar.gz
148
+
asset_content_type: application/gzip
149
+
150
+
release-services:
151
+
name: Release Services
152
+
runs-on: ubuntu-latest
153
+
needs: [create-release, build-all]
154
+
permissions:
155
+
contents: read
156
+
packages: write
157
+
strategy:
158
+
matrix:
159
+
service:
160
+
- name: aqua
161
+
dockerfile: apps/aqua/Dockerfile
162
+
context: .
163
+
- name: cadet
164
+
dockerfile: services/cadet/Dockerfile
165
+
context: .
166
+
- name: rocketman
167
+
dockerfile: services/rocketman/Dockerfile
168
+
context: .
169
+
- name: satellite
170
+
dockerfile: services/satellite/Dockerfile
171
+
context: .
172
+
steps:
173
+
- name: Checkout repository
174
+
uses: actions/checkout@v4
175
+
176
+
- name: Check if service has Dockerfile
177
+
id: check
178
+
run: |
179
+
if [ -f "${{ matrix.service.dockerfile }}" ]; then
180
+
echo "has_dockerfile=true" >> $GITHUB_OUTPUT
181
+
echo "Service ${{ matrix.service.name }} has Dockerfile"
182
+
else
183
+
echo "has_dockerfile=false" >> $GITHUB_OUTPUT
184
+
echo "Service ${{ matrix.service.name }} does not have Dockerfile, skipping"
185
+
fi
186
+
187
+
- name: Setup environment
188
+
if: steps.check.outputs.has_dockerfile == 'true'
189
+
uses: ./.github/actions/setup
190
+
with:
191
+
setup-node: "true"
192
+
lexicons-only-rust: "true"
193
+
194
+
- name: Download build artifacts
195
+
if: steps.check.outputs.has_dockerfile == 'true'
196
+
uses: actions/download-artifact@v4
197
+
with:
198
+
name: rust-release-builds
199
+
path: .
200
+
201
+
- name: Log in to Container Registry
202
+
if: steps.check.outputs.has_dockerfile == 'true'
203
+
uses: docker/login-action@v3
204
+
with:
205
+
registry: ${{ env.REGISTRY }}
206
+
username: ${{ github.actor }}
207
+
password: ${{ secrets.GITHUB_TOKEN }}
208
+
209
+
- name: Extract metadata
210
+
if: steps.check.outputs.has_dockerfile == 'true'
211
+
id: meta
212
+
uses: docker/metadata-action@v5
213
+
with:
214
+
images: ${{ env.REGISTRY }}/${{ github.repository }}/${{ matrix.service.name }}
215
+
tags: |
216
+
type=raw,value=latest
217
+
type=raw,value=${{ needs.create-release.outputs.tag }}
218
+
219
+
- name: Set up Docker Buildx
220
+
if: steps.check.outputs.has_dockerfile == 'true'
221
+
uses: docker/setup-buildx-action@v3
222
+
223
+
- name: Build and push Docker image
224
+
if: steps.check.outputs.has_dockerfile == 'true'
225
+
uses: docker/build-push-action@v5
226
+
with:
227
+
context: ${{ matrix.service.context }}
228
+
file: ${{ matrix.service.dockerfile }}
229
+
push: true
230
+
tags: ${{ steps.meta.outputs.tags }}
231
+
labels: ${{ steps.meta.outputs.labels }}
232
+
platforms: linux/amd64,linux/arm64
233
+
cache-from: type=gha,scope=${{ matrix.service.name }}
234
+
cache-to: type=gha,mode=max,scope=${{ matrix.service.name }}
235
+
build-args: |
236
+
BUILDKIT_INLINE_CACHE=1
.gitignore (+4 -17)
···
 
 # generated lexicons
 # js lexicons
-*/**/lexicons
+packages/lexicons/src
 # rust lexicons (types :)))
-*/**/types
+services/types/src
 
 # vendor directory for submodules
 !vendor/
···
 vendor/**/dist/
 vendor/**/node_modules/
 
-# lexicons directory structure
-!lexicons/
-# Track our custom lexicons
-!lexicons/fm.teal.alpha/
-!lexicons/fm.teal.alpha/**/*.json
-# Track the symlinks to atproto lexicons
-!lexicons/app
-!lexicons/chat
-!lexicons/com
-!lexicons/tools
-# But ignore any generated files within lexicons
-lexicons/**/*.js
-lexicons/**/*.d.ts
-lexicons/**/dist/
-lexicons/**/node_modules/
+# claude
+.claude
.pre-commit-config.yaml (+126)
···
1
+
# Pre-commit configuration for Teal project
2
+
# Install with: pip install pre-commit && pre-commit install
3
+
# Run manually with: pre-commit run --all-files
4
+
5
+
repos:
6
+
# General file checks
7
+
- repo: https://github.com/pre-commit/pre-commit-hooks
8
+
rev: v4.6.0
9
+
hooks:
10
+
- id: trailing-whitespace
11
+
- id: end-of-file-fixer
12
+
- id: check-yaml
13
+
- id: check-json
14
+
- id: check-toml
15
+
- id: check-merge-conflict
16
+
- id: check-added-large-files
17
+
args: ["--maxkb=500"]
18
+
- id: mixed-line-ending
19
+
args: ["--fix=lf"]
20
+
21
+
# TypeScript/JavaScript formatting and linting
22
+
- repo: local
23
+
hooks:
24
+
- id: prettier
25
+
name: Prettier
26
+
entry: pnpm prettier --write
27
+
language: system
28
+
files: \.(ts|tsx|js|jsx|json|md|yaml|yml)$
29
+
pass_filenames: true
30
+
31
+
- id: biome-check
32
+
name: Biome Check
33
+
entry: pnpm biome check --apply
34
+
language: system
35
+
files: \.(ts|tsx|js|jsx)$
36
+
pass_filenames: false
37
+
38
+
# TypeScript check temporarily disabled due to vendor compilation issues
39
+
# - id: typescript-check
40
+
# name: TypeScript Check
41
+
# entry: pnpm typecheck
42
+
# language: system
43
+
# files: \.(ts|tsx)$
44
+
# pass_filenames: false
45
+
46
+
# Rust formatting and linting
47
+
- repo: local
48
+
hooks:
49
+
- id: cargo-fmt-services
50
+
name: Cargo Format (Services Workspace)
51
+
entry: bash -c 'cd services && cargo fmt'
52
+
language: system
53
+
files: services/.*\.rs$
54
+
pass_filenames: false
55
+
56
+
- id: cargo-clippy-services
57
+
name: Cargo Clippy (Services Workspace)
58
+
entry: bash -c 'cd services && cargo clippy -- -D warnings'
59
+
language: system
60
+
files: services/.*\.rs$
61
+
pass_filenames: false
62
+
63
+
- id: cargo-fmt-apps
64
+
name: Cargo Format (Apps)
65
+
entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo fmt && cd ../..; fi; done'
66
+
language: system
67
+
files: apps/.*\.rs$
68
+
pass_filenames: false
69
+
70
+
- id: cargo-clippy-apps
71
+
name: Cargo Clippy (Apps)
72
+
entry: bash -c 'for dir in apps/*/; do if [ -f "$dir/Cargo.toml" ]; then cd "$dir" && cargo clippy -- -D warnings && cd ../..; fi; done'
73
+
language: system
74
+
files: apps/.*\.rs$
75
+
pass_filenames: false
76
+
77
+
# Lexicon validation and generation
78
+
- repo: local
79
+
hooks:
80
+
- id: lexicon-validate
81
+
name: Validate Lexicons
82
+
entry: pnpm lex:validate
83
+
language: system
84
+
files: lexicons/.*\.json$
85
+
pass_filenames: false
86
+
87
+
- id: lexicon-generate
88
+
name: Generate Lexicons (files ignored by .gitignore)
89
+
entry: pnpm lex:gen-server
90
+
language: system
91
+
files: lexicons/.*\.json$
92
+
pass_filenames: false
93
+
always_run: false
94
+
95
+
# Optional: Additional checks
96
+
- repo: local
97
+
hooks:
98
+
- id: no-console-log
99
+
name: Check for console.log
100
+
entry: bash -c 'if grep -r "console\.log" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" .; then echo "Found console.log statements. Please remove them."; exit 1; fi'
101
+
language: system
102
+
files: \.(ts|tsx|js|jsx)$
103
+
pass_filenames: false
104
+
105
+
- id: check-todos
106
+
name: Check for TODO/FIXME
107
+
entry: bash -c 'if grep -r -i "TODO\|FIXME" --include="*.ts" --include="*.tsx" --include="*.js" --include="*.jsx" --include="*.rs" .; then echo "Found TODO/FIXME comments. Consider addressing them."; fi'
108
+
language: system
109
+
files: \.(ts|tsx|js|jsx|rs)$
110
+
pass_filenames: false
111
+
verbose: true
112
+
113
+
# Global settings
114
+
default_language_version:
115
+
node: system
116
+
python: python3
117
+
118
+
# Skip certain hooks for specific file patterns
119
+
exclude: |
120
+
(?x)^(
121
+
vendor/.*|
122
+
node_modules/.*|
123
+
target/.*|
124
+
.git/.*|
125
+
.*\.lock$
126
+
)$
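With this config installed (see the install/run commands in the comments at the top of the file), individual hooks can also be run by id; for example, the services-workspace Rust hooks defined above:

```bash
# Run only the Rust formatting and lint hooks from this config
pre-commit run cargo-fmt-services --all-files
pre-commit run cargo-clippy-services --all-files
```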
.sqlx/query-00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75.json (+46)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n ae1.name as synthetic_name,\n ae2.name as target_name,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score,\n COUNT(ptae1.play_uri) as synthetic_plays,\n COUNT(ptae2.play_uri) as target_plays\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id\n LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score\n ORDER BY similarity_score DESC\n LIMIT 10\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "synthetic_name",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "target_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "similarity_score",
19
+
"type_info": "Float4"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "synthetic_plays",
24
+
"type_info": "Int8"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "target_plays",
29
+
"type_info": "Int8"
30
+
}
31
+
],
32
+
"parameters": {
33
+
"Left": [
34
+
"Float4"
35
+
]
36
+
},
37
+
"nullable": [
38
+
false,
39
+
false,
40
+
null,
41
+
null,
42
+
null
43
+
]
44
+
},
45
+
"hash": "00b655145e9033d951628a8bc69521815b3af632d0433f87d78c5403dd22eb75"
46
+
}
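These `.sqlx/query-*.json` files are sqlx's offline query cache, which is what lets `SQLX_OFFLINE=true` builds (see `.env.template` and `ci.yml` above) compile without a live database. A sketch of regenerating them after a query changes, assuming `sqlx-cli` is installed and a database matching the migrations is reachable (URL copied from `.env.development`):

```bash
# Rebuild the offline query cache; run from the workspace that owns the queries
export DATABASE_URL=postgres://postgres:testpass123@localhost:5433/teal_test
cargo sqlx prepare --workspace
```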
.sqlx/query-0d7c3ef80c20dac6efd0fe3c430d7f41b1c90368ff99ce8a09f66bca63864d1e.json (+12)
.sqlx/query-0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273.json (+35)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE p.did = $1\n AND pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $2\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "play_count",
19
+
"type_info": "Int8"
20
+
}
21
+
],
22
+
"parameters": {
23
+
"Left": [
24
+
"Text",
25
+
"Int8"
26
+
]
27
+
},
28
+
"nullable": [
29
+
false,
30
+
true,
31
+
null
32
+
]
33
+
},
34
+
"hash": "0e053ba402c8b769b697f60d189675eceb89f1d14e52174bda67dc65cc68d273"
35
+
}
.sqlx/query-0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM artists_extended WHERE id = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "0f62d18dcac06b6da3fc90e2206af0fc21e46e42ce1402750f9cc4dd08b54cec"
14
+
}
.sqlx/query-0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1.json (+112)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE did = ANY($1)\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "uri",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "did",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "rkey",
19
+
"type_info": "Text"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "cid",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "isrc",
29
+
"type_info": "Text"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "duration",
34
+
"type_info": "Int4"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "track_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "played_time",
44
+
"type_info": "Timestamptz"
45
+
},
46
+
{
47
+
"ordinal": 8,
48
+
"name": "processed_time",
49
+
"type_info": "Timestamptz"
50
+
},
51
+
{
52
+
"ordinal": 9,
53
+
"name": "release_mbid",
54
+
"type_info": "Uuid"
55
+
},
56
+
{
57
+
"ordinal": 10,
58
+
"name": "release_name",
59
+
"type_info": "Text"
60
+
},
61
+
{
62
+
"ordinal": 11,
63
+
"name": "recording_mbid",
64
+
"type_info": "Uuid"
65
+
},
66
+
{
67
+
"ordinal": 12,
68
+
"name": "submission_client_agent",
69
+
"type_info": "Text"
70
+
},
71
+
{
72
+
"ordinal": 13,
73
+
"name": "music_service_base_domain",
74
+
"type_info": "Text"
75
+
},
76
+
{
77
+
"ordinal": 14,
78
+
"name": "origin_url",
79
+
"type_info": "Text"
80
+
},
81
+
{
82
+
"ordinal": 15,
83
+
"name": "artists",
84
+
"type_info": "Json"
85
+
}
86
+
],
87
+
"parameters": {
88
+
"Left": [
89
+
"TextArray"
90
+
]
91
+
},
92
+
"nullable": [
93
+
false,
94
+
false,
95
+
false,
96
+
false,
97
+
true,
98
+
true,
99
+
false,
100
+
true,
101
+
true,
102
+
true,
103
+
true,
104
+
true,
105
+
true,
106
+
true,
107
+
true,
108
+
null
109
+
]
110
+
},
111
+
"hash": "0ff59e15ce4faa50bb4b9996ae7877681060ed462a7905012f8097c9545f60b1"
112
+
}
.sqlx/query-193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7.json (+22)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT extract_discriminant($1)",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "extract_discriminant",
9
+
"type_info": "Text"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Text"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "193ac753fc587fa24887d8be61eea86f74de6a1a8d4546304fb023532dfaefe7"
22
+
}
.sqlx/query-1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM releases WHERE mbid = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "1d35c8cf83ad859a8c50986ef1f587fb7f9aef2067feccd8af89d3b03d88020c"
14
+
}
.sqlx/query-1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM play_to_artists WHERE play_uri = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "1e4e6b89ac28b1b6cb21c9fbab8f22348943b3f27e9ba9642785d33129f98363"
14
+
}
.sqlx/query-28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e.json (+22)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT extract_edition_discriminant($1)",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "extract_edition_discriminant",
9
+
"type_info": "Text"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Text"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "28b1d571a1d045115bcae785b2583f7bf6d02b0b19946b322192dd7f62748d4e"
22
+
}
.sqlx/query-2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149.json (+52)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n r1.mbid as release1_mbid,\n r1.name as release1_name,\n r2.mbid as release2_mbid,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "release1_mbid",
9
+
"type_info": "Uuid"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "release1_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "release2_mbid",
19
+
"type_info": "Uuid"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "release2_name",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "similarity_score",
29
+
"type_info": "Float4"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "shared_artists",
34
+
"type_info": "Int8"
35
+
}
36
+
],
37
+
"parameters": {
38
+
"Left": [
39
+
"Float4"
40
+
]
41
+
},
42
+
"nullable": [
43
+
false,
44
+
false,
45
+
false,
46
+
false,
47
+
null,
48
+
null
49
+
]
50
+
},
51
+
"hash": "2bdfb2ec8d91cffc761dc72be1a4f540e6cc918a9f7941bfdbefbea6f3dee149"
52
+
}
.sqlx/query-2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "DELETE FROM play_to_artists_extended WHERE artist_id = $1",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "2c2f9db90b7465147a6a696a628e2542d51c42844162455230e702a87719588a"
14
+
}
.sqlx/query-3d84a9e1ed05846bc931eea9b90fd88cae8b636968af4bd2f9b1a9927d15379d.json (+12)
.sqlx/query-413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0.json (+22)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "count",
9
+
"type_info": "Int8"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid"
15
+
]
16
+
},
17
+
"nullable": [
18
+
null
19
+
]
20
+
},
21
+
"hash": "413d8c111e295ddda68a47f38f6b9df88d4b45b149288caba54c339742a718a0"
22
+
}
.sqlx/query-5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1.json (+14)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n DELETE FROM profiles WHERE did = $1\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text"
9
+
]
10
+
},
11
+
"nullable": []
12
+
},
13
+
"hash": "5095c5a6b62d018f95c5c1f58c274b9682f33d918ab02d4d78963fa9ca9c07d1"
14
+
}
.sqlx/query-651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0.json (+112)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays\n LEFT JOIN play_to_artists as pta ON uri = pta.play_uri\n WHERE uri = $1\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time desc\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "uri",
9
+
"type_info": "Text"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "did",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "rkey",
19
+
"type_info": "Text"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "cid",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "isrc",
29
+
"type_info": "Text"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "duration",
34
+
"type_info": "Int4"
35
+
},
36
+
{
37
+
"ordinal": 6,
38
+
"name": "track_name",
39
+
"type_info": "Text"
40
+
},
41
+
{
42
+
"ordinal": 7,
43
+
"name": "played_time",
44
+
"type_info": "Timestamptz"
45
+
},
46
+
{
47
+
"ordinal": 8,
48
+
"name": "processed_time",
49
+
"type_info": "Timestamptz"
50
+
},
51
+
{
52
+
"ordinal": 9,
53
+
"name": "release_mbid",
54
+
"type_info": "Uuid"
55
+
},
56
+
{
57
+
"ordinal": 10,
58
+
"name": "release_name",
59
+
"type_info": "Text"
60
+
},
61
+
{
62
+
"ordinal": 11,
63
+
"name": "recording_mbid",
64
+
"type_info": "Uuid"
65
+
},
66
+
{
67
+
"ordinal": 12,
68
+
"name": "submission_client_agent",
69
+
"type_info": "Text"
70
+
},
71
+
{
72
+
"ordinal": 13,
73
+
"name": "music_service_base_domain",
74
+
"type_info": "Text"
75
+
},
76
+
{
77
+
"ordinal": 14,
78
+
"name": "origin_url",
79
+
"type_info": "Text"
80
+
},
81
+
{
82
+
"ordinal": 15,
83
+
"name": "artists",
84
+
"type_info": "Json"
85
+
}
86
+
],
87
+
"parameters": {
88
+
"Left": [
89
+
"Text"
90
+
]
91
+
},
92
+
"nullable": [
93
+
false,
94
+
false,
95
+
false,
96
+
false,
97
+
true,
98
+
true,
99
+
false,
100
+
true,
101
+
true,
102
+
true,
103
+
true,
104
+
true,
105
+
true,
106
+
true,
107
+
true,
108
+
null
109
+
]
110
+
},
111
+
"hash": "651c94b4edd5afa55c3679a5f8c1ef1cbe53f7dac01b050ec7ad9100950527c0"
112
+
}
.sqlx/query-6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48.json (+16)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n UPDATE play_to_artists_extended\n SET artist_id = $1, artist_name = $2\n WHERE artist_id = $3\n AND NOT EXISTS (\n SELECT 1 FROM play_to_artists_extended existing\n WHERE existing.play_uri = play_to_artists_extended.play_uri\n AND existing.artist_id = $1\n )\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Int4",
9
+
"Text",
10
+
"Int4"
11
+
]
12
+
},
13
+
"nullable": []
14
+
},
15
+
"hash": "6b1a3660fc7e391293278d11020b1f37ddf7446cbc73931c8e30ee38c2f3ae48"
16
+
}
.sqlx/query-6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56.json (+52)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n SELECT DISTINCT\n ae1.id as synthetic_id,\n ae1.name as synthetic_name,\n ae2.id as target_id,\n ae2.name as target_name,\n ae2.mbid as target_mbid,\n similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score\n FROM artists_extended ae1\n CROSS JOIN artists_extended ae2\n WHERE ae1.id != ae2.id\n AND ae1.mbid_type = 'synthetic'\n AND ae2.mbid_type = 'musicbrainz'\n AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1\n ORDER BY similarity_score DESC\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "synthetic_id",
9
+
"type_info": "Int4"
10
+
},
11
+
{
12
+
"ordinal": 1,
13
+
"name": "synthetic_name",
14
+
"type_info": "Text"
15
+
},
16
+
{
17
+
"ordinal": 2,
18
+
"name": "target_id",
19
+
"type_info": "Int4"
20
+
},
21
+
{
22
+
"ordinal": 3,
23
+
"name": "target_name",
24
+
"type_info": "Text"
25
+
},
26
+
{
27
+
"ordinal": 4,
28
+
"name": "target_mbid",
29
+
"type_info": "Uuid"
30
+
},
31
+
{
32
+
"ordinal": 5,
33
+
"name": "similarity_score",
34
+
"type_info": "Float4"
35
+
}
36
+
],
37
+
"parameters": {
38
+
"Left": [
39
+
"Float4"
40
+
]
41
+
},
42
+
"nullable": [
43
+
false,
44
+
false,
45
+
false,
46
+
false,
47
+
true,
48
+
null
49
+
]
50
+
},
51
+
"hash": "6fec79345247c090a72f32d06cb53290156d41f49abba3a9280bc2bedc1c9c56"
52
+
}
.sqlx/query-76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0.json (+23)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ",
4
+
"describe": {
5
+
"columns": [
6
+
{
7
+
"ordinal": 0,
8
+
"name": "id",
9
+
"type_info": "Int4"
10
+
}
11
+
],
12
+
"parameters": {
13
+
"Left": [
14
+
"Uuid",
15
+
"Text"
16
+
]
17
+
},
18
+
"nullable": [
19
+
false
20
+
]
21
+
},
22
+
"hash": "76c4d9600293bb80c2a6009b2b823ba85b02f77442ce3a783643e89676fef9a0"
23
+
}
.sqlx/query-7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85.json (+29)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "\n INSERT INTO plays (\n uri, cid, did, rkey, isrc, duration, track_name, played_time,\n processed_time, release_mbid, release_name, recording_mbid,\n submission_client_agent, music_service_base_domain, artist_names_raw,\n track_discriminant, release_discriminant\n ) VALUES (\n $1, $2, $3, $4, $5, $6, $7, $8,\n NOW(), $9, $10, $11, $12, $13, $14, $15, $16\n ) ON CONFLICT(uri) DO UPDATE SET\n isrc = EXCLUDED.isrc,\n duration = EXCLUDED.duration,\n track_name = EXCLUDED.track_name,\n played_time = EXCLUDED.played_time,\n processed_time = EXCLUDED.processed_time,\n release_mbid = EXCLUDED.release_mbid,\n release_name = EXCLUDED.release_name,\n recording_mbid = EXCLUDED.recording_mbid,\n submission_client_agent = EXCLUDED.submission_client_agent,\n music_service_base_domain = EXCLUDED.music_service_base_domain,\n artist_names_raw = EXCLUDED.artist_names_raw,\n track_discriminant = EXCLUDED.track_discriminant,\n release_discriminant = EXCLUDED.release_discriminant;\n ",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Text",
9
+
"Text",
10
+
"Text",
11
+
"Text",
12
+
"Text",
13
+
"Int4",
14
+
"Text",
15
+
"Timestamptz",
16
+
"Uuid",
17
+
"Text",
18
+
"Uuid",
19
+
"Text",
20
+
"Text",
21
+
"Jsonb",
22
+
"Text",
23
+
"Text"
24
+
]
25
+
},
26
+
"nullable": []
27
+
},
28
+
"hash": "7cdcd5e8ecada65d351a38c38cfda64ad3d9f04982181dbb32bde93ebd5adc85"
29
+
}
.sqlx/query-7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10.json (+16)
···
1
+
{
2
+
"db_name": "PostgreSQL",
3
+
"query": "UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3",
4
+
"describe": {
5
+
"columns": [],
6
+
"parameters": {
7
+
"Left": [
8
+
"Uuid",
9
+
"Text",
10
+
"Uuid"
11
+
]
12
+
},
13
+
"nullable": []
14
+
},
15
+
"hash": "7cfece6879feb2653c647d1248913c9cf54bd02a20e9694c7f6d7e92f28f8d10"
16
+
}
+18 .sqlx/query-8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO statii (uri, did, rkey, cid, record)\n VALUES ($1, $2, $3, $4, $5)\n ON CONFLICT (uri) DO UPDATE SET\n cid = EXCLUDED.cid,\n record = EXCLUDED.record,\n indexed_at = NOW();\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Text", "Text", "Text", "Jsonb"] },
    "nullable": []
  },
  "hash": "8758f5bb57feedca6cd65831f36aabc811e8b7072dc6bdbfd4a49242e5d7c946"
}
+34 .sqlx/query-97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $1\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "name", "type_info": "Text" },
      { "ordinal": 2, "name": "play_count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Int8"] },
    "nullable": [true, true, null]
  },
  "hash": "97e98ede9b32adab5e1ad9808ae827387eba7ad376fba8e41217862a76179f59"
}
+12 .sqlx/query-9af33e4329198dee7814519573b63858eaf69f08ad2959d96ffee5c8387af0ba.json
+16 .sqlx/query-9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES\n ($1, $2, $3)\n ON CONFLICT (play_uri, artist_id) DO NOTHING;\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Int4", "Text"] },
    "nullable": []
  },
  "hash": "9bac472357fa38a6e3bb38d02ebb56a6e11c85d4aff91096f8ea68f1196e8bd3"
}
+24 .sqlx/query-9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)\n RETURNING mbid;\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" }
    ],
    "parameters": { "Left": ["Uuid", "Text", "Text"] },
    "nullable": [false]
  },
  "hash": "9c08de3ad1dd8e005e6cf15694ad1878203772969a3b280c3db4193631a98f81"
}
+14 .sqlx/query-9d4e872755f90087f64f116d8fee340218e09b40ab8f94b5d9d17b9c39bf3d4f.json
+22 .sqlx/query-ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd.json
{
  "db_name": "PostgreSQL",
  "query": "SELECT generate_synthetic_mbid($1)",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "generate_synthetic_mbid", "type_info": "Uuid" }
    ],
    "parameters": { "Left": ["Text"] },
    "nullable": [null]
  },
  "hash": "ad02971766fb37f49f4a75a6414807606be0562574826f8fe88827c645c01acd"
}
+35 .sqlx/query-af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n p.release_mbid as mbid,\n p.release_name as name,\n COUNT(*) as play_count\n FROM plays p\n WHERE p.did = $1\n AND p.release_mbid IS NOT NULL\n AND p.release_name IS NOT NULL\n GROUP BY p.release_mbid, p.release_name\n ORDER BY play_count DESC\n LIMIT $2\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "name", "type_info": "Text" },
      { "ordinal": 2, "name": "play_count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Text", "Int8"] },
    "nullable": [true, true, null]
  },
  "hash": "af5c1fdabaee1cbc49f89d1df92e13cbb4a0837e3c644de9c7cf8e33e170d2e3"
}
+46 .sqlx/query-b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT DISTINCT\n r1.name as recording1_name,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "recording1_name", "type_info": "Text" },
      { "ordinal": 1, "name": "recording2_name", "type_info": "Text" },
      { "ordinal": 2, "name": "similarity_score", "type_info": "Float4" },
      { "ordinal": 3, "name": "shared_artists", "type_info": "Int8" },
      { "ordinal": 4, "name": "artist_names", "type_info": "Text" }
    ],
    "parameters": { "Left": ["Float4"] },
    "nullable": [false, false, null, null, null]
  },
  "hash": "b0036bbbb21b71900394c33f4c1db6f8281159b68ca492f6977dc153c60ab453"
}
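The `similarity()` calls in this and the neighbouring fuzzy-matching queries come from PostgreSQL's `pg_trgm` extension, so the target database has to have that extension enabled before these cached queries can run. A minimal sketch of the setup step follows; the helper name and the idea of calling it from application code are assumptions (in practice this is usually done once in a migration).

```rust
use sqlx::PgPool;

/// Hypothetical helper: make trigram similarity available before the
/// fuzzy-matching queries above are executed. Normally done in a migration.
async fn ensure_pg_trgm(pool: &PgPool) -> Result<(), sqlx::Error> {
    sqlx::query("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
        .execute(pool)
        .await?;
    Ok(())
}
```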
+15 .sqlx/query-b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56.json
{
  "db_name": "PostgreSQL",
  "query": "UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Int4"] },
    "nullable": []
  },
  "hash": "b23dc54aec3e2bee85f1e5874df7ad4cbaeb15ca043b244bbce224dfc26d8b56"
}
+65 .sqlx/query-b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837.json
{
  "db_name": "PostgreSQL",
  "query": "SELECT\n p.avatar,\n p.banner,\n p.created_at,\n p.description,\n p.description_facets,\n p.did,\n p.display_name,\n s.record as status\n FROM profiles p\n LEFT JOIN statii s ON p.did = s.did AND s.rkey = 'self'\n WHERE (p.did = ANY($1))\n OR (p.handle = ANY($2))",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "avatar", "type_info": "Text" },
      { "ordinal": 1, "name": "banner", "type_info": "Text" },
      { "ordinal": 2, "name": "created_at", "type_info": "Timestamptz" },
      { "ordinal": 3, "name": "description", "type_info": "Text" },
      { "ordinal": 4, "name": "description_facets", "type_info": "Jsonb" },
      { "ordinal": 5, "name": "did", "type_info": "Text" },
      { "ordinal": 6, "name": "display_name", "type_info": "Text" },
      { "ordinal": 7, "name": "status", "type_info": "Jsonb" }
    ],
    "parameters": { "Left": ["TextArray", "TextArray"] },
    "nullable": [true, true, true, true, true, false, true, true]
  },
  "hash": "b4e829c20bb78b9db20eccd9827e0d2f7bdbeedbaa39f6b40d1ae8a1045d6837"
}
+34 .sqlx/query-b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n pta.artist_mbid as mbid,\n pta.artist_name as name,\n COUNT(*) as play_count\n FROM plays p\n INNER JOIN play_to_artists pta ON p.uri = pta.play_uri\n WHERE pta.artist_mbid IS NOT NULL\n AND pta.artist_name IS NOT NULL\n GROUP BY pta.artist_mbid, pta.artist_name\n ORDER BY play_count DESC\n LIMIT $1\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "name", "type_info": "Text" },
      { "ordinal": 2, "name": "play_count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Int8"] },
    "nullable": [false, true, null]
  },
  "hash": "b8bf07c21c04acf3b4d908b2db93643e497db9a1f01d4d51b99dfdbddd2d4c0e"
}
+21 .sqlx/query-b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO profiles (did, handle, display_name, description, description_facets, avatar, banner, created_at)\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8)\n ON CONFLICT (did) DO UPDATE SET\n display_name = EXCLUDED.display_name,\n description = EXCLUDED.description,\n description_facets = EXCLUDED.description_facets,\n avatar = EXCLUDED.avatar,\n banner = EXCLUDED.banner,\n created_at = EXCLUDED.created_at;\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text", "Text", "Text", "Text", "Jsonb", "Text", "Text", "Timestamptz"] },
    "nullable": []
  },
  "hash": "b9ca1a73cba5a29665e5f996fd33410054936bbd74cfd611767bf6a7893ebded"
}
+22 .sqlx/query-bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224.json
{
  "db_name": "PostgreSQL",
  "query": "SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "count", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Uuid"] },
    "nullable": [null]
  },
  "hash": "bbedc0ebf2ae8ecd086c089546f700e4c027150db583ae78ebba24da334c7224"
}
+12 .sqlx/query-bf9c6d3bf0f9594ae1c02dc85c9887b747aaa5f0c3e67d9381c3867c4f67ae6d.json
+46 .sqlx/query-cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT DISTINCT\n r1.name as release1_name,\n r2.name as release2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists,\n STRING_AGG(DISTINCT ae.name, ', ') as artist_names\n FROM releases r1\n CROSS JOIN releases r2\n INNER JOIN plays p1 ON p1.release_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.release_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0\n ORDER BY similarity_score DESC\n LIMIT 5\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "release1_name", "type_info": "Text" },
      { "ordinal": 1, "name": "release2_name", "type_info": "Text" },
      { "ordinal": 2, "name": "similarity_score", "type_info": "Float4" },
      { "ordinal": 3, "name": "shared_artists", "type_info": "Int8" },
      { "ordinal": 4, "name": "artist_names", "type_info": "Text" }
    ],
    "parameters": { "Left": ["Float4"] },
    "nullable": [false, false, null, null, null]
  },
  "hash": "cbc1d1c3cfe95d3d223ab4bb125e301436c9d6bbf09376215aa43e7abc98d87c"
}
+15 .sqlx/query-cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2.json
{
  "db_name": "PostgreSQL",
  "query": "UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Uuid", "Uuid"] },
    "nullable": []
  },
  "hash": "cdd7488f49e0b81ab138afaf173030ef4c37d195aee42cc6e5e2c6638cb6f3b2"
}
+14 .sqlx/query-d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45.json
{
  "db_name": "PostgreSQL",
  "query": "DELETE FROM recordings WHERE mbid = $1",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Uuid"] },
    "nullable": []
  },
  "hash": "d5414741e228591937d2d3e743d0ed343ee2434cc86a8b726806959f024b7b45"
}
+14 .sqlx/query-d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961.json
{
  "db_name": "PostgreSQL",
  "query": "\n DELETE FROM statii WHERE uri = $1\n ",
  "describe": {
    "columns": [],
    "parameters": { "Left": ["Text"] },
    "nullable": []
  },
  "hash": "d80a24e6b32f04c26d28823db4601960a926801000b5f37583c98ae168c7e961"
}
+112 .sqlx/query-f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url,\n COALESCE(\n json_agg(\n json_build_object(\n 'artist_mbid', pta.artist_mbid,\n 'artist_name', pta.artist_name\n )\n ) FILTER (WHERE pta.artist_name IS NOT NULL),\n '[]'\n ) AS artists\n FROM plays p\n LEFT JOIN play_to_artists as pta ON p.uri = pta.play_uri\n GROUP BY uri, did, rkey, cid, isrc, duration, track_name, played_time, processed_time,\n release_mbid, release_name, recording_mbid, submission_client_agent,\n music_service_base_domain, origin_url\n ORDER BY processed_time DESC\n LIMIT $1\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "uri", "type_info": "Text" },
      { "ordinal": 1, "name": "did", "type_info": "Text" },
      { "ordinal": 2, "name": "rkey", "type_info": "Text" },
      { "ordinal": 3, "name": "cid", "type_info": "Text" },
      { "ordinal": 4, "name": "isrc", "type_info": "Text" },
      { "ordinal": 5, "name": "duration", "type_info": "Int4" },
      { "ordinal": 6, "name": "track_name", "type_info": "Text" },
      { "ordinal": 7, "name": "played_time", "type_info": "Timestamptz" },
      { "ordinal": 8, "name": "processed_time", "type_info": "Timestamptz" },
      { "ordinal": 9, "name": "release_mbid", "type_info": "Uuid" },
      { "ordinal": 10, "name": "release_name", "type_info": "Text" },
      { "ordinal": 11, "name": "recording_mbid", "type_info": "Uuid" },
      { "ordinal": 12, "name": "submission_client_agent", "type_info": "Text" },
      { "ordinal": 13, "name": "music_service_base_domain", "type_info": "Text" },
      { "ordinal": 14, "name": "origin_url", "type_info": "Text" },
      { "ordinal": 15, "name": "artists", "type_info": "Json" }
    ],
    "parameters": { "Left": ["Int8"] },
    "nullable": [false, false, false, false, true, true, false, true, true, true, true, true, true, true, true, null]
  },
  "hash": "f224b252a34a67a71266caca5affc5022e74dc42496aef9e61cec0e86d80f9d0"
}
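The `artists` column in the query above is assembled with `json_build_object`, so each play row carries its artists as a single JSON array rather than as typed columns. A sketch of a shape it could be deserialized into follows; the struct, field optionality, and helper are assumptions based on the keys used in the query, not code from the repository, and `Uuid` deserialization assumes the `uuid` crate's `serde` feature.

```rust
use serde::Deserialize;
use uuid::Uuid;

/// Illustrative shape for one element of the aggregated `artists` JSON array.
#[derive(Debug, Deserialize)]
struct PlayArtist {
    artist_mbid: Option<Uuid>,
    artist_name: Option<String>,
}

/// Parse the JSON column returned by the query into typed values.
fn parse_artists(raw: serde_json::Value) -> Result<Vec<PlayArtist>, serde_json::Error> {
    serde_json::from_value(raw)
}
```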
+23 .sqlx/query-f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic')\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n updated_at = NOW()\n RETURNING id;\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "id", "type_info": "Int4" }
    ],
    "parameters": { "Left": ["Uuid", "Text"] },
    "nullable": [false]
  },
  "hash": "f604394b9517a78f2dd81723bed6435b9c3a03922a50d86daa21bfb6d09ac734"
}
+24 .sqlx/query-f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672.json
{
  "db_name": "PostgreSQL",
  "query": "\n INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3)\n ON CONFLICT (mbid) DO UPDATE SET\n name = EXCLUDED.name,\n discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant)\n RETURNING mbid;\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "mbid", "type_info": "Uuid" }
    ],
    "parameters": { "Left": ["Uuid", "Text", "Text"] },
    "nullable": [false]
  },
  "hash": "f8caa11009d6220e139157dff83a0d3ffb37fcd8590527a5d7d3fc6e2e8f3672"
}
+28 .sqlx/query-fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT\n ae.id,\n ae.name\n FROM artists_extended ae\n WHERE ae.mbid_type = 'musicbrainz'\n AND (\n LOWER(TRIM(ae.name)) = $1\n OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%'\n OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%'\n OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6\n )\n ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC\n LIMIT 10\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "id", "type_info": "Int4" },
      { "ordinal": 1, "name": "name", "type_info": "Text" }
    ],
    "parameters": { "Left": ["Text"] },
    "nullable": [false, false]
  },
  "hash": "fd5f376dac5f38005efa3217c9614e377703c681e1510fc0c6539b1edee289b7"
}
+52 .sqlx/query-ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0.json
{
  "db_name": "PostgreSQL",
  "query": "\n SELECT DISTINCT\n r1.mbid as recording1_mbid,\n r1.name as recording1_name,\n r2.mbid as recording2_mbid,\n r2.name as recording2_name,\n similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,\n COUNT(DISTINCT ptae1.artist_id) as shared_artists\n FROM recordings r1\n CROSS JOIN recordings r2\n INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid\n INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid\n INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri\n INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri\n WHERE r1.mbid != r2.mbid\n AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1\n AND ptae1.artist_id = ptae2.artist_id -- Same artist\n AND (\n (r1.discriminant IS NULL AND r2.discriminant IS NULL) OR\n (LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))\n ) -- Same or no discriminants\n GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score\n HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist\n ORDER BY similarity_score DESC, shared_artists DESC\n ",
  "describe": {
    "columns": [
      { "ordinal": 0, "name": "recording1_mbid", "type_info": "Uuid" },
      { "ordinal": 1, "name": "recording1_name", "type_info": "Text" },
      { "ordinal": 2, "name": "recording2_mbid", "type_info": "Uuid" },
      { "ordinal": 3, "name": "recording2_name", "type_info": "Text" },
      { "ordinal": 4, "name": "similarity_score", "type_info": "Float4" },
      { "ordinal": 5, "name": "shared_artists", "type_info": "Int8" }
    ],
    "parameters": { "Left": ["Float4"] },
    "nullable": [false, false, false, false, null, null]
  },
  "hash": "ffa27ada5f1ef0d5c699277b88ad33aa6576f6d14a12ad61974e77d52b42eea0"
}
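These cache entries are regenerated rather than hand-edited: with the `sqlx-cli` tooling, a command along the lines of `cargo sqlx prepare` run against a live database rewrites the `.sqlx/` directory so the `sqlx::query!` macros can be compiled offline (for example in CI or Docker builds, typically with `SQLX_OFFLINE=true`). The exact invocation this project uses is not shown in the diff, so treat that as the usual SQLx workflow rather than a confirmed project script.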
+616 -241 Cargo.lock
121
121
dependencies = [
122
122
"anyhow",
123
123
"async-trait",
124
+
"atmst",
124
125
"atrium-api",
125
126
"axum",
126
-
"base64",
127
+
"base64 0.22.1",
127
128
"chrono",
128
129
"clap",
129
130
"dotenvy",
···
165
166
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
166
167
167
168
[[package]]
169
+
name = "async-compression"
170
+
version = "0.4.27"
171
+
source = "registry+https://github.com/rust-lang/crates.io-index"
172
+
checksum = "ddb939d66e4ae03cee6091612804ba446b12878410cfa17f785f4dd67d4014e8"
173
+
dependencies = [
174
+
"flate2",
175
+
"futures-core",
176
+
"memchr",
177
+
"pin-project-lite",
178
+
"tokio",
179
+
]
180
+
181
+
[[package]]
168
182
name = "async-lock"
169
183
version = "3.4.0"
170
184
source = "registry+https://github.com/rust-lang/crates.io-index"
···
187
201
]
188
202
189
203
[[package]]
204
+
name = "atmst"
205
+
version = "0.0.1"
206
+
source = "registry+https://github.com/rust-lang/crates.io-index"
207
+
checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d"
208
+
dependencies = [
209
+
"async-trait",
210
+
"bytes",
211
+
"cid 0.11.1",
212
+
"dashmap",
213
+
"futures",
214
+
"ipld-core",
215
+
"iroh-car",
216
+
"log",
217
+
"multihash 0.19.3",
218
+
"serde",
219
+
"serde_ipld_dagcbor",
220
+
"serde_ipld_dagjson",
221
+
"sha2",
222
+
"thiserror 1.0.69",
223
+
"tokio",
224
+
]
225
+
226
+
[[package]]
190
227
name = "atoi"
191
228
version = "2.0.0"
192
229
source = "registry+https://github.com/rust-lang/crates.io-index"
···
210
247
"atrium-common",
211
248
"atrium-xrpc",
212
249
"chrono",
213
-
"http",
250
+
"http 1.3.1",
214
251
"ipld-core",
215
252
"langtag",
216
253
"regex",
···
243
280
source = "registry+https://github.com/rust-lang/crates.io-index"
244
281
checksum = "0216ad50ce34e9ff982e171c3659e65dedaa2ed5ac2994524debdc9a9647ffa8"
245
282
dependencies = [
246
-
"http",
283
+
"http 1.3.1",
247
284
"serde",
248
285
"serde_html_form",
249
286
"serde_json",
···
259
296
260
297
[[package]]
261
298
name = "aws-lc-rs"
262
-
version = "1.13.2"
299
+
version = "1.13.3"
263
300
source = "registry+https://github.com/rust-lang/crates.io-index"
264
-
checksum = "08b5d4e069cbc868041a64bd68dc8cb39a0d79585cd6c5a24caa8c2d622121be"
301
+
checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba"
265
302
dependencies = [
266
303
"aws-lc-sys",
267
304
"zeroize",
···
291
328
"bytes",
292
329
"form_urlencoded",
293
330
"futures-util",
294
-
"http",
331
+
"http 1.3.1",
295
332
"http-body",
296
333
"http-body-util",
297
334
"hyper",
···
324
361
dependencies = [
325
362
"bytes",
326
363
"futures-core",
327
-
"http",
364
+
"http 1.3.1",
328
365
"http-body",
329
366
"http-body-util",
330
367
"mime",
···
348
385
]
349
386
350
387
[[package]]
388
+
name = "backon"
389
+
version = "1.5.2"
390
+
source = "registry+https://github.com/rust-lang/crates.io-index"
391
+
checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d"
392
+
dependencies = [
393
+
"fastrand",
394
+
]
395
+
396
+
[[package]]
351
397
name = "backtrace"
352
398
version = "0.3.75"
353
399
source = "registry+https://github.com/rust-lang/crates.io-index"
···
369
415
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
370
416
371
417
[[package]]
418
+
name = "base16ct"
419
+
version = "0.2.0"
420
+
source = "registry+https://github.com/rust-lang/crates.io-index"
421
+
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
422
+
423
+
[[package]]
424
+
name = "base64"
425
+
version = "0.21.7"
426
+
source = "registry+https://github.com/rust-lang/crates.io-index"
427
+
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
428
+
429
+
[[package]]
372
430
name = "base64"
373
431
version = "0.22.1"
374
432
source = "registry+https://github.com/rust-lang/crates.io-index"
···
397
455
"proc-macro2",
398
456
"quote",
399
457
"regex",
400
-
"rustc-hash",
458
+
"rustc-hash 1.1.0",
401
459
"shlex",
402
460
"syn 2.0.104",
403
461
"which",
···
504
562
version = "1.10.1"
505
563
source = "registry+https://github.com/rust-lang/crates.io-index"
506
564
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
565
+
dependencies = [
566
+
"serde",
567
+
]
507
568
508
569
[[package]]
509
570
name = "cadet"
···
511
572
dependencies = [
512
573
"anyhow",
513
574
"async-trait",
575
+
"atmst",
514
576
"atrium-api",
515
-
"base64",
577
+
"base64 0.22.1",
516
578
"chrono",
517
579
"cid 0.11.1",
518
580
"dotenvy",
519
581
"flume",
582
+
"futures",
520
583
"iroh-car",
521
584
"libipld",
522
585
"metrics 0.23.1",
···
528
591
"reqwest",
529
592
"rocketman",
530
593
"serde",
594
+
"serde_ipld_dagcbor",
531
595
"serde_json",
532
596
"sqlx",
533
597
"time",
534
598
"tokio",
535
-
"tokio-tungstenite",
599
+
"tokio-tungstenite 0.24.0",
536
600
"tracing",
537
601
"tracing-subscriber",
538
602
"types",
···
583
647
584
648
[[package]]
585
649
name = "cc"
586
-
version = "1.2.30"
650
+
version = "1.2.31"
587
651
source = "registry+https://github.com/rust-lang/crates.io-index"
588
-
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
652
+
checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2"
589
653
dependencies = [
590
654
"jobserver",
591
655
"libc",
···
608
672
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
609
673
610
674
[[package]]
675
+
name = "cfg_aliases"
676
+
version = "0.2.1"
677
+
source = "registry+https://github.com/rust-lang/crates.io-index"
678
+
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
679
+
680
+
[[package]]
611
681
name = "chrono"
612
682
version = "0.4.41"
613
683
source = "registry+https://github.com/rust-lang/crates.io-index"
···
662
732
663
733
[[package]]
664
734
name = "clap"
665
-
version = "4.5.41"
735
+
version = "4.5.42"
666
736
source = "registry+https://github.com/rust-lang/crates.io-index"
667
-
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
737
+
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
668
738
dependencies = [
669
739
"clap_builder",
670
740
"clap_derive",
···
672
742
673
743
[[package]]
674
744
name = "clap_builder"
675
-
version = "4.5.41"
745
+
version = "4.5.42"
676
746
source = "registry+https://github.com/rust-lang/crates.io-index"
677
-
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
747
+
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
678
748
dependencies = [
679
749
"anstream",
680
750
"anstyle",
···
716
786
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
717
787
718
788
[[package]]
789
+
name = "colored"
790
+
version = "2.2.0"
791
+
source = "registry+https://github.com/rust-lang/crates.io-index"
792
+
checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
793
+
dependencies = [
794
+
"lazy_static",
795
+
"windows-sys 0.59.0",
796
+
]
797
+
798
+
[[package]]
719
799
name = "combine"
720
800
version = "4.6.7"
721
801
source = "registry+https://github.com/rust-lang/crates.io-index"
···
810
890
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
811
891
812
892
[[package]]
893
+
name = "crc32fast"
894
+
version = "1.5.0"
895
+
source = "registry+https://github.com/rust-lang/crates.io-index"
896
+
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
897
+
dependencies = [
898
+
"cfg-if",
899
+
]
900
+
901
+
[[package]]
813
902
name = "crossbeam-channel"
814
903
version = "0.5.15"
815
904
source = "registry+https://github.com/rust-lang/crates.io-index"
···
841
930
version = "0.8.21"
842
931
source = "registry+https://github.com/rust-lang/crates.io-index"
843
932
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
933
+
934
+
[[package]]
935
+
name = "crypto-bigint"
936
+
version = "0.5.5"
937
+
source = "registry+https://github.com/rust-lang/crates.io-index"
938
+
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
939
+
dependencies = [
940
+
"generic-array",
941
+
"rand_core 0.6.4",
942
+
"subtle",
943
+
"zeroize",
944
+
]
844
945
845
946
[[package]]
846
947
name = "crypto-common"
···
1026
1127
]
1027
1128
1028
1129
[[package]]
1130
+
name = "dirs"
1131
+
version = "5.0.1"
1132
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1133
+
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
1134
+
dependencies = [
1135
+
"dirs-sys",
1136
+
]
1137
+
1138
+
[[package]]
1139
+
name = "dirs-sys"
1140
+
version = "0.4.1"
1141
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1142
+
checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
1143
+
dependencies = [
1144
+
"libc",
1145
+
"option-ext",
1146
+
"redox_users",
1147
+
"windows-sys 0.48.0",
1148
+
]
1149
+
1150
+
[[package]]
1029
1151
name = "displaydoc"
1030
1152
version = "0.2.5"
1031
1153
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1049
1171
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
1050
1172
1051
1173
[[package]]
1174
+
name = "ecdsa"
1175
+
version = "0.16.9"
1176
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1177
+
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
1178
+
dependencies = [
1179
+
"der",
1180
+
"digest",
1181
+
"elliptic-curve",
1182
+
"rfc6979",
1183
+
"signature",
1184
+
"spki",
1185
+
]
1186
+
1187
+
[[package]]
1052
1188
name = "either"
1053
1189
version = "1.15.0"
1054
1190
source = "registry+https://github.com/rust-lang/crates.io-index"
1055
1191
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
1056
1192
dependencies = [
1057
1193
"serde",
1194
+
]
1195
+
1196
+
[[package]]
1197
+
name = "elliptic-curve"
1198
+
version = "0.13.8"
1199
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1200
+
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
1201
+
dependencies = [
1202
+
"base16ct",
1203
+
"crypto-bigint",
1204
+
"digest",
1205
+
"ff",
1206
+
"generic-array",
1207
+
"group",
1208
+
"pkcs8",
1209
+
"rand_core 0.6.4",
1210
+
"sec1",
1211
+
"subtle",
1212
+
"zeroize",
1058
1213
]
1059
1214
1060
1215
[[package]]
···
1121
1276
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
1122
1277
1123
1278
[[package]]
1279
+
name = "ff"
1280
+
version = "0.13.1"
1281
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1282
+
checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
1283
+
dependencies = [
1284
+
"rand_core 0.6.4",
1285
+
"subtle",
1286
+
]
1287
+
1288
+
[[package]]
1289
+
name = "flate2"
1290
+
version = "1.1.2"
1291
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1292
+
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
1293
+
dependencies = [
1294
+
"crc32fast",
1295
+
"miniz_oxide",
1296
+
]
1297
+
1298
+
[[package]]
1124
1299
name = "flume"
1125
1300
version = "0.11.1"
1126
1301
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1145
1320
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
1146
1321
1147
1322
[[package]]
1148
-
name = "foreign-types"
1149
-
version = "0.3.2"
1150
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1151
-
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
1152
-
dependencies = [
1153
-
"foreign-types-shared",
1154
-
]
1155
-
1156
-
[[package]]
1157
-
name = "foreign-types-shared"
1158
-
version = "0.1.1"
1159
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1160
-
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
1161
-
1162
-
[[package]]
1163
1323
name = "form_urlencoded"
1164
1324
version = "1.2.1"
1165
1325
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1296
1456
dependencies = [
1297
1457
"typenum",
1298
1458
"version_check",
1459
+
"zeroize",
1299
1460
]
1300
1461
1301
1462
[[package]]
···
1318
1479
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
1319
1480
dependencies = [
1320
1481
"cfg-if",
1482
+
"js-sys",
1321
1483
"libc",
1322
1484
"r-efi",
1323
1485
"wasi 0.14.2+wasi-0.2.4",
1486
+
"wasm-bindgen",
1324
1487
]
1325
1488
1326
1489
[[package]]
···
1336
1499
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
1337
1500
1338
1501
[[package]]
1502
+
name = "group"
1503
+
version = "0.13.0"
1504
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1505
+
checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
1506
+
dependencies = [
1507
+
"ff",
1508
+
"rand_core 0.6.4",
1509
+
"subtle",
1510
+
]
1511
+
1512
+
[[package]]
1339
1513
name = "h2"
1340
1514
version = "0.4.11"
1341
1515
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1346
1520
"fnv",
1347
1521
"futures-core",
1348
1522
"futures-sink",
1349
-
"http",
1523
+
"http 1.3.1",
1350
1524
"indexmap",
1351
1525
"slab",
1352
1526
"tokio",
···
1421
1595
1422
1596
[[package]]
1423
1597
name = "http"
1598
+
version = "0.2.12"
1599
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1600
+
checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
1601
+
dependencies = [
1602
+
"bytes",
1603
+
"fnv",
1604
+
"itoa",
1605
+
]
1606
+
1607
+
[[package]]
1608
+
name = "http"
1424
1609
version = "1.3.1"
1425
1610
source = "registry+https://github.com/rust-lang/crates.io-index"
1426
1611
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
···
1437
1622
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
1438
1623
dependencies = [
1439
1624
"bytes",
1440
-
"http",
1625
+
"http 1.3.1",
1441
1626
]
1442
1627
1443
1628
[[package]]
···
1448
1633
dependencies = [
1449
1634
"bytes",
1450
1635
"futures-core",
1451
-
"http",
1636
+
"http 1.3.1",
1452
1637
"http-body",
1453
1638
"pin-project-lite",
1454
1639
]
···
1475
1660
"futures-channel",
1476
1661
"futures-util",
1477
1662
"h2",
1478
-
"http",
1663
+
"http 1.3.1",
1479
1664
"http-body",
1480
1665
"httparse",
1481
1666
"httpdate",
···
1492
1677
source = "registry+https://github.com/rust-lang/crates.io-index"
1493
1678
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
1494
1679
dependencies = [
1495
-
"http",
1680
+
"http 1.3.1",
1496
1681
"hyper",
1497
1682
"hyper-util",
1498
-
"rustls",
1499
-
"rustls-native-certs",
1683
+
"rustls 0.23.31",
1684
+
"rustls-native-certs 0.8.1",
1500
1685
"rustls-pki-types",
1501
1686
"tokio",
1502
-
"tokio-rustls",
1687
+
"tokio-rustls 0.26.2",
1503
1688
"tower-service",
1504
-
]
1505
-
1506
-
[[package]]
1507
-
name = "hyper-tls"
1508
-
version = "0.6.0"
1509
-
source = "registry+https://github.com/rust-lang/crates.io-index"
1510
-
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
1511
-
dependencies = [
1512
-
"bytes",
1513
-
"http-body-util",
1514
-
"hyper",
1515
-
"hyper-util",
1516
-
"native-tls",
1517
-
"tokio",
1518
-
"tokio-native-tls",
1519
-
"tower-service",
1689
+
"webpki-roots 1.0.2",
1520
1690
]
1521
1691
1522
1692
[[package]]
1523
1693
name = "hyper-util"
1524
-
version = "0.1.15"
1694
+
version = "0.1.16"
1525
1695
source = "registry+https://github.com/rust-lang/crates.io-index"
1526
-
checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
1696
+
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
1527
1697
dependencies = [
1528
-
"base64",
1698
+
"base64 0.22.1",
1529
1699
"bytes",
1530
1700
"futures-channel",
1531
1701
"futures-core",
1532
1702
"futures-util",
1533
-
"http",
1703
+
"http 1.3.1",
1534
1704
"http-body",
1535
1705
"hyper",
1536
1706
"ipnet",
1537
1707
"libc",
1538
1708
"percent-encoding",
1539
1709
"pin-project-lite",
1540
-
"socket2 0.5.10",
1541
-
"system-configuration",
1710
+
"socket2 0.6.0",
1542
1711
"tokio",
1543
1712
"tower-service",
1544
1713
"tracing",
1545
-
"windows-registry",
1546
1714
]
1547
1715
1548
1716
[[package]]
···
1694
1862
1695
1863
[[package]]
1696
1864
name = "io-uring"
1697
-
version = "0.7.8"
1865
+
version = "0.7.9"
1698
1866
source = "registry+https://github.com/rust-lang/crates.io-index"
1699
-
checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
1867
+
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
1700
1868
dependencies = [
1701
1869
"bitflags 2.9.1",
1702
1870
"cfg-if",
···
1732
1900
1733
1901
[[package]]
1734
1902
name = "iroh-car"
1735
-
version = "0.4.0"
1903
+
version = "0.5.1"
1736
1904
source = "registry+https://github.com/rust-lang/crates.io-index"
1737
-
checksum = "475a6f0ebd64c87ea011021c67f10b57930f6c286e0163807066bfb83553b1b6"
1905
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
1738
1906
dependencies = [
1739
1907
"anyhow",
1740
-
"cid 0.10.1",
1908
+
"cid 0.11.1",
1741
1909
"futures",
1742
-
"libipld",
1910
+
"serde",
1911
+
"serde_ipld_dagcbor",
1743
1912
"thiserror 1.0.69",
1744
1913
"tokio",
1745
1914
"unsigned-varint 0.7.2",
···
1784
1953
dependencies = [
1785
1954
"once_cell",
1786
1955
"wasm-bindgen",
1956
+
]
1957
+
1958
+
[[package]]
1959
+
name = "k256"
1960
+
version = "0.13.4"
1961
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1962
+
checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
1963
+
dependencies = [
1964
+
"cfg-if",
1965
+
"ecdsa",
1966
+
"elliptic-curve",
1967
+
"once_cell",
1968
+
"sha2",
1969
+
"signature",
1787
1970
]
1788
1971
1789
1972
[[package]]
···
1921
2104
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
1922
2105
dependencies = [
1923
2106
"cfg-if",
1924
-
"windows-targets 0.53.2",
2107
+
"windows-targets 0.53.3",
1925
2108
]
1926
2109
1927
2110
[[package]]
···
1931
2114
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
1932
2115
1933
2116
[[package]]
2117
+
name = "libredox"
2118
+
version = "0.1.9"
2119
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2120
+
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
2121
+
dependencies = [
2122
+
"bitflags 2.9.1",
2123
+
"libc",
2124
+
]
2125
+
2126
+
[[package]]
1934
2127
name = "libsqlite3-sys"
1935
2128
version = "0.30.1"
1936
2129
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1997
2190
]
1998
2191
1999
2192
[[package]]
2193
+
name = "lru-slab"
2194
+
version = "0.1.2"
2195
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2196
+
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
2197
+
2198
+
[[package]]
2000
2199
name = "matchers"
2001
2200
version = "0.1.0"
2002
2201
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2053
2252
source = "registry+https://github.com/rust-lang/crates.io-index"
2054
2253
checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034"
2055
2254
dependencies = [
2056
-
"base64",
2255
+
"base64 0.22.1",
2057
2256
"http-body-util",
2058
2257
"hyper",
2059
2258
"hyper-rustls",
···
2079
2278
"hashbrown 0.15.4",
2080
2279
"metrics 0.24.2",
2081
2280
"quanta",
2082
-
"rand 0.9.1",
2281
+
"rand 0.9.2",
2083
2282
"rand_xoshiro",
2084
2283
"sketches-ddsketch",
2085
2284
]
···
2147
2346
"bytes",
2148
2347
"encoding_rs",
2149
2348
"futures-util",
2150
-
"http",
2349
+
"http 1.3.1",
2151
2350
"httparse",
2152
2351
"memchr",
2153
2352
"mime",
···
2262
2461
]
2263
2462
2264
2463
[[package]]
2265
-
name = "native-tls"
2266
-
version = "0.2.14"
2267
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2268
-
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
2269
-
dependencies = [
2270
-
"libc",
2271
-
"log",
2272
-
"openssl",
2273
-
"openssl-probe",
2274
-
"openssl-sys",
2275
-
"schannel",
2276
-
"security-framework 2.11.1",
2277
-
"security-framework-sys",
2278
-
"tempfile",
2279
-
]
2280
-
2281
-
[[package]]
2282
2464
name = "nom"
2283
2465
version = "7.1.3"
2284
2466
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2305
2487
dependencies = [
2306
2488
"overload",
2307
2489
"winapi",
2490
+
]
2491
+
2492
+
[[package]]
2493
+
name = "num-bigint"
2494
+
version = "0.4.6"
2495
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2496
+
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
2497
+
dependencies = [
2498
+
"num-integer",
2499
+
"num-traits",
2308
2500
]
2309
2501
2310
2502
[[package]]
···
2400
2592
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
2401
2593
2402
2594
[[package]]
2403
-
name = "openssl"
2404
-
version = "0.10.73"
2405
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2406
-
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
2407
-
dependencies = [
2408
-
"bitflags 2.9.1",
2409
-
"cfg-if",
2410
-
"foreign-types",
2411
-
"libc",
2412
-
"once_cell",
2413
-
"openssl-macros",
2414
-
"openssl-sys",
2415
-
]
2416
-
2417
-
[[package]]
2418
-
name = "openssl-macros"
2419
-
version = "0.1.1"
2420
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2421
-
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
2422
-
dependencies = [
2423
-
"proc-macro2",
2424
-
"quote",
2425
-
"syn 2.0.104",
2426
-
]
2427
-
2428
-
[[package]]
2429
2595
name = "openssl-probe"
2430
2596
version = "0.1.6"
2431
2597
source = "registry+https://github.com/rust-lang/crates.io-index"
2432
2598
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
2433
2599
2434
2600
[[package]]
2435
-
name = "openssl-sys"
2436
-
version = "0.9.109"
2601
+
name = "option-ext"
2602
+
version = "0.2.0"
2437
2603
source = "registry+https://github.com/rust-lang/crates.io-index"
2438
-
checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
2439
-
dependencies = [
2440
-
"cc",
2441
-
"libc",
2442
-
"pkg-config",
2443
-
"vcpkg",
2444
-
]
2604
+
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
2445
2605
2446
2606
[[package]]
2447
2607
name = "overload"
···
2494
2654
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
2495
2655
2496
2656
[[package]]
2497
-
name = "pin-project"
2498
-
version = "1.1.10"
2499
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2500
-
checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
2501
-
dependencies = [
2502
-
"pin-project-internal",
2503
-
]
2504
-
2505
-
[[package]]
2506
-
name = "pin-project-internal"
2507
-
version = "1.1.10"
2508
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2509
-
checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
2510
-
dependencies = [
2511
-
"proc-macro2",
2512
-
"quote",
2513
-
"syn 2.0.104",
2514
-
]
2515
-
2516
-
[[package]]
2517
2657
name = "pin-project-lite"
2518
2658
version = "0.2.16"
2519
2659
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2584
2724
2585
2725
[[package]]
2586
2726
name = "prettyplease"
2587
-
version = "0.2.35"
2727
+
version = "0.2.36"
2588
2728
source = "registry+https://github.com/rust-lang/crates.io-index"
2589
-
checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a"
2729
+
checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2"
2590
2730
dependencies = [
2591
2731
"proc-macro2",
2592
2732
"syn 2.0.104",
···
2669
2809
]
2670
2810
2671
2811
[[package]]
2812
+
name = "quinn"
2813
+
version = "0.11.8"
2814
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2815
+
checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8"
2816
+
dependencies = [
2817
+
"bytes",
2818
+
"cfg_aliases",
2819
+
"pin-project-lite",
2820
+
"quinn-proto",
2821
+
"quinn-udp",
2822
+
"rustc-hash 2.1.1",
2823
+
"rustls 0.23.31",
2824
+
"socket2 0.5.10",
2825
+
"thiserror 2.0.12",
2826
+
"tokio",
2827
+
"tracing",
2828
+
"web-time",
2829
+
]
2830
+
2831
+
[[package]]
2832
+
name = "quinn-proto"
2833
+
version = "0.11.12"
2834
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2835
+
checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e"
2836
+
dependencies = [
2837
+
"bytes",
2838
+
"getrandom 0.3.3",
2839
+
"lru-slab",
2840
+
"rand 0.9.2",
2841
+
"ring",
2842
+
"rustc-hash 2.1.1",
2843
+
"rustls 0.23.31",
2844
+
"rustls-pki-types",
2845
+
"slab",
2846
+
"thiserror 2.0.12",
2847
+
"tinyvec",
2848
+
"tracing",
2849
+
"web-time",
2850
+
]
2851
+
2852
+
[[package]]
2853
+
name = "quinn-udp"
2854
+
version = "0.5.13"
2855
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2856
+
checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970"
2857
+
dependencies = [
2858
+
"cfg_aliases",
2859
+
"libc",
2860
+
"once_cell",
2861
+
"socket2 0.5.10",
2862
+
"tracing",
2863
+
"windows-sys 0.59.0",
2864
+
]
2865
+
2866
+
[[package]]
2672
2867
name = "quote"
2673
2868
version = "1.0.40"
2674
2869
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2696
2891
2697
2892
[[package]]
2698
2893
name = "rand"
2699
-
version = "0.9.1"
2894
+
version = "0.9.2"
2700
2895
source = "registry+https://github.com/rust-lang/crates.io-index"
2701
-
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
2896
+
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
2702
2897
dependencies = [
2703
2898
"rand_chacha 0.9.0",
2704
2899
"rand_core 0.9.3",
···
2762
2957
2763
2958
[[package]]
2764
2959
name = "redis"
2765
-
version = "0.24.0"
2960
+
version = "0.32.4"
2766
2961
source = "registry+https://github.com/rust-lang/crates.io-index"
2767
-
checksum = "c580d9cbbe1d1b479e8d67cf9daf6a62c957e6846048408b80b43ac3f6af84cd"
2962
+
checksum = "e1f66bf4cac9733a23bcdf1e0e01effbaaad208567beba68be8f67e5f4af3ee1"
2768
2963
dependencies = [
2769
2964
"arc-swap",
2770
-
"async-trait",
2965
+
"backon",
2771
2966
"bytes",
2967
+
"cfg-if",
2772
2968
"combine",
2773
-
"futures",
2969
+
"futures-channel",
2774
2970
"futures-util",
2775
2971
"itoa",
2972
+
"num-bigint",
2776
2973
"percent-encoding",
2777
2974
"pin-project-lite",
2778
2975
"ryu",
2779
2976
"sha1_smol",
2780
-
"socket2 0.4.10",
2977
+
"socket2 0.6.0",
2781
2978
"tokio",
2782
-
"tokio-retry",
2783
2979
"tokio-util",
2784
2980
"url",
2785
2981
]
2786
2982
2787
2983
[[package]]
2788
2984
name = "redox_syscall"
2789
-
version = "0.5.13"
2985
+
version = "0.5.17"
2790
2986
source = "registry+https://github.com/rust-lang/crates.io-index"
2791
-
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
2987
+
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
2792
2988
dependencies = [
2793
2989
"bitflags 2.9.1",
2794
2990
]
2795
2991
2796
2992
[[package]]
2993
+
name = "redox_users"
2994
+
version = "0.4.6"
2995
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2996
+
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
2997
+
dependencies = [
2998
+
"getrandom 0.2.16",
2999
+
"libredox",
3000
+
"thiserror 1.0.69",
3001
+
]
3002
+
3003
+
[[package]]
2797
3004
name = "regex"
2798
3005
version = "1.11.1"
2799
3006
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2843
3050
source = "registry+https://github.com/rust-lang/crates.io-index"
2844
3051
checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531"
2845
3052
dependencies = [
2846
-
"base64",
3053
+
"async-compression",
3054
+
"base64 0.22.1",
2847
3055
"bytes",
2848
-
"encoding_rs",
2849
3056
"futures-core",
2850
-
"h2",
2851
-
"http",
3057
+
"futures-util",
3058
+
"http 1.3.1",
2852
3059
"http-body",
2853
3060
"http-body-util",
2854
3061
"hyper",
2855
3062
"hyper-rustls",
2856
-
"hyper-tls",
2857
3063
"hyper-util",
2858
3064
"js-sys",
2859
3065
"log",
2860
-
"mime",
2861
-
"native-tls",
2862
3066
"percent-encoding",
2863
3067
"pin-project-lite",
3068
+
"quinn",
3069
+
"rustls 0.23.31",
2864
3070
"rustls-pki-types",
2865
3071
"serde",
2866
3072
"serde_json",
2867
3073
"serde_urlencoded",
2868
3074
"sync_wrapper",
2869
3075
"tokio",
2870
-
"tokio-native-tls",
3076
+
"tokio-rustls 0.26.2",
3077
+
"tokio-util",
2871
3078
"tower",
2872
3079
"tower-http",
2873
3080
"tower-service",
2874
3081
"url",
2875
3082
"wasm-bindgen",
2876
3083
"wasm-bindgen-futures",
3084
+
"wasm-streams",
2877
3085
"web-sys",
3086
+
"webpki-roots 1.0.2",
3087
+
]
3088
+
3089
+
[[package]]
3090
+
name = "rfc6979"
3091
+
version = "0.4.0"
3092
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3093
+
checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
3094
+
dependencies = [
3095
+
"hmac",
3096
+
"subtle",
2878
3097
]
2879
3098
2880
3099
[[package]]
···
2903
3122
[[package]]
2904
3123
name = "rocketman"
2905
3124
version = "0.2.3"
3125
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3126
+
checksum = "9928fe43979c19ff1f46f7920c30b76dfcead7a4d571c9836c4d02da8587f844"
2906
3127
dependencies = [
2907
3128
"anyhow",
2908
3129
"async-trait",
···
2910
3131
"derive_builder",
2911
3132
"flume",
2912
3133
"futures-util",
2913
-
"metrics 0.23.1",
3134
+
"metrics 0.24.2",
2914
3135
"rand 0.8.5",
2915
3136
"serde",
2916
3137
"serde_json",
2917
3138
"tokio",
2918
-
"tokio-tungstenite",
3139
+
"tokio-tungstenite 0.20.1",
2919
3140
"tracing",
2920
3141
"tracing-subscriber",
2921
3142
"url",
···
2944
3165
2945
3166
[[package]]
2946
3167
name = "rustc-demangle"
2947
-
version = "0.1.25"
3168
+
version = "0.1.26"
2948
3169
source = "registry+https://github.com/rust-lang/crates.io-index"
2949
-
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
3170
+
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
2950
3171
2951
3172
[[package]]
2952
3173
name = "rustc-hash"
2953
3174
version = "1.1.0"
2954
3175
source = "registry+https://github.com/rust-lang/crates.io-index"
2955
3176
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
3177
+
3178
+
[[package]]
3179
+
name = "rustc-hash"
3180
+
version = "2.1.1"
3181
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3182
+
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
2956
3183
2957
3184
[[package]]
2958
3185
name = "rustc_version"
···
2991
3218
2992
3219
[[package]]
2993
3220
name = "rustls"
2994
-
version = "0.23.29"
3221
+
version = "0.21.12"
3222
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3223
+
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
3224
+
dependencies = [
3225
+
"log",
3226
+
"ring",
3227
+
"rustls-webpki 0.101.7",
3228
+
"sct",
3229
+
]
3230
+
3231
+
[[package]]
3232
+
name = "rustls"
3233
+
version = "0.23.31"
2995
3234
source = "registry+https://github.com/rust-lang/crates.io-index"
2996
-
checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
3235
+
checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc"
2997
3236
dependencies = [
2998
3237
"aws-lc-rs",
2999
3238
"once_cell",
3239
+
"ring",
3000
3240
"rustls-pki-types",
3001
-
"rustls-webpki",
3241
+
"rustls-webpki 0.103.4",
3002
3242
"subtle",
3003
3243
"zeroize",
3004
3244
]
3005
3245
3006
3246
[[package]]
3007
3247
name = "rustls-native-certs"
3248
+
version = "0.6.3"
3249
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3250
+
checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00"
3251
+
dependencies = [
3252
+
"openssl-probe",
3253
+
"rustls-pemfile",
3254
+
"schannel",
3255
+
"security-framework 2.11.1",
3256
+
]
3257
+
3258
+
[[package]]
3259
+
name = "rustls-native-certs"
3008
3260
version = "0.8.1"
3009
3261
source = "registry+https://github.com/rust-lang/crates.io-index"
3010
3262
checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
···
3016
3268
]
3017
3269
3018
3270
[[package]]
3271
+
name = "rustls-pemfile"
3272
+
version = "1.0.4"
3273
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3274
+
checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
3275
+
dependencies = [
3276
+
"base64 0.21.7",
3277
+
]
3278
+
3279
+
[[package]]
3019
3280
name = "rustls-pki-types"
3020
3281
version = "1.12.0"
3021
3282
source = "registry+https://github.com/rust-lang/crates.io-index"
3022
3283
checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
3023
3284
dependencies = [
3285
+
"web-time",
3024
3286
"zeroize",
3287
+
]
3288
+
3289
+
[[package]]
3290
+
name = "rustls-webpki"
3291
+
version = "0.101.7"
3292
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3293
+
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
3294
+
dependencies = [
3295
+
"ring",
3296
+
"untrusted",
3025
3297
]
3026
3298
3027
3299
[[package]]
···
3070
3342
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
3071
3343
3072
3344
[[package]]
3345
+
name = "sct"
3346
+
version = "0.7.1"
3347
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3348
+
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
3349
+
dependencies = [
3350
+
"ring",
3351
+
"untrusted",
3352
+
]
3353
+
3354
+
[[package]]
3355
+
name = "sec1"
3356
+
version = "0.7.3"
3357
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3358
+
checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
3359
+
dependencies = [
3360
+
"base16ct",
3361
+
"der",
3362
+
"generic-array",
3363
+
"pkcs8",
3364
+
"subtle",
3365
+
"zeroize",
3366
+
]
3367
+
3368
+
[[package]]
3073
3369
name = "security-framework"
3074
3370
version = "2.11.1"
3075
3371
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3169
3465
]
3170
3466
3171
3467
[[package]]
3468
+
name = "serde_ipld_dagjson"
3469
+
version = "0.2.0"
3470
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3471
+
checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36"
3472
+
dependencies = [
3473
+
"ipld-core",
3474
+
"serde",
3475
+
"serde_json",
3476
+
]
3477
+
3478
+
[[package]]
3172
3479
name = "serde_json"
3173
-
version = "1.0.141"
3480
+
version = "1.0.142"
3174
3481
source = "registry+https://github.com/rust-lang/crates.io-index"
3175
-
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
3482
+
checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7"
3176
3483
dependencies = [
3177
3484
"itoa",
3178
3485
"memchr",
···
3256
3563
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3257
3564
3258
3565
[[package]]
3566
+
name = "signal-hook-registry"
3567
+
version = "1.4.5"
3568
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3569
+
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
3570
+
dependencies = [
3571
+
"libc",
3572
+
]
3573
+
3574
+
[[package]]
3259
3575
name = "signature"
3260
3576
version = "2.2.0"
3261
3577
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3288
3604
3289
3605
[[package]]
3290
3606
name = "socket2"
3291
-
version = "0.4.10"
3607
+
version = "0.5.10"
3292
3608
source = "registry+https://github.com/rust-lang/crates.io-index"
3293
-
checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
3609
+
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
3294
3610
dependencies = [
3295
3611
"libc",
3296
-
"winapi",
3612
+
"windows-sys 0.52.0",
3297
3613
]
3298
3614
3299
3615
[[package]]
3300
3616
name = "socket2"
3301
-
version = "0.5.10"
3617
+
version = "0.6.0"
3302
3618
source = "registry+https://github.com/rust-lang/crates.io-index"
3303
-
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
3619
+
checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
3304
3620
dependencies = [
3305
3621
"libc",
3306
-
"windows-sys 0.52.0",
3622
+
"windows-sys 0.59.0",
3307
3623
]
3308
3624
3309
3625
[[package]]
···
3344
3660
source = "registry+https://github.com/rust-lang/crates.io-index"
3345
3661
checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
3346
3662
dependencies = [
3347
-
"base64",
3663
+
"base64 0.22.1",
3348
3664
"bytes",
3349
3665
"crc",
3350
3666
"crossbeam-queue",
···
3361
3677
"memchr",
3362
3678
"once_cell",
3363
3679
"percent-encoding",
3680
+
"rustls 0.23.31",
3364
3681
"serde",
3365
3682
"serde_json",
3366
3683
"sha2",
···
3372
3689
"tracing",
3373
3690
"url",
3374
3691
"uuid",
3692
+
"webpki-roots 0.26.11",
3375
3693
]
3376
3694
3377
3695
[[package]]
···
3419
3737
checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
3420
3738
dependencies = [
3421
3739
"atoi",
3422
-
"base64",
3740
+
"base64 0.22.1",
3423
3741
"bitflags 2.9.1",
3424
3742
"byteorder",
3425
3743
"bytes",
···
3463
3781
checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
3464
3782
dependencies = [
3465
3783
"atoi",
3466
-
"base64",
3784
+
"base64 0.22.1",
3467
3785
"bitflags 2.9.1",
3468
3786
"byteorder",
3469
3787
"crc",
···
3641
3959
]
3642
3960
3643
3961
[[package]]
3644
-
name = "system-configuration"
3645
-
version = "0.6.1"
3962
+
name = "tagptr"
3963
+
version = "0.2.0"
3646
3964
source = "registry+https://github.com/rust-lang/crates.io-index"
3647
-
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
3648
-
dependencies = [
3649
-
"bitflags 2.9.1",
3650
-
"core-foundation 0.9.4",
3651
-
"system-configuration-sys",
3652
-
]
3965
+
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
3653
3966
3654
3967
[[package]]
3655
-
name = "system-configuration-sys"
3656
-
version = "0.6.0"
3657
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3658
-
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
3968
+
name = "teal-cli"
3969
+
version = "0.1.0"
3659
3970
dependencies = [
3660
-
"core-foundation-sys",
3661
-
"libc",
3971
+
"anyhow",
3972
+
"chrono",
3973
+
"clap",
3974
+
"colored",
3975
+
"dirs",
3976
+
"hex",
3977
+
"k256",
3978
+
"multibase",
3979
+
"rand 0.8.5",
3980
+
"serde",
3981
+
"serde_json",
3982
+
"tempfile",
3983
+
"tokio",
3662
3984
]
3663
-
3664
-
[[package]]
3665
-
name = "tagptr"
3666
-
version = "0.2.0"
3667
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3668
-
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
3669
3985
3670
3986
[[package]]
3671
3987
name = "tempfile"
···
3789
4105
3790
4106
[[package]]
3791
4107
name = "tokio"
3792
-
version = "1.46.1"
4108
+
version = "1.47.1"
3793
4109
source = "registry+https://github.com/rust-lang/crates.io-index"
3794
-
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
4110
+
checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
3795
4111
dependencies = [
3796
4112
"backtrace",
3797
4113
"bytes",
3798
4114
"io-uring",
3799
4115
"libc",
3800
4116
"mio",
4117
+
"parking_lot",
3801
4118
"pin-project-lite",
4119
+
"signal-hook-registry",
3802
4120
"slab",
3803
-
"socket2 0.5.10",
4121
+
"socket2 0.6.0",
3804
4122
"tokio-macros",
3805
-
"windows-sys 0.52.0",
4123
+
"windows-sys 0.59.0",
3806
4124
]
3807
4125
3808
4126
[[package]]
···
3817
4135
]
3818
4136
3819
4137
[[package]]
3820
-
name = "tokio-native-tls"
3821
-
version = "0.3.1"
3822
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3823
-
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
3824
-
dependencies = [
3825
-
"native-tls",
3826
-
"tokio",
3827
-
]
3828
-
3829
-
[[package]]
3830
-
name = "tokio-retry"
3831
-
version = "0.3.0"
4138
+
name = "tokio-rustls"
4139
+
version = "0.24.1"
3832
4140
source = "registry+https://github.com/rust-lang/crates.io-index"
3833
-
checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
4141
+
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
3834
4142
dependencies = [
3835
-
"pin-project",
3836
-
"rand 0.8.5",
4143
+
"rustls 0.21.12",
3837
4144
"tokio",
3838
4145
]
3839
4146
···
3843
4150
source = "registry+https://github.com/rust-lang/crates.io-index"
3844
4151
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
3845
4152
dependencies = [
3846
-
"rustls",
4153
+
"rustls 0.23.31",
3847
4154
"tokio",
3848
4155
]
3849
4156
···
3860
4167
3861
4168
[[package]]
3862
4169
name = "tokio-tungstenite"
4170
+
version = "0.20.1"
4171
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4172
+
checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c"
4173
+
dependencies = [
4174
+
"futures-util",
4175
+
"log",
4176
+
"rustls 0.21.12",
4177
+
"rustls-native-certs 0.6.3",
4178
+
"tokio",
4179
+
"tokio-rustls 0.24.1",
4180
+
"tungstenite 0.20.1",
4181
+
"webpki-roots 0.25.4",
4182
+
]
4183
+
4184
+
[[package]]
4185
+
name = "tokio-tungstenite"
3863
4186
version = "0.24.0"
3864
4187
source = "registry+https://github.com/rust-lang/crates.io-index"
3865
4188
checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
3866
4189
dependencies = [
3867
4190
"futures-util",
3868
4191
"log",
4192
+
"rustls 0.23.31",
4193
+
"rustls-pki-types",
3869
4194
"tokio",
3870
-
"tungstenite",
4195
+
"tokio-rustls 0.26.2",
4196
+
"tungstenite 0.24.0",
4197
+
"webpki-roots 0.26.11",
3871
4198
]
3872
4199
3873
4200
[[package]]
···
3934
4261
"bitflags 2.9.1",
3935
4262
"bytes",
3936
4263
"futures-util",
3937
-
"http",
4264
+
"http 1.3.1",
3938
4265
"http-body",
3939
4266
"iri-string",
3940
4267
"pin-project-lite",
···
4036
4363
4037
4364
[[package]]
4038
4365
name = "tungstenite"
4366
+
version = "0.20.1"
4367
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4368
+
checksum = "9e3dac10fd62eaf6617d3a904ae222845979aec67c615d1c842b4002c7666fb9"
4369
+
dependencies = [
4370
+
"byteorder",
4371
+
"bytes",
4372
+
"data-encoding",
4373
+
"http 0.2.12",
4374
+
"httparse",
4375
+
"log",
4376
+
"rand 0.8.5",
4377
+
"rustls 0.21.12",
4378
+
"sha1",
4379
+
"thiserror 1.0.69",
4380
+
"url",
4381
+
"utf-8",
4382
+
]
4383
+
4384
+
[[package]]
4385
+
name = "tungstenite"
4039
4386
version = "0.24.0"
4040
4387
source = "registry+https://github.com/rust-lang/crates.io-index"
4041
4388
checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
···
4043
4390
"byteorder",
4044
4391
"bytes",
4045
4392
"data-encoding",
4046
-
"http",
4393
+
"http 1.3.1",
4047
4394
"httparse",
4048
4395
"log",
4049
4396
"rand 0.8.5",
4397
+
"rustls 0.23.31",
4398
+
"rustls-pki-types",
4050
4399
"sha1",
4051
4400
"thiserror 1.0.69",
4052
4401
"utf-8",
···
4065
4414
"atrium-api",
4066
4415
"atrium-xrpc",
4067
4416
"chrono",
4068
-
"http",
4417
+
"http 1.3.1",
4069
4418
"ipld-core",
4070
4419
"langtag",
4071
4420
"regex",
···
4074
4423
"serde_ipld_dagcbor",
4075
4424
"serde_json",
4076
4425
"thiserror 2.0.12",
4077
-
"uuid",
4078
4426
]
4079
4427
4080
4428
[[package]]
···
4331
4679
]
4332
4680
4333
4681
[[package]]
4682
+
name = "wasm-streams"
4683
+
version = "0.4.2"
4684
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4685
+
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
4686
+
dependencies = [
4687
+
"futures-util",
4688
+
"js-sys",
4689
+
"wasm-bindgen",
4690
+
"wasm-bindgen-futures",
4691
+
"web-sys",
4692
+
]
4693
+
4694
+
[[package]]
4334
4695
name = "web-sys"
4335
4696
version = "0.3.77"
4336
4697
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4348
4709
dependencies = [
4349
4710
"js-sys",
4350
4711
"wasm-bindgen",
4712
+
]
4713
+
4714
+
[[package]]
4715
+
name = "webpki-roots"
4716
+
version = "0.25.4"
4717
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4718
+
checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
4719
+
4720
+
[[package]]
4721
+
name = "webpki-roots"
4722
+
version = "0.26.11"
4723
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4724
+
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
4725
+
dependencies = [
4726
+
"webpki-roots 1.0.2",
4727
+
]
4728
+
4729
+
[[package]]
4730
+
name = "webpki-roots"
4731
+
version = "1.0.2"
4732
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4733
+
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
4734
+
dependencies = [
4735
+
"rustls-pki-types",
4351
4736
]
4352
4737
4353
4738
[[package]]
···
4523
4908
]
4524
4909
4525
4910
[[package]]
4526
-
name = "windows-registry"
4527
-
version = "0.5.3"
4528
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4529
-
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
4530
-
dependencies = [
4531
-
"windows-link",
4532
-
"windows-result 0.3.4",
4533
-
"windows-strings",
4534
-
]
4535
-
4536
-
[[package]]
4537
4911
name = "windows-result"
4538
4912
version = "0.1.2"
4539
4913
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4593
4967
source = "registry+https://github.com/rust-lang/crates.io-index"
4594
4968
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
4595
4969
dependencies = [
4596
-
"windows-targets 0.53.2",
4970
+
"windows-targets 0.53.3",
4597
4971
]
4598
4972
4599
4973
[[package]]
···
4629
5003
4630
5004
[[package]]
4631
5005
name = "windows-targets"
4632
-
version = "0.53.2"
5006
+
version = "0.53.3"
4633
5007
source = "registry+https://github.com/rust-lang/crates.io-index"
4634
-
checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
5008
+
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
4635
5009
dependencies = [
5010
+
"windows-link",
4636
5011
"windows_aarch64_gnullvm 0.53.0",
4637
5012
"windows_aarch64_msvc 0.53.0",
4638
5013
"windows_i686_gnu 0.53.0",
+30
-8
Cargo.toml
+30
-8
Cargo.toml
···
1
1
[workspace]
2
-
members = ["apps/aqua", "services/cadet", "services/rocketman"]
2
+
members = ["apps/aqua", "services/cadet", "tools/teal-cli"]
3
+
default-members = ["services/types"]
3
4
resolver = "2"
4
5
5
6
[workspace.dependencies]
6
7
# Shared dependencies
7
-
tokio = { version = "1.0", features = ["rt-multi-thread", "macros"] }
8
+
tokio = { version = "1.0", features = [
9
+
"rt-multi-thread",
10
+
"macros",
11
+
"time",
12
+
"net",
13
+
"sync",
14
+
] }
8
15
axum = { version = "0.8", features = ["macros"] }
9
16
tower-http = { version = "0.6", features = ["cors"] }
10
-
sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "uuid"] }
17
+
sqlx = { version = "0.8", features = [
18
+
"runtime-tokio",
19
+
"postgres",
20
+
"uuid",
21
+
"tls-rustls",
22
+
] }
11
23
serde = { version = "1.0", features = ["derive"] }
12
24
anyhow = "1.0"
13
25
serde_json = "1.0"
14
26
tracing = "0.1"
15
27
tracing-subscriber = "0.3"
16
28
metrics = "0.23"
17
-
reqwest = { version = "0.12", features = ["json"] }
29
+
reqwest = { version = "0.12", default-features = false, features = [
30
+
"json",
31
+
"rustls-tls",
32
+
"stream",
33
+
"gzip",
34
+
] }
18
35
url = "2.5"
19
36
rand = "0.8"
20
37
flume = "0.11"
21
38
async-trait = "0.1"
22
39
time = "0.3"
23
40
dotenvy = "0.15"
24
-
tokio-tungstenite = "0.24"
41
+
tokio-tungstenite = { version = "*", default-features = false, features = [
42
+
"rustls-tls-webpki-roots",
43
+
"connect",
44
+
"handshake",
45
+
] }
25
46
atrium-api = "0.25"
26
47
chrono = "0.4"
27
48
uuid = { version = "1.0", features = ["v4", "serde"] }
28
49
types = { path = "services/types" }
29
-
rocketman = { path = "services/rocketman" }
50
+
rocketman = "0.2.3"
30
51
31
52
# CAR and IPLD dependencies
32
-
iroh-car = "0.4"
53
+
iroh-car = "0.5"
33
54
libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] }
34
55
cid = "0.11"
35
56
base64 = "0.22"
57
+
atmst = "0.0.1"
36
58
37
59
# Redis for job queues and caching
38
-
redis = { version = "0.24", features = ["tokio-comp", "connection-manager"] }
60
+
redis = { version = "0.32", features = ["tokio-comp", "connection-manager"] }
+18
Cross.toml
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = [
14
+
"CARGO_HOME",
15
+
"CARGO_TARGET_DIR",
16
+
"SQLX_OFFLINE",
17
+
"PKG_CONFIG_ALLOW_CROSS",
18
+
]
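A minimal sketch of how this Cross.toml might be exercised locally, assuming `cross` and `sqlx-cli` are installed; the passthrough entries above are what let `SQLX_OFFLINE` reach the build container:

```bash
# Regenerate the offline query cache so SQLX_OFFLINE builds can use .sqlx
cargo sqlx prepare --workspace

# Cross-compile for ARM64 using the cross-rs image configured above
SQLX_OFFLINE=true cross build --release --target aarch64-unknown-linux-gnu
```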
+14
-9
README.md
···
29
29
# Install all dependencies (Node.js and Rust)
30
30
pnpm install
31
31
32
+
# clone submodules
33
+
git submodule update --init --recursive
34
+
32
35
# Set up environment configuration
33
36
cp apps/aqua/.env.example apps/aqua/.env
34
37
···
90
93
- **Format**: `YYYYMMDDHHMMSS_description.sql` (timestamped SQL files)
91
94
- **Type**: Forward-only SQL migrations managed by SQLx
92
95
93
-
#### Database Schema
94
-
95
-
The database includes tables for:
96
-
- **Music data**: `artists`, `releases`, `recordings`, `plays`
97
-
- **User data**: `profiles`, `statii` (status records), `featured_items`
98
-
- **CAR imports**: `car_import_requests`, `car_blocks`, `car_extracted_records`
99
-
- **Analytics**: Materialized views for play counts and top charts
100
-
101
96
## Development
102
97
103
98
To start the development server run:
···
106
101
turbo dev --filter=@teal/aqua
107
102
```
108
103
109
-
Open http://localhost:3000/ with your browser to see the home page. You will need to login with Bluesky to test the posting functionality of the app. Note: if the redirect back to the app after you login isn't working correctly, you may need to replace the `127.0.0.1` with `localhost`.
104
+
Open http://localhost:3000/ with your browser to see the home page. Note: if the redirect back to the app after you log in isn't working correctly, you may need to replace `127.0.0.1` with `localhost`, or you may need to set up a publicly accessible endpoint for the app to post to (see below).
110
105
111
106
### Running the full stack in docker for development
112
107
···
153
148
154
149
# Show lexicon change impact
155
150
pnpm lex:diff
151
+
```
152
+
153
+
# Updating Vendored Lexicons
154
+
To update vendored lexicons (anything that's not under fm.teal), follow these steps:
155
+
```bash
156
+
cd vendor/atproto
157
+
git pull origin main
158
+
cd ../..
159
+
git add vendor/atproto
160
+
git commit -m "Update atproto lexicons to latest"
156
161
```
157
162
158
163
See [`tools/lexicon-cli/README.md`](tools/lexicon-cli/README.md) for detailed documentation.
+24
-14
apps/amethyst/Dockerfile
+24
-14
apps/amethyst/Dockerfile
···
18
18
COPY packages/lexicons/ ./packages/lexicons/
19
19
COPY packages/tsconfig/ ./packages/tsconfig/
20
20
21
+
# Copy lexicons source data
22
+
COPY lexicons/ ./lexicons/
23
+
21
24
# Copy the aqua app
22
25
COPY apps/amethyst/ ./apps/amethyst/
23
26
24
27
# Copy .env
25
28
COPY ../../.env ./apps/amethyst/.env
26
29
27
-
# Build the aqua app
30
+
# Install dependencies and generate lexicons
31
+
RUN cd tools/lexicon-cli && pnpm build
32
+
33
+
# Generate lexicons before building amethyst
34
+
RUN pnpm lex:gen-server
35
+
36
+
RUN pnpm install
37
+
38
+
# Build the amethyst app
28
39
WORKDIR /app/apps/amethyst
29
-
RUN pnpm install
30
40
RUN pnpm run build:web
31
41
32
42
#create the client-json
33
43
RUN echo '{ \
34
-
"redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \
35
-
"response_types": ["code"], \
36
-
"grant_types": ["authorization_code", "refresh_token"], \
37
-
"scope": "atproto transition:generic", \
38
-
"token_endpoint_auth_method": "none", \
39
-
"application_type": "web", \
40
-
"client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \
41
-
"client_name": "teal", \
42
-
"client_uri": "https://'"${CLIENT_ADDRESS}"'", \
43
-
"dpop_bound_access_tokens": true \
44
-
}' > /app/client-metadata.json
44
+
"redirect_uris": ["https://'"${CLIENT_ADDRESS}"'/auth/callback"], \
45
+
"response_types": ["code"], \
46
+
"grant_types": ["authorization_code", "refresh_token"], \
47
+
"scope": "atproto transition:generic", \
48
+
"token_endpoint_auth_method": "none", \
49
+
"application_type": "web", \
50
+
"client_id": "https://'"${CLIENT_ADDRESS}"'/client-metadata.json", \
51
+
"client_name": "teal", \
52
+
"client_uri": "https://'"${CLIENT_ADDRESS}"'", \
53
+
"dpop_bound_access_tokens": true \
54
+
}' > /app/client-metadata.json
45
55
46
56
47
57
FROM caddy:2.1.0-alpine AS caddy
···
50
60
EXPOSE 443/udp
51
61
COPY /apps/amethyst/Caddyfile /etc/caddy/Caddyfile
52
62
COPY --from=builder /app/apps/amethyst/build /srv
53
-
COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
63
+
COPY --from=builder /app/client-metadata.json /srv/client-metadata.json
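Because the builder stage copies the monorepo `packages/` and `lexicons/` directories and builds `tools/lexicon-cli`, the image has to be built with the repository root as the context; a hedged invocation (the tag is a placeholder):

```bash
# Build from the repo root so the monorepo paths in the COPY steps resolve
docker build -f apps/amethyst/Dockerfile -t teal-amethyst:dev .
```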
+2
-2
apps/aqua/Cargo.toml
+2
-2
apps/aqua/Cargo.toml
···
19
19
tracing-subscriber.workspace = true
20
20
sqlx = { workspace = true, features = ["time"] }
21
21
dotenvy.workspace = true
22
-
23
22
types.workspace = true
24
-
chrono = "0.4.41"
23
+
chrono.workspace = true
25
24
26
25
# CAR import functionality
27
26
iroh-car.workspace = true
···
29
28
reqwest.workspace = true
30
29
url.workspace = true
31
30
clap = { version = "4.0", features = ["derive"] }
31
+
atmst.workspace = true
32
32
33
33
# Redis for job queues
34
34
redis.workspace = true
+20
apps/aqua/Cross.toml
+20
apps/aqua/Cross.toml
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"]
14
+
# Allow cross-compilation of native dependencies
15
+
PKG_CONFIG_ALLOW_CROSS = "1"
16
+
# Use static linking to reduce runtime dependencies
17
+
RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s"
18
+
# Disable problematic features that might require OpenSSL
19
+
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
20
+
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
+90
apps/aqua/Dockerfile
+90
apps/aqua/Dockerfile
···
1
+
# Docker build args for cross-platform builds (must be at the top)
2
+
ARG TARGETPLATFORM
3
+
ARG BUILDPLATFORM
4
+
ARG TARGETARCH
5
+
ARG TARGETOS
6
+
7
+
FROM --platform=${BUILDPLATFORM} rust:latest AS buildah
8
+
9
+
# Create appuser
10
+
ENV USER=app
11
+
ENV UID=10001
12
+
13
+
RUN adduser \
14
+
--disabled-password \
15
+
--gecos "" \
16
+
--home "/nonexistent" \
17
+
--shell "/sbin/nologin" \
18
+
--no-create-home \
19
+
--uid "${UID}" \
20
+
"${USER}"
21
+
22
+
WORKDIR /buildah
23
+
24
+
# Re-declare ARGs after FROM (Docker requirement)
25
+
ARG TARGETPLATFORM
26
+
ARG BUILDPLATFORM
27
+
ARG TARGETARCH
28
+
ARG TARGETOS
29
+
30
+
# Debug platform detection before copying files
31
+
RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM"
32
+
33
+
COPY ./ .
34
+
35
+
# Setup lexicons and install dependencies
36
+
RUN ./scripts/setup-lexicons.sh
37
+
38
+
# Install Node.js and pnpm for lexicon generation
39
+
RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/*
40
+
RUN npm install -g pnpm
41
+
42
+
# Install dependencies and generate lexicons
43
+
RUN pnpm install
44
+
RUN cd tools/lexicon-cli && pnpm build
45
+
RUN pnpm lex:gen --rust-only
46
+
47
+
# Install cross-compilation toolchains
48
+
RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu
49
+
50
+
# Enable ARM64 architecture and install cross-compilation tools
51
+
RUN dpkg --add-architecture arm64 && \
52
+
apt-get update && \
53
+
apt-get install -y \
54
+
gcc-aarch64-linux-gnu \
55
+
libssl-dev:arm64 \
56
+
libssl-dev \
57
+
pkg-config \
58
+
&& rm -rf /var/lib/apt/lists/*
59
+
60
+
# Set up cross-compilation environment
61
+
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
62
+
ENV PKG_CONFIG_ALLOW_CROSS=1
63
+
ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig
64
+
ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr
65
+
ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu
66
+
ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl
67
+
68
+
69
+
# Debug platform detection and run build
70
+
RUN . ./target.sh && \
71
+
touch apps/aqua/src/main.rs && \
72
+
echo "Building for $TARGET_ARCH" && \
73
+
cargo build --release --target $RUST_TARGET --package aqua && \
74
+
cp target/$RUST_TARGET/release/aqua target/aqua
75
+
76
+
FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc
77
+
78
+
# Import from builder.
79
+
COPY --from=buildah /etc/passwd /etc/passwd
80
+
COPY --from=buildah /etc/group /etc/group
81
+
82
+
WORKDIR /app
83
+
84
+
# Copy our build
85
+
COPY --from=buildah /buildah/target/aqua ./
86
+
87
+
# Use an unprivileged user.
88
+
USER app:app
89
+
90
+
CMD ["/app/aqua"]
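The builder stage runs on `${BUILDPLATFORM}` and picks the Rust target from `TARGETARCH`, so this Dockerfile is intended to be driven by Docker Buildx; a hedged multi-arch invocation (builder name and tag are placeholders):

```bash
# One-time builder setup (assumed), then a multi-arch build of the aqua image
docker buildx create --use --name teal-builder
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  -f apps/aqua/Dockerfile \
  -t teal-aqua:dev \
  .
```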
+62
apps/aqua/examples/did_demo.rs
+62
apps/aqua/examples/did_demo.rs
···
1
+
use serde_json::json;
2
+
3
+
/// Generate a DID document for did:web
4
+
fn generate_did_document(host: &str) -> serde_json::Value {
5
+
json!({
6
+
"@context": [
7
+
"https://www.w3.org/ns/did/v1",
8
+
"https://w3id.org/security/multikey/v1",
9
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
10
+
],
11
+
"id": format!("did:web:{}", host),
12
+
"alsoKnownAs": [
13
+
format!("at://{}", host)
14
+
],
15
+
"service": [
16
+
{
17
+
"id": "#bsky_fg",
18
+
"type": "BskyFeedGenerator",
19
+
"serviceEndpoint": format!("https://{}", host)
20
+
},
21
+
{
22
+
"id": "#atproto_pds",
23
+
"type": "AtprotoPersonalDataServer",
24
+
"serviceEndpoint": format!("https://{}", host)
25
+
}
26
+
],
27
+
"verificationMethod": [
28
+
{
29
+
"id": format!("did:web:{}#atproto", host),
30
+
"type": "Multikey",
31
+
"controller": format!("did:web:{}", host),
32
+
"publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK"
33
+
}
34
+
]
35
+
})
36
+
}
37
+
38
+
fn main() {
39
+
println!("DID Document Generation Demo");
40
+
println!("===========================\n");
41
+
42
+
let test_hosts = vec![
43
+
"localhost:3000",
44
+
"bsky.social",
45
+
"my-atproto-service.com",
46
+
"example.org:8080",
47
+
];
48
+
49
+
for host in test_hosts {
50
+
println!("DID Document for host: {}", host);
51
+
println!("URL: https://{}/.well-known/did.json", host);
52
+
println!("DID: did:web:{}", host);
53
+
println!();
54
+
55
+
let did_doc = generate_did_document(host);
56
+
println!("{}", serde_json::to_string_pretty(&did_doc).unwrap());
57
+
println!("\n{}\n", "=".repeat(80));
58
+
}
59
+
60
+
println!("The well-known endpoint /.well-known/did.json will serve this JSON structure");
61
+
println!("when accessed via HTTP GET request to your Aqua server.");
62
+
}
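The demo only prints to stdout, so it can be run straight from the workspace; a hedged invocation, assuming the crate keeps the `aqua` package name used elsewhere in this change:

```bash
# Print sample DID documents for a few hosts without starting the server
cargo run --package aqua --example did_demo
```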
+275
-82
apps/aqua/src/api/mod.rs
+275
-82
apps/aqua/src/api/mod.rs
···
1
+
use anyhow::Result;
1
2
use axum::{Extension, Json, extract::Multipart, extract::Path, http::StatusCode};
2
3
use serde::{Deserialize, Serialize};
3
-
use tracing::{info, error};
4
-
use anyhow::Result;
5
-
use uuid;
6
-
7
-
use sys_info;
4
+
use serde_json::{Value, json};
5
+
use tracing::{error, info};
8
6
9
7
use crate::ctx::Context;
10
8
use crate::redis_client::RedisClient;
9
+
use crate::types::CarImportJobStatus;
11
10
12
11
#[derive(Debug, Serialize, Deserialize)]
13
12
pub struct MetaOsInfo {
···
61
60
/// Get CAR import job status
62
61
pub async fn get_car_import_job_status(
63
62
Path(job_id): Path<String>,
64
-
) -> Result<Json<types::jobs::CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> {
65
-
use types::jobs::queue_keys;
66
-
63
+
) -> Result<Json<CarImportJobStatus>, (StatusCode, Json<ErrorResponse>)> {
64
+
use crate::types::queue_keys;
65
+
67
66
info!("Getting status for job: {}", job_id);
68
-
67
+
69
68
// Parse job ID
70
69
let job_uuid = match uuid::Uuid::parse_str(&job_id) {
71
70
Ok(uuid) => uuid,
···
77
76
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
78
77
}
79
78
};
80
-
79
+
81
80
// Connect to Redis
82
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
81
+
let redis_url =
82
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
83
83
let redis_client = match RedisClient::new(&redis_url) {
84
84
Ok(client) => client,
85
85
Err(e) => {
···
91
91
return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)));
92
92
}
93
93
};
94
-
94
+
95
95
// Get job status
96
-
match redis_client.get_job_status(&queue_keys::job_status_key(&job_uuid)).await {
97
-
Ok(Some(status_data)) => {
98
-
match serde_json::from_str::<types::jobs::CarImportJobStatus>(&status_data) {
99
-
Ok(status) => Ok(Json(status)),
100
-
Err(e) => {
101
-
error!("Failed to parse job status: {}", e);
102
-
let error_response = ErrorResponse {
103
-
error: "Failed to parse job status".to_string(),
104
-
details: Some(e.to_string()),
105
-
};
106
-
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)))
107
-
}
96
+
match redis_client
97
+
.get_job_status(&queue_keys::job_status_key(&job_uuid))
98
+
.await
99
+
{
100
+
Ok(Some(status_data)) => match serde_json::from_str::<CarImportJobStatus>(&status_data) {
101
+
Ok(status) => Ok(Json(status)),
102
+
Err(e) => {
103
+
error!("Failed to parse job status: {}", e);
104
+
let error_response = ErrorResponse {
105
+
error: "Failed to parse job status".to_string(),
106
+
details: Some(e.to_string()),
107
+
};
108
+
Err((StatusCode::INTERNAL_SERVER_ERROR, Json(error_response)))
108
109
}
109
-
}
110
+
},
110
111
Ok(None) => {
111
112
let error_response = ErrorResponse {
112
113
error: "Job not found".to_string(),
···
165
166
mut multipart: Multipart,
166
167
) -> Result<Json<CarImportResponse>, StatusCode> {
167
168
info!("Received CAR file upload request");
168
-
169
+
169
170
let mut car_data: Option<Vec<u8>> = None;
170
171
let mut import_id: Option<String> = None;
171
172
let mut description: Option<String> = None;
172
-
173
+
173
174
// Process multipart form data
174
-
while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? {
175
+
while let Some(field) = multipart
176
+
.next_field()
177
+
.await
178
+
.map_err(|_| StatusCode::BAD_REQUEST)?
179
+
{
175
180
let name = field.name().unwrap_or("").to_string();
176
-
181
+
177
182
match name.as_str() {
178
183
"car_file" => {
179
184
let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?;
···
192
197
}
193
198
}
194
199
}
195
-
200
+
196
201
let car_bytes = car_data.ok_or(StatusCode::BAD_REQUEST)?;
197
202
let final_import_id = import_id.unwrap_or_else(|| {
198
203
// Generate a unique import ID
199
204
format!("car-import-{}", chrono::Utc::now().timestamp())
200
205
});
201
-
206
+
202
207
// Validate CAR file format
203
208
match validate_car_file(&car_bytes).await {
204
209
Ok(_) => {
205
-
info!("CAR file validation successful for import {}", final_import_id);
210
+
info!(
211
+
"CAR file validation successful for import {}",
212
+
final_import_id
213
+
);
206
214
}
207
215
Err(e) => {
208
216
error!("CAR file validation failed: {}", e);
209
217
return Err(StatusCode::BAD_REQUEST);
210
218
}
211
219
}
212
-
220
+
213
221
// Store CAR import request in database for processing
214
-
match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await {
222
+
match store_car_import_request(&ctx, &final_import_id, &car_bytes, description.as_deref()).await
223
+
{
215
224
Ok(_) => {
216
-
info!("CAR import request stored successfully: {}", final_import_id);
225
+
info!(
226
+
"CAR import request stored successfully: {}",
227
+
final_import_id
228
+
);
217
229
Ok(Json(CarImportResponse {
218
230
import_id: final_import_id,
219
231
status: "queued".to_string(),
···
232
244
axum::extract::Path(import_id): axum::extract::Path<String>,
233
245
) -> Result<Json<CarImportResponse>, StatusCode> {
234
246
match get_import_status(&ctx, &import_id).await {
235
-
Ok(Some(status)) => {
236
-
Ok(Json(CarImportResponse {
237
-
import_id,
238
-
status: status.status,
239
-
message: status.message,
240
-
}))
241
-
}
247
+
Ok(Some(status)) => Ok(Json(CarImportResponse {
248
+
import_id,
249
+
status: status.status,
250
+
message: status.message,
251
+
})),
242
252
Ok(None) => Err(StatusCode::NOT_FOUND),
243
253
Err(e) => {
244
254
error!("Failed to get import status: {}", e);
···
248
258
}
249
259
250
260
async fn validate_car_file(car_data: &[u8]) -> Result<()> {
251
-
use std::io::Cursor;
252
261
use iroh_car::CarReader;
253
-
262
+
use std::io::Cursor;
263
+
254
264
let cursor = Cursor::new(car_data);
255
265
let reader = CarReader::new(cursor).await?;
256
266
let header = reader.header();
257
-
267
+
258
268
// Basic validation - ensure we have at least one root CID
259
269
if header.roots().is_empty() {
260
270
return Err(anyhow::anyhow!("CAR file has no root CIDs"));
261
271
}
262
-
272
+
263
273
info!("CAR file validated: {} root CIDs", header.roots().len());
264
274
Ok(())
265
275
}
···
293
303
Extension(ctx): Extension<Context>,
294
304
Json(request): Json<FetchCarRequest>,
295
305
) -> Result<Json<FetchCarResponse>, (StatusCode, Json<ErrorResponse>)> {
296
-
info!("Received CAR fetch request for user: {}", request.user_identifier);
297
-
306
+
info!(
307
+
"Received CAR fetch request for user: {}",
308
+
request.user_identifier
309
+
);
310
+
298
311
// Resolve user identifier to DID and PDS
299
312
let (user_did, pds_host) = match resolve_user_to_pds(&request.user_identifier).await {
300
313
Ok(result) => result,
···
302
315
error!("Failed to resolve user {}: {}", request.user_identifier, e);
303
316
let error_response = ErrorResponse {
304
317
error: "Failed to resolve user".to_string(),
305
-
details: if request.debug.unwrap_or(false) { Some(e.to_string()) } else { None },
318
+
details: if request.debug.unwrap_or(false) {
319
+
Some(e.to_string())
320
+
} else {
321
+
None
322
+
},
306
323
};
307
324
return Err((StatusCode::BAD_REQUEST, Json(error_response)));
308
325
}
309
326
};
310
-
311
-
info!("Resolved {} to DID {} on PDS {}", request.user_identifier, user_did, pds_host);
312
-
327
+
328
+
info!(
329
+
"Resolved {} to DID {} on PDS {}",
330
+
request.user_identifier, user_did, pds_host
331
+
);
332
+
313
333
// Generate import ID
314
-
let import_id = format!("pds-fetch-{}-{}",
315
-
user_did.replace(":", "-"),
334
+
let import_id = format!(
335
+
"pds-fetch-{}-{}",
336
+
user_did.replace(":", "-"),
316
337
chrono::Utc::now().timestamp()
317
338
);
318
-
339
+
319
340
// Fetch CAR file from PDS
320
341
match fetch_car_from_pds(&pds_host, &user_did, request.since.as_deref()).await {
321
342
Ok(car_data) => {
322
-
info!("Successfully fetched CAR file for {} ({} bytes)", user_did, car_data.len());
323
-
343
+
info!(
344
+
"Successfully fetched CAR file for {} ({} bytes)",
345
+
user_did,
346
+
car_data.len()
347
+
);
348
+
324
349
// Store the fetched CAR file for processing
325
-
let description = Some(format!("Fetched from PDS {} for user {}", pds_host, request.user_identifier));
326
-
match store_car_import_request(&ctx, &import_id, &car_data, description.as_deref()).await {
350
+
let description = Some(format!(
351
+
"Fetched from PDS {} for user {}",
352
+
pds_host, request.user_identifier
353
+
));
354
+
match store_car_import_request(&ctx, &import_id, &car_data, description.as_deref())
355
+
.await
356
+
{
327
357
Ok(_) => {
328
358
info!("CAR import request stored successfully: {}", import_id);
329
359
Ok(Json(FetchCarResponse {
···
371
401
372
402
/// Resolve a handle to a DID using com.atproto.identity.resolveHandle
373
403
async fn resolve_handle_to_did(handle: &str) -> Result<String> {
374
-
let url = format!("https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}", handle);
375
-
404
+
let url = format!(
405
+
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
406
+
handle
407
+
);
408
+
376
409
let response = reqwest::get(&url).await?;
377
410
if !response.status().is_success() {
378
-
return Err(anyhow::anyhow!("Failed to resolve handle {}: {}", handle, response.status()));
411
+
return Err(anyhow::anyhow!(
412
+
"Failed to resolve handle {}: {}",
413
+
handle,
414
+
response.status()
415
+
));
379
416
}
380
-
417
+
381
418
let json: serde_json::Value = response.json().await?;
382
-
let did = json["did"].as_str()
419
+
let did = json["did"]
420
+
.as_str()
383
421
.ok_or_else(|| anyhow::anyhow!("No DID found in response for handle {}", handle))?;
384
-
422
+
385
423
Ok(did.to_string())
386
424
}
387
425
···
390
428
// For DID:plc, use the PLC directory
391
429
if did.starts_with("did:plc:") {
392
430
let url = format!("https://plc.directory/{}", did);
393
-
431
+
394
432
let response = reqwest::get(&url).await?;
395
433
if !response.status().is_success() {
396
-
return Err(anyhow::anyhow!("Failed to resolve DID {}: {}", did, response.status()));
434
+
return Err(anyhow::anyhow!(
435
+
"Failed to resolve DID {}: {}",
436
+
did,
437
+
response.status()
438
+
));
397
439
}
398
-
440
+
399
441
let doc: serde_json::Value = response.json().await?;
400
-
442
+
401
443
// Find the PDS service endpoint
402
444
if let Some(services) = doc["service"].as_array() {
403
445
for service in services {
···
405
447
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
406
448
// Extract hostname from URL
407
449
let url = url::Url::parse(endpoint)?;
408
-
let host = url.host_str()
409
-
.ok_or_else(|| anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint))?;
450
+
let host = url.host_str().ok_or_else(|| {
451
+
anyhow::anyhow!("Invalid PDS endpoint URL: {}", endpoint)
452
+
})?;
410
453
return Ok(host.to_string());
411
454
}
412
455
}
413
456
}
414
457
}
415
-
416
-
Err(anyhow::anyhow!("No PDS service found in DID document for {}", did))
458
+
459
+
Err(anyhow::anyhow!(
460
+
"No PDS service found in DID document for {}",
461
+
did
462
+
))
417
463
} else {
418
464
Err(anyhow::anyhow!("Unsupported DID method: {}", did))
419
465
}
···
421
467
422
468
/// Fetch CAR file from PDS using com.atproto.sync.getRepo
423
469
pub async fn fetch_car_from_pds(pds_host: &str, did: &str, since: Option<&str>) -> Result<Vec<u8>> {
424
-
let mut url = format!("https://{}/xrpc/com.atproto.sync.getRepo?did={}", pds_host, did);
425
-
470
+
let mut url = format!(
471
+
"https://{}/xrpc/com.atproto.sync.getRepo?did={}",
472
+
pds_host, did
473
+
);
474
+
426
475
if let Some(since_rev) = since {
427
476
url.push_str(&format!("&since={}", since_rev));
428
477
}
429
-
478
+
430
479
info!("Fetching CAR file from: {}", url);
431
-
480
+
432
481
let response = reqwest::get(&url).await?;
433
482
if !response.status().is_success() {
434
-
return Err(anyhow::anyhow!("Failed to fetch CAR from PDS {}: {}", pds_host, response.status()));
483
+
return Err(anyhow::anyhow!(
484
+
"Failed to fetch CAR from PDS {}: {}",
485
+
pds_host,
486
+
response.status()
487
+
));
435
488
}
436
-
489
+
437
490
// Verify content type
438
-
let content_type = response.headers()
491
+
let content_type = response
492
+
.headers()
439
493
.get("content-type")
440
494
.and_then(|h| h.to_str().ok())
441
495
.unwrap_or("");
442
-
496
+
443
497
if !content_type.contains("application/vnd.ipld.car") {
444
498
return Err(anyhow::anyhow!("Unexpected content type: {}", content_type));
445
499
}
446
-
500
+
447
501
let car_data = response.bytes().await?;
448
502
Ok(car_data.to_vec())
449
503
}
504
+
505
+
/// Generate a DID document for did:web
506
+
fn generate_did_document(host: &str, pubkey: &str) -> Value {
507
+
json!({
508
+
"@context": [
509
+
"https://www.w3.org/ns/did/v1",
510
+
"https://w3id.org/security/multikey/v1",
511
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
512
+
],
513
+
"id": format!("did:web:{}", host),
514
+
"alsoKnownAs": [
515
+
format!("at://{}", host)
516
+
],
517
+
"service": [
518
+
{
519
+
"id": "#bsky_fg",
520
+
"type": "BskyFeedGenerator",
521
+
"serviceEndpoint": format!("https://{}", host)
522
+
},
523
+
{
524
+
"id": "#atproto_pds",
525
+
"type": "AtprotoPersonalDataServer",
526
+
"serviceEndpoint": format!("https://{}", host)
527
+
}
528
+
],
529
+
"verificationMethod": [
530
+
{
531
+
"id": format!("did:web:{}#atproto", host),
532
+
"type": "Multikey",
533
+
"controller": format!("did:web:{}", host),
534
+
"publicKeyMultibase": pubkey
535
+
}
536
+
]
537
+
})
538
+
}
539
+
540
+
/// Handler for /.well-known/did.json endpoint
541
+
pub async fn get_did_document(
542
+
Extension(_ctx): Extension<Context>,
543
+
) -> impl axum::response::IntoResponse {
544
+
// Get the host from environment variable or use default
545
+
let host = std::env::var("APP_HOST")
546
+
.or_else(|_| std::env::var("HOST"))
547
+
.unwrap_or_else(|_| "localhost:3000".to_string());
548
+
549
+
// get pubkey from environment variable or use default
550
+
let pubkey = std::env::var("TEST_PUBKEY").unwrap_or_else(|_| {
551
+
"z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i".to_string()
552
+
});
553
+
554
+
let did_doc = generate_did_document(&host, &pubkey);
555
+
556
+
(
557
+
StatusCode::OK,
558
+
[("Content-Type", "application/json")],
559
+
Json(did_doc),
560
+
)
561
+
}
562
+
563
+
#[cfg(test)]
564
+
mod tests {
565
+
use super::*;
566
+
567
+
const TEST_PUBKEY: &str = "z6Mkw5f8g3h4j5k6l7m8n9o0p1q2r3s4t5u6v7w8x9y0z1a2b3c4d5e6f7g8h9i";
568
+
569
+
#[test]
570
+
fn test_generate_did_document() {
571
+
let host = "example.com";
572
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
573
+
574
+
// Verify the structure of the generated DID document
575
+
assert_eq!(did_doc["id"], format!("did:web:{}", host));
576
+
assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host));
577
+
578
+
// Check services
579
+
let services = did_doc["service"].as_array().unwrap();
580
+
assert_eq!(services.len(), 2);
581
+
582
+
let bsky_fg = &services[0];
583
+
assert_eq!(bsky_fg["id"], "#bsky_fg");
584
+
assert_eq!(bsky_fg["type"], "BskyFeedGenerator");
585
+
assert_eq!(bsky_fg["serviceEndpoint"], format!("https://{}", host));
586
+
587
+
let atproto_pds = &services[1];
588
+
assert_eq!(atproto_pds["id"], "#atproto_pds");
589
+
assert_eq!(atproto_pds["type"], "AtprotoPersonalDataServer");
590
+
assert_eq!(atproto_pds["serviceEndpoint"], format!("https://{}", host));
591
+
592
+
// Check verification method
593
+
let verification_methods = did_doc["verificationMethod"].as_array().unwrap();
594
+
assert_eq!(verification_methods.len(), 1);
595
+
596
+
let vm = &verification_methods[0];
597
+
assert_eq!(vm["id"], format!("did:web:{}#atproto", host));
598
+
assert_eq!(vm["type"], "Multikey");
599
+
assert_eq!(vm["controller"], format!("did:web:{}", host));
600
+
assert!(vm["publicKeyMultibase"].as_str().unwrap().starts_with("z"));
601
+
}
602
+
603
+
#[test]
604
+
fn test_did_document_context() {
605
+
let host = "test.example.org";
606
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
607
+
608
+
let context = did_doc["@context"].as_array().unwrap();
609
+
assert_eq!(context.len(), 3);
610
+
assert_eq!(context[0], "https://www.w3.org/ns/did/v1");
611
+
assert_eq!(context[1], "https://w3id.org/security/multikey/v1");
612
+
assert_eq!(
613
+
context[2],
614
+
"https://w3id.org/security/suites/secp256k1-2019/v1"
615
+
);
616
+
}
617
+
618
+
#[test]
619
+
fn test_different_hosts() {
620
+
// Test with different host formats
621
+
let hosts = vec![
622
+
"localhost:3000",
623
+
"bsky.social",
624
+
"example.org:8080",
625
+
"my-service.com",
626
+
];
627
+
628
+
for host in hosts {
629
+
let did_doc = generate_did_document(host, TEST_PUBKEY);
630
+
631
+
// Verify basic structure for each host
632
+
assert_eq!(did_doc["id"], format!("did:web:{}", host));
633
+
assert_eq!(did_doc["alsoKnownAs"][0], format!("at://{}", host));
634
+
635
+
let services = did_doc["service"].as_array().unwrap();
636
+
assert_eq!(services.len(), 2);
637
+
638
+
let verification_methods = did_doc["verificationMethod"].as_array().unwrap();
639
+
assert_eq!(verification_methods.len(), 1);
640
+
}
641
+
}
642
+
}
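Together with the routes wired up in `main.rs` below, the new handlers can be exercised locally; a hedged sketch against the dev host from the README (the handle and import id are placeholders):

```bash
# Serve the generated did:web document
curl http://localhost:3000/.well-known/did.json

# Ask aqua to fetch a user's repo CAR from their PDS and store it for import
curl -X POST http://localhost:3000/api/car/fetch \
  -H 'Content-Type: application/json' \
  -d '{"user_identifier": "alice.bsky.social"}'

# Check the import using the import_id echoed back in the response
curl http://localhost:3000/api/car/status/<import-id>
```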
+50
-25
apps/aqua/src/main.rs
+50
-25
apps/aqua/src/main.rs
···
1
-
use axum::{Router, extract::Extension, routing::{get, post}};
1
+
use axum::{
2
+
Router,
3
+
extract::Extension,
4
+
routing::{get, post},
5
+
};
6
+
use chrono::Utc;
7
+
use clap::{Arg, Command};
2
8
use std::net::SocketAddr;
3
9
use tower_http::cors::CorsLayer;
4
-
use clap::{Arg, Command};
5
10
use uuid::Uuid;
6
-
use chrono::Utc;
7
11
8
12
use ctx::RawContext;
13
+
use redis_client::RedisClient;
9
14
use repos::DataSource;
10
15
use repos::pg::PgDataSource;
11
-
use redis_client::RedisClient;
12
16
13
17
mod api;
14
18
mod ctx;
15
19
mod db;
20
+
mod redis_client;
16
21
mod repos;
22
+
mod types;
17
23
mod xrpc;
18
-
mod redis_client;
19
24
20
25
#[tokio::main]
21
26
async fn main() -> Result<(), String> {
···
32
37
.long("import-identity-car")
33
38
.value_name("HANDLE_OR_DID")
34
39
.help("Import CAR file for a specific identity (handle or DID)")
35
-
.action(clap::ArgAction::Set)
40
+
.action(clap::ArgAction::Set),
36
41
)
37
42
.get_matches();
38
43
39
44
let db = db::init_pool().await.expect("failed to init db");
40
45
let pgds = PgDataSource::new(db.clone()).boxed();
41
-
let ctx = RawContext::new(pgds).build();
46
+
let ctx = RawContext::new(pgds).build(); // Arc<RawContext>
42
47
43
48
// Check if we should import a CAR file instead of starting the server
44
49
if let Some(identity) = matches.get_one::<String>("import-identity-car") {
···
50
55
51
56
let app = Router::new()
52
57
.route("/meta_info", get(api::get_meta_info))
58
+
.route("/.well-known/did.json", get(api::get_did_document))
53
59
.route("/api/car/upload", post(api::upload_car_import))
54
60
.route("/api/car/fetch", post(api::fetch_car_from_user))
55
-
.route("/api/car/status/{import_id}", get(api::get_car_import_status))
56
-
.route("/api/car/job-status/{job_id}", get(api::get_car_import_job_status))
61
+
.route(
62
+
"/api/car/status/{import_id}",
63
+
get(api::get_car_import_status),
64
+
)
65
+
.route(
66
+
"/api/car/job-status/{job_id}",
67
+
get(api::get_car_import_job_status),
68
+
)
57
69
.nest("/xrpc/", xrpc::actor::actor_routes())
58
70
.nest("/xrpc/", xrpc::feed::feed_routes())
59
71
.nest("/xrpc/", xrpc::stats::stats_routes())
···
69
81
}
70
82
71
83
async fn import_identity_car(_ctx: &ctx::Context, identity: &str) -> Result<(), String> {
72
-
use tracing::{info, error};
73
-
use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys};
74
-
84
+
use crate::types::{CarImportJob, CarImportJobStatus, JobStatus, queue_keys};
85
+
use tracing::{error, info};
86
+
75
87
info!("Submitting CAR import job for identity: {}", identity);
76
-
88
+
77
89
// Connect to Redis
78
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
79
-
let redis_client = RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?;
80
-
90
+
let redis_url =
91
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
92
+
let redis_client =
93
+
RedisClient::new(&redis_url).map_err(|e| format!("Failed to connect to Redis: {}", e))?;
94
+
81
95
// Create job
82
96
let job = CarImportJob {
83
97
request_id: Uuid::new_v4(),
···
86
100
created_at: Utc::now(),
87
101
description: Some(format!("CLI import request for {}", identity)),
88
102
};
89
-
103
+
90
104
// Serialize job for queue
91
-
let job_data = serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?;
92
-
105
+
let job_data =
106
+
serde_json::to_string(&job).map_err(|e| format!("Failed to serialize job: {}", e))?;
107
+
93
108
// Initialize job status
94
109
let status = CarImportJobStatus {
95
110
status: JobStatus::Pending,
···
99
114
error_message: None,
100
115
progress: None,
101
116
};
102
-
let status_data = serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?;
103
-
117
+
let status_data =
118
+
serde_json::to_string(&status).map_err(|e| format!("Failed to serialize status: {}", e))?;
119
+
104
120
// Submit to queue and set initial status
105
-
match redis_client.queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data).await {
121
+
match redis_client
122
+
.queue_job(queue_keys::CAR_IMPORT_JOBS, &job_data)
123
+
.await
124
+
{
106
125
Ok(_) => {
107
126
// Set initial status
108
-
if let Err(e) = redis_client.set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data).await {
127
+
if let Err(e) = redis_client
128
+
.set_job_status(&queue_keys::job_status_key(&job.request_id), &status_data)
129
+
.await
130
+
{
109
131
error!("Failed to set job status: {}", e);
110
132
}
111
-
133
+
112
134
info!("โ
CAR import job queued successfully!");
113
135
info!("Job ID: {}", job.request_id);
114
136
info!("Identity: {}", identity);
115
-
info!("Monitor status with: curl http://localhost:3000/api/car/status/{}", job.request_id);
137
+
info!(
138
+
"Monitor status with: curl http://localhost:3000/api/car/status/{}",
139
+
job.request_id
140
+
);
116
141
Ok(())
117
142
}
118
143
Err(e) => {
+1
-1
apps/aqua/src/redis_client.rs
+1
-1
apps/aqua/src/redis_client.rs
+7
-5
apps/aqua/src/repos/actor_profile.rs
+7
-5
apps/aqua/src/repos/actor_profile.rs
···
9
9
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>>;
10
10
async fn get_multiple_actor_profiles(
11
11
&self,
12
-
identities: &Vec<String>,
12
+
identities: &[String],
13
13
) -> anyhow::Result<Vec<ProfileViewData>>;
14
14
}
15
15
···
30
30
avatar: row.avatar,
31
31
banner: row.banner,
32
32
// chrono -> atrium time
33
-
created_at: row.created_at.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
33
+
created_at: row
34
+
.created_at
35
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
34
36
description: row.description,
35
37
description_facets: row
36
38
.description_facets
37
39
.and_then(|v| serde_json::from_value(v).ok()),
38
40
did: row.did,
39
-
featured_item: None,
40
41
display_name: row.display_name,
42
+
featured_item: None,
41
43
status: row.status.and_then(|v| serde_json::from_value(v).ok()),
42
44
}
43
45
}
···
46
48
#[async_trait]
47
49
impl ActorProfileRepo for PgDataSource {
48
50
async fn get_actor_profile(&self, identity: &str) -> anyhow::Result<Option<ProfileViewData>> {
49
-
self.get_multiple_actor_profiles(&vec![identity.to_string()])
51
+
self.get_multiple_actor_profiles(&[identity.to_string()])
50
52
.await
51
53
.map(|p| p.first().cloned())
52
54
}
53
55
async fn get_multiple_actor_profiles(
54
56
&self,
55
-
identities: &Vec<String>,
57
+
identities: &[String],
56
58
) -> anyhow::Result<Vec<ProfileViewData>> {
57
59
// split identities into dids (prefixed with "did:") and handles (not prefixed) in one iteration
58
60
let mut dids = Vec::new();
+22
-18
apps/aqua/src/repos/feed_play.rs
+22
-18
apps/aqua/src/repos/feed_play.rs
···
8
8
async fn get_feed_play(&self, identity: &str) -> anyhow::Result<Option<PlayViewData>>;
9
9
async fn get_feed_plays_for_profile(
10
10
&self,
11
-
identities: &Vec<String>,
11
+
identities: &[String],
12
12
) -> anyhow::Result<Vec<PlayViewData>>;
13
13
}
14
14
···
49
49
};
50
50
51
51
Ok(Some(PlayViewData {
52
+
track_name: row.track_name.clone(),
53
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
54
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
55
+
duration: row.duration.map(|d| d as i64),
52
56
artists,
53
-
duration: row.duration.map(|d| d as i64),
57
+
release_name: row.release_name.clone(),
58
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
54
59
isrc: row.isrc,
55
-
music_service_base_domain: row.music_service_base_domain,
56
60
origin_url: row.origin_url,
57
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
58
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
59
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
60
-
release_name: row.release_name,
61
+
music_service_base_domain: row.music_service_base_domain,
61
62
submission_client_agent: row.submission_client_agent,
62
-
track_mb_id: Some(row.rkey.clone()),
63
-
track_name: row.track_name.clone(),
63
+
played_time: row
64
+
.played_time
65
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
64
66
}))
65
67
}
66
68
67
69
async fn get_feed_plays_for_profile(
68
70
&self,
69
-
identities: &Vec<String>,
71
+
identities: &[String],
70
72
) -> anyhow::Result<Vec<PlayViewData>> {
71
73
let rows = sqlx::query!(
72
74
r#"
···
105
107
};
106
108
107
109
result.push(PlayViewData {
110
+
track_name: row.track_name.clone(),
111
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
112
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
113
+
duration: row.duration.map(|d| d as i64),
108
114
artists,
109
-
duration: row.duration.map(|d| d as i64),
115
+
release_name: row.release_name.clone(),
116
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
110
117
isrc: row.isrc,
111
-
music_service_base_domain: row.music_service_base_domain,
112
118
origin_url: row.origin_url,
113
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
114
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
115
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
116
-
release_name: row.release_name,
119
+
music_service_base_domain: row.music_service_base_domain,
117
120
submission_client_agent: row.submission_client_agent,
118
-
track_mb_id: Some(row.rkey.clone()),
119
-
track_name: row.track_name.clone(),
121
+
played_time: row
122
+
.played_time
123
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
120
124
});
121
125
}
122
126
+1
-2
apps/aqua/src/repos/mod.rs
+1
-2
apps/aqua/src/repos/mod.rs
···
27
27
}
28
28
29
29
pub fn time_to_chrono_utc(dt: time::OffsetDateTime) -> chrono::DateTime<chrono::Utc> {
30
-
chrono::DateTime::from_timestamp(dt.unix_timestamp(), dt.nanosecond())
31
-
.unwrap_or_default()
30
+
chrono::DateTime::from_timestamp(dt.unix_timestamp(), dt.nanosecond()).unwrap_or_default()
32
31
}
+10
-9
apps/aqua/src/repos/stats.rs
+10
-9
apps/aqua/src/repos/stats.rs
···
85
85
if let (Some(mbid), Some(name)) = (row.mbid, row.name) {
86
86
result.push(ReleaseViewData {
87
87
mbid: mbid.to_string(),
88
-
89
88
name,
90
89
play_count: row.play_count.unwrap_or(0),
91
90
});
···
217
216
};
218
217
219
218
result.push(PlayViewData {
219
+
track_name: row.track_name.clone(),
220
+
track_mb_id: row.recording_mbid.map(|u| u.to_string()),
221
+
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
222
+
duration: row.duration.map(|d| d as i64),
220
223
artists,
221
-
duration: row.duration.map(|d| d as i64),
224
+
release_name: row.release_name.clone(),
225
+
release_mb_id: row.release_mbid.map(|u| u.to_string()),
222
226
isrc: row.isrc,
223
-
music_service_base_domain: row.music_service_base_domain,
224
227
origin_url: row.origin_url,
225
-
played_time: row.played_time.map(|t| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(t))),
226
-
recording_mb_id: row.recording_mbid.map(|u| u.to_string()),
227
-
release_mb_id: row.release_mbid.map(|u| u.to_string()),
228
-
release_name: row.release_name,
228
+
music_service_base_domain: row.music_service_base_domain,
229
229
submission_client_agent: row.submission_client_agent,
230
-
track_mb_id: Some(row.rkey.clone()),
231
-
track_name: row.track_name.clone(),
230
+
played_time: row
231
+
.played_time
232
+
.map(|dt| utc_to_atrium_datetime(crate::repos::time_to_chrono_utc(dt))),
232
233
});
233
234
}
234
235
+51
apps/aqua/src/types/jobs.rs
+51
apps/aqua/src/types/jobs.rs
···
1
+
use chrono::{DateTime, Utc};
2
+
use serde::{Deserialize, Serialize};
3
+
use uuid::Uuid;
4
+
5
+
#[derive(Debug, Clone, Serialize, Deserialize)]
6
+
pub struct CarImportJob {
7
+
pub request_id: Uuid,
8
+
pub identity: String,
9
+
pub since: Option<DateTime<Utc>>,
10
+
pub created_at: DateTime<Utc>,
11
+
pub description: Option<String>,
12
+
}
13
+
14
+
#[derive(Debug, Clone, Serialize, Deserialize)]
15
+
pub struct CarImportJobStatus {
16
+
pub status: JobStatus,
17
+
pub created_at: DateTime<Utc>,
18
+
pub started_at: Option<DateTime<Utc>>,
19
+
pub completed_at: Option<DateTime<Utc>>,
20
+
pub error_message: Option<String>,
21
+
pub progress: Option<JobProgress>,
22
+
}
23
+
24
+
#[derive(Debug, Clone, Serialize, Deserialize)]
25
+
pub enum JobStatus {
26
+
Pending,
27
+
Processing,
28
+
Completed,
29
+
Failed,
30
+
Cancelled,
31
+
}
32
+
33
+
#[derive(Debug, Clone, Serialize, Deserialize)]
34
+
pub struct JobProgress {
35
+
step: String,
36
+
pub user_did: Option<String>,
37
+
pub pds_host: Option<String>,
38
+
pub car_size_bytes: Option<u64>,
39
+
pub blocks_processed: Option<u64>,
40
+
}
41
+
42
+
pub mod queue_keys {
43
+
use uuid::Uuid;
44
+
45
+
pub const CAR_IMPORT_JOBS: &str = "car_import_jobs";
46
+
pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status";
47
+
48
+
pub fn job_status_key(job_id: &Uuid) -> String {
49
+
format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id)
50
+
}
51
+
}
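The `queue_keys` module fixes the Redis key layout, so queued work can be inspected from the side with `redis-cli`; a hedged sketch that assumes the queue is a plain list and the status is stored as a string (the exact commands depend on `RedisClient`, which is not shown in this diff):

```bash
# Pending CAR import jobs, assuming the queue key holds a Redis list
redis-cli LRANGE car_import_jobs 0 -1

# Status blob for one job, keyed as car_import_status:<job-uuid>
redis-cli GET car_import_status:<job-uuid>
```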
+1
-1
apps/aqua/src/xrpc/actor.rs
+1
-1
apps/aqua/src/xrpc/actor.rs
+17
-11
apps/aqua/src/xrpc/stats.rs
+17
-11
apps/aqua/src/xrpc/stats.rs
···
1
1
use crate::ctx::Context;
2
2
use axum::{Extension, http::StatusCode, response::IntoResponse, routing::get};
3
3
use serde::{Deserialize, Serialize};
4
-
use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData};
5
4
use types::fm::teal::alpha::feed::defs::PlayViewData;
5
+
use types::fm::teal::alpha::stats::defs::{ArtistViewData, ReleaseViewData};
6
6
7
7
// mount stats routes
8
8
pub fn stats_routes() -> axum::Router {
9
9
axum::Router::new()
10
10
.route("/fm.teal.alpha.stats.getTopArtists", get(get_top_artists))
11
11
.route("/fm.teal.alpha.stats.getTopReleases", get(get_top_releases))
12
-
.route("/fm.teal.alpha.stats.getUserTopArtists", get(get_user_top_artists))
13
-
.route("/fm.teal.alpha.stats.getUserTopReleases", get(get_user_top_releases))
12
+
.route(
13
+
"/fm.teal.alpha.stats.getUserTopArtists",
14
+
get(get_user_top_artists),
15
+
)
16
+
.route(
17
+
"/fm.teal.alpha.stats.getUserTopReleases",
18
+
get(get_user_top_releases),
19
+
)
14
20
.route("/fm.teal.alpha.stats.getLatest", get(get_latest))
15
21
}
16
22
···
29
35
axum::extract::Query(query): axum::extract::Query<GetTopArtistsQuery>,
30
36
) -> Result<impl IntoResponse, (StatusCode, String)> {
31
37
let repo = &ctx.db;
32
-
38
+
33
39
match repo.get_top_artists(query.limit).await {
34
40
Ok(artists) => Ok(axum::Json(GetTopArtistsResponse { artists })),
35
41
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
51
57
axum::extract::Query(query): axum::extract::Query<GetTopReleasesQuery>,
52
58
) -> Result<impl IntoResponse, (StatusCode, String)> {
53
59
let repo = &ctx.db;
54
-
60
+
55
61
match repo.get_top_releases(query.limit).await {
56
62
Ok(releases) => Ok(axum::Json(GetTopReleasesResponse { releases })),
57
63
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
74
80
axum::extract::Query(query): axum::extract::Query<GetUserTopArtistsQuery>,
75
81
) -> Result<impl IntoResponse, (StatusCode, String)> {
76
82
let repo = &ctx.db;
77
-
83
+
78
84
if query.actor.is_empty() {
79
85
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
80
86
}
81
-
87
+
82
88
match repo.get_user_top_artists(&query.actor, query.limit).await {
83
89
Ok(artists) => Ok(axum::Json(GetUserTopArtistsResponse { artists })),
84
90
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
101
107
axum::extract::Query(query): axum::extract::Query<GetUserTopReleasesQuery>,
102
108
) -> Result<impl IntoResponse, (StatusCode, String)> {
103
109
let repo = &ctx.db;
104
-
110
+
105
111
if query.actor.is_empty() {
106
112
return Err((StatusCode::BAD_REQUEST, "actor is required".to_string()));
107
113
}
108
-
114
+
109
115
match repo.get_user_top_releases(&query.actor, query.limit).await {
110
116
Ok(releases) => Ok(axum::Json(GetUserTopReleasesResponse { releases })),
111
117
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
···
127
133
axum::extract::Query(query): axum::extract::Query<GetLatestQuery>,
128
134
) -> Result<impl IntoResponse, (StatusCode, String)> {
129
135
let repo = &ctx.db;
130
-
136
+
131
137
match repo.get_latest(query.limit).await {
132
138
Ok(plays) => Ok(axum::Json(GetLatestResponse { plays })),
133
139
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
134
140
}
135
-
}
141
+
}
+82
-5
lexicons/README.md
+82
-5
lexicons/README.md
···
17
17
18
18
### Initial Setup
19
19
20
-
If you're cloning this repository for the first time, you'll need to initialize the submodules:
20
+
If you're cloning this repository for the first time, you'll need to initialize the submodules and create the symbolic links:
21
21
22
22
```bash
23
+
# Initialize submodules
23
24
git submodule update --init --recursive
25
+
26
+
# Create symbolic links to atproto lexicons
27
+
cd lexicons
28
+
ln -s ../vendor/atproto/lexicons/app app
29
+
ln -s ../vendor/atproto/lexicons/chat chat
30
+
ln -s ../vendor/atproto/lexicons/com com
31
+
ln -s ../vendor/atproto/lexicons/tools tools
32
+
cd ..
33
+
```
34
+
35
+
Or use the provided setup script:
36
+
37
+
```bash
38
+
./scripts/setup-lexicons.sh
24
39
```
25
40
26
41
### Updating ATProto Lexicons
27
42
28
-
To update to the latest ATProto lexicons:
43
+
To update to the latest ATProto lexicons, use the provided update script:
29
44
30
45
```bash
46
+
./scripts/update-lexicons.sh
47
+
```
48
+
49
+
This will:
50
+
1. Fetch the latest changes from the atproto repository
51
+
2. Show you what changed
52
+
3. Stage the submodule update for commit
53
+
54
+
Then commit the changes:
55
+
```bash
56
+
git commit -m "Update atproto lexicons to latest"
57
+
```
58
+
59
+
**Manual approach:**
60
+
```bash
31
61
cd vendor/atproto
32
62
git pull origin main
33
63
cd ../..
···
35
65
git commit -m "Update atproto lexicons to latest"
36
66
```
37
67
68
+
### Available Scripts
69
+
70
+
Two convenience scripts are available:
71
+
72
+
**Setup Script** - Handle the initial setup:
73
+
74
+
```bash
75
+
#!/bin/bash
76
+
# scripts/setup-lexicons.sh
77
+
78
+
echo "Setting up lexicons..."
79
+
80
+
# Initialize submodules
81
+
git submodule update --init --recursive
82
+
83
+
# Create symbolic links if they don't exist
84
+
cd lexicons
85
+
if [ ! -L app ]; then
86
+
ln -s ../vendor/atproto/lexicons/app app
87
+
echo "Created symlink: lexicons/app"
88
+
fi
89
+
if [ ! -L chat ]; then
90
+
ln -s ../vendor/atproto/lexicons/chat chat
91
+
echo "Created symlink: lexicons/chat"
92
+
fi
93
+
if [ ! -L com ]; then
94
+
ln -s ../vendor/atproto/lexicons/com com
95
+
echo "Created symlink: lexicons/com"
96
+
fi
97
+
if [ ! -L tools ]; then
98
+
ln -s ../vendor/atproto/lexicons/tools tools
99
+
echo "Created symlink: lexicons/tools"
100
+
fi
101
+
cd ..
102
+
103
+
echo "Lexicons setup complete!"
104
+
```
105
+
106
+
**Update Script** - Update ATProto lexicons:
107
+
108
+
```bash
109
+
# Usage: run from the repository root
110
+
# scripts/update-lexicons.sh
111
+
112
+
# Fetches latest changes from atproto repository
113
+
# Shows what changed and stages the update for commit
114
+
./scripts/update-lexicons.sh
115
+
```
116
+
38
117
### Adding Custom Lexicons
39
118
40
119
Custom lexicons should be added to the `fm.teal.alpha/` directory following the ATProto lexicon schema format. These files are tracked directly in our repository and not affected by submodule updates.
41
120
42
-
## Generated Files
43
-
44
-
This directory may contain generated files (`.js`, `.d.ts`, etc.) that are created by lexicon compilation tools. These are ignored by git as specified in the `.gitignore` file.
121
+
**Note**: The symbolic links (`app`, `chat`, `com`, `tools`) are not tracked in git and will be created during setup. They are ignored in `.gitignore` to avoid conflicts.
+21
-1
lexicons/fm.teal.alpha/actor/defs.json
+21
-1
lexicons/fm.teal.alpha/actor/defs.json
···
36
36
},
37
37
"status": {
38
38
"type": "ref",
39
-
"ref": "fm.teal.alpha.actor.status#main"
39
+
"ref": "#statusView"
40
40
},
41
41
"createdAt": { "type": "string", "format": "datetime" }
42
42
}
···
57
57
"avatar": {
58
58
"type": "string",
59
59
"description": "IPLD of the avatar"
60
+
}
61
+
}
62
+
},
63
+
"statusView": {
64
+
"type": "object",
65
+
"description": "A declaration of the status of the actor.",
66
+
"properties": {
67
+
"time": {
68
+
"type": "string",
69
+
"format": "datetime",
70
+
"description": "The unix timestamp of when the item was recorded"
71
+
},
72
+
"expiry": {
73
+
"type": "string",
74
+
"format": "datetime",
75
+
"description": "The unix timestamp of the expiry time of the item. If unavailable, default to 10 minutes past the start time."
76
+
},
77
+
"item": {
78
+
"type": "ref",
79
+
"ref": "fm.teal.alpha.feed.defs#playView"
60
80
}
61
81
}
62
82
}
+12
-1
lexicons/fm.teal.alpha/feed/play.json
+12
-1
lexicons/fm.teal.alpha/feed/play.json
···
19
19
},
20
20
"trackMbId": {
21
21
"type": "string",
22
-
23
22
"description": "The Musicbrainz ID of the track"
24
23
},
25
24
"recordingMbId": {
···
87
86
"type": "string",
88
87
"format": "datetime",
89
88
"description": "The unix timestamp of when the track was played"
89
+
},
90
+
"trackDiscriminant": {
91
+
"type": "string",
92
+
"maxLength": 128,
93
+
"maxGraphemes": 1280,
94
+
"description": "Distinguishing information for track variants (e.g. 'Acoustic Version', 'Live at Wembley', 'Radio Edit', 'Demo'). Used to differentiate between different versions of the same base track while maintaining grouping capabilities."
95
+
},
96
+
"releaseDiscriminant": {
97
+
"type": "string",
98
+
"maxLength": 128,
99
+
"maxGraphemes": 1280,
100
+
"description": "Distinguishing information for release variants (e.g. 'Deluxe Edition', 'Remastered', '2023 Remaster', 'Special Edition'). Used to differentiate between different versions of the same base release while maintaining grouping capabilities."
90
101
}
91
102
}
92
103
}
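The discriminant fields are meant to separate variant labels from the base names so plays can still be grouped. A minimal sketch of that read path, assuming the `track_discriminant` column that the migrations below add to `plays` (the track name is a placeholder):

```sql
-- Group one base track while keeping its variants visible.
SELECT
    track_name,
    track_discriminant,                 -- e.g. 'Acoustic Version'; NULL for the plain recording
    COUNT(*)            AS play_count,
    COUNT(DISTINCT did) AS unique_listeners
FROM plays
WHERE track_name = 'Some Track'         -- placeholder base name
GROUP BY track_name, track_discriminant
ORDER BY play_count DESC;
```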
+24
lexicons/fm.teal.alpha/richtext/facet.json
+24
lexicons/fm.teal.alpha/richtext/facet.json
···
1
+
{
2
+
"lexicon": 1,
3
+
"id": "fm.teal.alpha.richtext.facet",
4
+
"defs": {
5
+
"main": {
6
+
"type": "object",
7
+
"description": "Annotation of a sub-string within rich text.",
8
+
"required": ["index", "features"],
9
+
"properties": {
10
+
"index": { "type": "ref", "ref": "app.bsky.richtext.facet#byteSlice" },
11
+
"features": {
12
+
"type": "array",
13
+
"items": {
14
+
"type": "union",
15
+
"refs": [
16
+
"app.bsky.richtext.facet#mention",
17
+
"app.bsky.richtext.facet#link"
18
+
]
19
+
}
20
+
}
21
+
}
22
+
}
23
+
}
24
+
}
+226
migrations/20241220000001_initial_schema.sql
+226
migrations/20241220000001_initial_schema.sql
···
1
+
-- Initial comprehensive schema for Teal music platform
2
+
-- Based on services/cadet/sql/base.sql
3
+
4
+
CREATE TABLE artists (
5
+
mbid UUID PRIMARY KEY,
6
+
name TEXT NOT NULL,
7
+
play_count INTEGER DEFAULT 0
8
+
);
9
+
10
+
-- releases are analogous to 'albums'
11
+
CREATE TABLE releases (
12
+
mbid UUID PRIMARY KEY,
13
+
name TEXT NOT NULL,
14
+
play_count INTEGER DEFAULT 0
15
+
);
16
+
17
+
-- recordings are analogous to 'tracks' BUT a track can appear on multiple releases!
18
+
CREATE TABLE recordings (
19
+
mbid UUID PRIMARY KEY,
20
+
name TEXT NOT NULL,
21
+
play_count INTEGER DEFAULT 0
22
+
);
23
+
24
+
CREATE TABLE plays (
25
+
uri TEXT PRIMARY KEY,
26
+
did TEXT NOT NULL,
27
+
rkey TEXT NOT NULL,
28
+
cid TEXT NOT NULL,
29
+
isrc TEXT,
30
+
duration INTEGER,
31
+
track_name TEXT NOT NULL,
32
+
played_time TIMESTAMP WITH TIME ZONE,
33
+
processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
34
+
release_mbid UUID,
35
+
release_name TEXT,
36
+
recording_mbid UUID,
37
+
submission_client_agent TEXT,
38
+
music_service_base_domain TEXT,
39
+
origin_url TEXT,
40
+
FOREIGN KEY (release_mbid) REFERENCES releases (mbid),
41
+
FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid)
42
+
);
43
+
44
+
CREATE INDEX idx_plays_release_mbid ON plays (release_mbid);
45
+
CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid);
46
+
CREATE INDEX idx_plays_played_time ON plays (played_time);
47
+
CREATE INDEX idx_plays_did ON plays (did);
48
+
49
+
CREATE TABLE play_to_artists (
50
+
play_uri TEXT, -- references plays(uri)
51
+
artist_mbid UUID REFERENCES artists (mbid),
52
+
artist_name TEXT, -- storing here for ease of use when joining
53
+
PRIMARY KEY (play_uri, artist_mbid),
54
+
FOREIGN KEY (play_uri) REFERENCES plays (uri)
55
+
);
56
+
57
+
CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid);
58
+
59
+
-- Profiles table
60
+
CREATE TABLE profiles (
61
+
did TEXT PRIMARY KEY,
62
+
handle TEXT,
63
+
display_name TEXT,
64
+
description TEXT,
65
+
description_facets JSONB,
66
+
avatar TEXT, -- IPLD of the image, bafy...
67
+
banner TEXT,
68
+
created_at TIMESTAMP WITH TIME ZONE
69
+
);
70
+
71
+
-- User featured items table
72
+
CREATE TABLE featured_items (
73
+
did TEXT PRIMARY KEY,
74
+
mbid TEXT NOT NULL,
75
+
type TEXT NOT NULL
76
+
);
77
+
78
+
-- Statii table (status records)
79
+
CREATE TABLE statii (
80
+
uri TEXT PRIMARY KEY,
81
+
did TEXT NOT NULL,
82
+
rkey TEXT NOT NULL,
83
+
cid TEXT NOT NULL,
84
+
record JSONB NOT NULL,
85
+
indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
86
+
);
87
+
88
+
CREATE INDEX idx_statii_did_rkey ON statii (did, rkey);
89
+
90
+
-- Materialized view for artists' play counts
91
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
92
+
SELECT
93
+
a.mbid AS artist_mbid,
94
+
a.name AS artist_name,
95
+
COUNT(p.uri) AS play_count
96
+
FROM
97
+
artists a
98
+
LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
99
+
LEFT JOIN plays p ON p.uri = pta.play_uri
100
+
GROUP BY
101
+
a.mbid,
102
+
a.name;
103
+
104
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid);
105
+
106
+
-- Materialized view for releases' play counts
107
+
CREATE MATERIALIZED VIEW mv_release_play_counts AS
108
+
SELECT
109
+
r.mbid AS release_mbid,
110
+
r.name AS release_name,
111
+
COUNT(p.uri) AS play_count
112
+
FROM
113
+
releases r
114
+
LEFT JOIN plays p ON p.release_mbid = r.mbid
115
+
GROUP BY
116
+
r.mbid,
117
+
r.name;
118
+
119
+
CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid);
120
+
121
+
-- Materialized view for recordings' play counts
122
+
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
123
+
SELECT
124
+
rec.mbid AS recording_mbid,
125
+
rec.name AS recording_name,
126
+
COUNT(p.uri) AS play_count
127
+
FROM
128
+
recordings rec
129
+
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
130
+
GROUP BY
131
+
rec.mbid,
132
+
rec.name;
133
+
134
+
CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid);
135
+
136
+
-- Global play count materialized view
137
+
CREATE MATERIALIZED VIEW mv_global_play_count AS
138
+
SELECT
139
+
COUNT(uri) AS total_plays,
140
+
COUNT(DISTINCT did) AS unique_listeners
141
+
FROM plays;
142
+
143
+
CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays);
144
+
145
+
-- Top artists in the last 30 days
146
+
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
147
+
SELECT
148
+
a.mbid AS artist_mbid,
149
+
a.name AS artist_name,
150
+
COUNT(p.uri) AS play_count
151
+
FROM artists a
152
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
153
+
INNER JOIN plays p ON p.uri = pta.play_uri
154
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
155
+
GROUP BY a.mbid, a.name
156
+
ORDER BY COUNT(p.uri) DESC;
157
+
158
+
-- Top releases in the last 30 days
159
+
CREATE MATERIALIZED VIEW mv_top_releases_30days AS
160
+
SELECT
161
+
r.mbid AS release_mbid,
162
+
r.name AS release_name,
163
+
COUNT(p.uri) AS play_count
164
+
FROM releases r
165
+
INNER JOIN plays p ON p.release_mbid = r.mbid
166
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
167
+
GROUP BY r.mbid, r.name
168
+
ORDER BY COUNT(p.uri) DESC;
169
+
170
+
-- Top artists for user in the last 30 days
171
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
172
+
SELECT
173
+
prof.did,
174
+
a.mbid AS artist_mbid,
175
+
a.name AS artist_name,
176
+
COUNT(p.uri) AS play_count
177
+
FROM artists a
178
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
179
+
INNER JOIN plays p ON p.uri = pta.play_uri
180
+
INNER JOIN profiles prof ON prof.did = p.did
181
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
182
+
GROUP BY prof.did, a.mbid, a.name
183
+
ORDER BY COUNT(p.uri) DESC;
184
+
185
+
-- Top artists for user in the last 7 days
186
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
187
+
SELECT
188
+
prof.did,
189
+
a.mbid AS artist_mbid,
190
+
a.name AS artist_name,
191
+
COUNT(p.uri) AS play_count
192
+
FROM artists a
193
+
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
194
+
INNER JOIN plays p ON p.uri = pta.play_uri
195
+
INNER JOIN profiles prof ON prof.did = p.did
196
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
197
+
GROUP BY prof.did, a.mbid, a.name
198
+
ORDER BY COUNT(p.uri) DESC;
199
+
200
+
-- Top releases for user in the last 30 days
201
+
CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS
202
+
SELECT
203
+
prof.did,
204
+
r.mbid AS release_mbid,
205
+
r.name AS release_name,
206
+
COUNT(p.uri) AS play_count
207
+
FROM releases r
208
+
INNER JOIN plays p ON p.release_mbid = r.mbid
209
+
INNER JOIN profiles prof ON prof.did = p.did
210
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
211
+
GROUP BY prof.did, r.mbid, r.name
212
+
ORDER BY COUNT(p.uri) DESC;
213
+
214
+
-- Top releases for user in the last 7 days
215
+
CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS
216
+
SELECT
217
+
prof.did,
218
+
r.mbid AS release_mbid,
219
+
r.name AS release_name,
220
+
COUNT(p.uri) AS play_count
221
+
FROM releases r
222
+
INNER JOIN plays p ON p.release_mbid = r.mbid
223
+
INNER JOIN profiles prof ON prof.did = p.did
224
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
225
+
GROUP BY prof.did, r.mbid, r.name
226
+
ORDER BY COUNT(p.uri) DESC;
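The unique indexes on the per-entity aggregates are what allow them to be refreshed concurrently. A sketch of how a background job might refresh and query them (the DID and the 10-row limit are placeholders):

```sql
-- Refresh the aggregates that carry a unique index without blocking readers.
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_artist_play_counts;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_release_play_counts;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_recording_play_counts;

-- Example read path: one listener's top artists over the last 30 days.
SELECT artist_name, play_count
FROM mv_top_artists_for_user_30days
WHERE did = 'did:plc:example'           -- placeholder DID
ORDER BY play_count DESC
LIMIT 10;
```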
+59
migrations/20241220000002_car_import_tables.sql
+59
migrations/20241220000002_car_import_tables.sql
···
1
+
-- CAR import functionality tables
2
+
-- For handling AT Protocol CAR file imports and processing
3
+
4
+
-- Tracks uploaded CAR files that are queued for processing
5
+
CREATE TABLE IF NOT EXISTS car_import_requests (
6
+
import_id TEXT PRIMARY KEY,
7
+
car_data_base64 TEXT NOT NULL,
8
+
status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed
9
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
10
+
processed_at TIMESTAMP WITH TIME ZONE,
11
+
error_message TEXT,
12
+
file_size_bytes INTEGER,
13
+
block_count INTEGER,
14
+
extracted_records_count INTEGER DEFAULT 0
15
+
);
16
+
17
+
CREATE INDEX idx_car_import_requests_status ON car_import_requests (status);
18
+
CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at);
19
+
20
+
-- Tracks raw IPLD blocks extracted from CAR files
21
+
CREATE TABLE IF NOT EXISTS car_blocks (
22
+
cid TEXT PRIMARY KEY,
23
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
24
+
block_data BYTEA NOT NULL,
25
+
decoded_successfully BOOLEAN DEFAULT FALSE,
26
+
collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc.
27
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
28
+
);
29
+
30
+
CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id);
31
+
CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type);
32
+
33
+
-- Tracks records extracted from CAR imports that were successfully processed
34
+
CREATE TABLE IF NOT EXISTS car_extracted_records (
35
+
id SERIAL PRIMARY KEY,
36
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
37
+
cid TEXT NOT NULL REFERENCES car_blocks(cid),
38
+
collection_type TEXT NOT NULL,
39
+
record_uri TEXT, -- AT URI if applicable (e.g., for play records)
40
+
synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123')
41
+
rkey TEXT,
42
+
extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
43
+
processing_notes TEXT
44
+
);
45
+
46
+
CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id);
47
+
CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type);
48
+
CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri);
49
+
50
+
-- Tracks import metadata and commit information
51
+
CREATE TABLE IF NOT EXISTS car_import_metadata (
52
+
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
53
+
metadata_key TEXT NOT NULL,
54
+
metadata_value JSONB NOT NULL,
55
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
56
+
PRIMARY KEY (import_id, metadata_key)
57
+
);
58
+
59
+
CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
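A sketch of the lifecycle these tables are designed for, with placeholder identifiers: a request row starts out pending, extracted blocks and records point back at it, and the row is closed out with summary counters.

```sql
-- Queue a new import (placeholder id and payload).
INSERT INTO car_import_requests (import_id, car_data_base64, file_size_bytes)
VALUES ('import-0001', '<base64-encoded CAR bytes>', 2048);

-- After processing, record the outcome and summary counters.
UPDATE car_import_requests
SET status = 'completed',
    processed_at = NOW(),
    block_count = 17,
    extracted_records_count = 5
WHERE import_id = 'import-0001';

-- Inspect what was pulled out of a given import, by collection.
SELECT collection_type, COUNT(*) AS records
FROM car_extracted_records
WHERE import_id = 'import-0001'
GROUP BY collection_type;
```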
+112
migrations/20241220000003_artists_without_mbids.sql
+112
migrations/20241220000003_artists_without_mbids.sql
···
1
+
-- Migration to support artists without MusicBrainz IDs
2
+
-- This allows the system to comply with the Teal lexicon where only trackName is required
3
+
4
+
-- Add a field to plays table to store raw artist names for records without MBIDs
5
+
ALTER TABLE plays ADD COLUMN artist_names_raw JSONB;
6
+
7
+
-- Create a new artists table that doesn't require MBID as primary key
8
+
CREATE TABLE artists_extended (
9
+
id SERIAL PRIMARY KEY,
10
+
mbid UUID UNIQUE, -- Optional MusicBrainz ID
11
+
name TEXT NOT NULL,
12
+
name_normalized TEXT GENERATED ALWAYS AS (LOWER(TRIM(name))) STORED,
13
+
play_count INTEGER DEFAULT 0,
14
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
15
+
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
16
+
);
17
+
18
+
-- Create index for efficient lookups
19
+
CREATE INDEX idx_artists_extended_mbid ON artists_extended (mbid) WHERE mbid IS NOT NULL;
20
+
CREATE INDEX idx_artists_extended_name_normalized ON artists_extended (name_normalized);
21
+
CREATE UNIQUE INDEX idx_artists_extended_name_unique ON artists_extended (name_normalized) WHERE mbid IS NULL;
22
+
23
+
-- Create a new junction table that can handle both MBID and non-MBID artists
24
+
CREATE TABLE play_to_artists_extended (
25
+
play_uri TEXT NOT NULL REFERENCES plays(uri),
26
+
artist_id INTEGER NOT NULL REFERENCES artists_extended(id),
27
+
artist_name TEXT NOT NULL, -- Denormalized for performance
28
+
PRIMARY KEY (play_uri, artist_id)
29
+
);
30
+
31
+
CREATE INDEX idx_play_to_artists_extended_artist ON play_to_artists_extended (artist_id);
32
+
33
+
-- Migrate existing data from old tables to new structure
34
+
INSERT INTO artists_extended (mbid, name, play_count)
35
+
SELECT mbid, name, play_count FROM artists;
36
+
37
+
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name)
38
+
SELECT
39
+
pta.play_uri,
40
+
ae.id,
41
+
pta.artist_name
42
+
FROM play_to_artists pta
43
+
JOIN artists_extended ae ON ae.mbid = pta.artist_mbid;
44
+
45
+
-- Update materialized views to use new structure
46
+
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
47
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
48
+
SELECT
49
+
ae.id AS artist_id,
50
+
ae.mbid AS artist_mbid,
51
+
ae.name AS artist_name,
52
+
COUNT(p.uri) AS play_count
53
+
FROM
54
+
artists_extended ae
55
+
LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
56
+
LEFT JOIN plays p ON p.uri = ptae.play_uri
57
+
GROUP BY
58
+
ae.id, ae.mbid, ae.name;
59
+
60
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts_new ON mv_artist_play_counts (artist_id);
61
+
62
+
-- Update other materialized views that reference artists
63
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_30days;
64
+
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
65
+
SELECT
66
+
ae.id AS artist_id,
67
+
ae.mbid AS artist_mbid,
68
+
ae.name AS artist_name,
69
+
COUNT(p.uri) AS play_count
70
+
FROM artists_extended ae
71
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
72
+
INNER JOIN plays p ON p.uri = ptae.play_uri
73
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
74
+
GROUP BY ae.id, ae.mbid, ae.name
75
+
ORDER BY COUNT(p.uri) DESC;
76
+
77
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_30days;
78
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
79
+
SELECT
80
+
prof.did,
81
+
ae.id AS artist_id,
82
+
ae.mbid AS artist_mbid,
83
+
ae.name AS artist_name,
84
+
COUNT(p.uri) AS play_count
85
+
FROM artists_extended ae
86
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
87
+
INNER JOIN plays p ON p.uri = ptae.play_uri
88
+
INNER JOIN profiles prof ON prof.did = p.did
89
+
WHERE p.played_time >= NOW() - INTERVAL '30 days'
90
+
GROUP BY prof.did, ae.id, ae.mbid, ae.name
91
+
ORDER BY COUNT(p.uri) DESC;
92
+
93
+
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_7days;
94
+
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
95
+
SELECT
96
+
prof.did,
97
+
ae.id AS artist_id,
98
+
ae.mbid AS artist_mbid,
99
+
ae.name AS artist_name,
100
+
COUNT(p.uri) AS play_count
101
+
FROM artists_extended ae
102
+
INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
103
+
INNER JOIN plays p ON p.uri = ptae.play_uri
104
+
INNER JOIN profiles prof ON prof.did = p.did
105
+
WHERE p.played_time >= NOW() - INTERVAL '7 days'
106
+
GROUP BY prof.did, ae.id, ae.mbid, ae.name
107
+
ORDER BY COUNT(p.uri) DESC;
108
+
109
+
-- Comment explaining the migration strategy
110
+
COMMENT ON TABLE artists_extended IS 'Extended artists table that supports both MusicBrainz and non-MusicBrainz artists. Uses serial ID as primary key with optional MBID.';
111
+
COMMENT ON TABLE play_to_artists_extended IS 'Junction table linking plays to artists using the new artists_extended table structure.';
112
+
COMMENT ON COLUMN plays.artist_names_raw IS 'Raw artist names as JSON array for plays without MusicBrainz data, used as fallback when artist relationships cannot be established.';
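A sketch of how a play without MusicBrainz data could be linked under this scheme (the URI and artist name are placeholders): keep the raw names on the play as a fallback, create the artist by name if needed, then attach it through the new junction table.

```sql
-- Fallback: keep the raw artist names on the play itself.
UPDATE plays
SET artist_names_raw = '["Example Artist"]'::jsonb
WHERE uri = 'at://did:plc:example/fm.teal.alpha.feed.play/3kexample';

-- Create the artist by name when no MBID is available.
INSERT INTO artists_extended (name)
VALUES ('Example Artist')
ON CONFLICT DO NOTHING;

-- Link the play to the artist through the extended junction table.
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name)
SELECT 'at://did:plc:example/fm.teal.alpha.feed.play/3kexample', ae.id, ae.name
FROM artists_extended ae
WHERE ae.name_normalized = LOWER(TRIM('Example Artist'))
ON CONFLICT DO NOTHING;
```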
+76
migrations/20241220000004_synthetic_mbids.sql
+76
migrations/20241220000004_synthetic_mbids.sql
···
1
+
-- Migration to support synthetic MBIDs for artists without MusicBrainz data
2
+
-- This ensures all artists have some form of ID while maintaining uniqueness
3
+
4
+
-- Enable UUID extension for v5 UUID generation
5
+
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
6
+
7
+
-- Add a column to track MBID type (musicbrainz, synthetic, unknown)
8
+
ALTER TABLE artists_extended ADD COLUMN mbid_type TEXT DEFAULT 'unknown' NOT NULL;
9
+
10
+
-- Add check constraint for valid MBID types
11
+
ALTER TABLE artists_extended ADD CONSTRAINT chk_mbid_type
12
+
CHECK (mbid_type IN ('musicbrainz', 'synthetic', 'unknown'));
13
+
14
+
-- Update existing records to set proper MBID type
15
+
UPDATE artists_extended SET mbid_type = 'musicbrainz' WHERE mbid IS NOT NULL;
16
+
17
+
-- Drop the unique constraint on name_normalized for null MBIDs since we'll handle duplicates differently
18
+
DROP INDEX IF EXISTS idx_artists_extended_name_unique;
19
+
20
+
-- Add index for efficient querying by MBID type
21
+
CREATE INDEX idx_artists_extended_mbid_type ON artists_extended (mbid_type);
22
+
23
+
-- Create a view to easily work with different artist types
24
+
CREATE VIEW artists_with_type AS
25
+
SELECT
26
+
id,
27
+
mbid,
28
+
name,
29
+
mbid_type,
30
+
play_count,
31
+
created_at,
32
+
updated_at,
33
+
-- For synthetic MBIDs, we can show the source name used for generation
34
+
CASE
35
+
WHEN mbid_type = 'synthetic' THEN 'Generated from: ' || name
36
+
WHEN mbid_type = 'musicbrainz' THEN 'MusicBrainz: ' || mbid::text
37
+
ELSE 'No MBID available'
38
+
END as mbid_info
39
+
FROM artists_extended;
40
+
41
+
-- Update materialized views to include MBID type information
42
+
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
43
+
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
44
+
SELECT
45
+
ae.id AS artist_id,
46
+
ae.mbid AS artist_mbid,
47
+
ae.name AS artist_name,
48
+
ae.mbid_type,
49
+
COUNT(p.uri) AS play_count
50
+
FROM
51
+
artists_extended ae
52
+
LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
53
+
LEFT JOIN plays p ON p.uri = ptae.play_uri
54
+
GROUP BY
55
+
ae.id, ae.mbid, ae.name, ae.mbid_type;
56
+
57
+
CREATE UNIQUE INDEX idx_mv_artist_play_counts_with_type ON mv_artist_play_counts (artist_id);
58
+
59
+
-- Add comments explaining the synthetic MBID system
60
+
COMMENT ON COLUMN artists_extended.mbid_type IS 'Type of MBID: musicbrainz (real), synthetic (generated), or unknown (legacy data)';
61
+
COMMENT ON COLUMN artists_extended.mbid IS 'MusicBrainz ID (for musicbrainz type) or synthetic UUID (for synthetic type)';
62
+
COMMENT ON VIEW artists_with_type IS 'View that provides human-readable information about artist MBID sources';
63
+
64
+
-- Add a function to generate synthetic MBIDs
65
+
CREATE OR REPLACE FUNCTION generate_synthetic_mbid(artist_name TEXT) RETURNS UUID AS $$
66
+
DECLARE
67
+
namespace_uuid UUID := '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; -- DNS namespace
68
+
result_uuid UUID;
69
+
BEGIN
70
+
-- Generate deterministic UUID v5 based on artist name
71
+
SELECT uuid_generate_v5(namespace_uuid, artist_name) INTO result_uuid;
72
+
RETURN result_uuid;
73
+
END;
74
+
$$ LANGUAGE plpgsql IMMUTABLE;
75
+
76
+
COMMENT ON FUNCTION generate_synthetic_mbid IS 'Generates a deterministic UUID v5 for artist names without MusicBrainz IDs';
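Because the synthetic MBID is a deterministic UUID v5 of the name, the same unmatched artist always resolves to the same identifier across imports. A sketch with a placeholder name:

```sql
-- The same input always yields the same synthetic MBID.
SELECT generate_synthetic_mbid('Example Garage Band') AS synthetic_mbid;

-- Register an artist under a synthetic MBID and tag its origin.
INSERT INTO artists_extended (mbid, name, mbid_type)
VALUES (generate_synthetic_mbid('Example Garage Band'), 'Example Garage Band', 'synthetic')
ON CONFLICT (mbid) DO NOTHING;

-- Human-readable origin via the helper view.
SELECT name, mbid_type, mbid_info
FROM artists_with_type
WHERE name = 'Example Garage Band';
```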
+101
migrations/20241220000005_fuzzy_matching.sql
+101
migrations/20241220000005_fuzzy_matching.sql
···
1
+
-- Migration to add fuzzy text matching capabilities
2
+
-- This enables better artist name matching using trigram similarity
3
+
4
+
-- Enable pg_trgm extension for trigram similarity matching
5
+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
6
+
7
+
-- Create indexes for efficient trigram matching on artist names
8
+
CREATE INDEX idx_artists_extended_name_trgm ON artists_extended USING gin (name gin_trgm_ops);
9
+
CREATE INDEX idx_artists_extended_name_normalized_trgm ON artists_extended USING gin (name_normalized gin_trgm_ops);
10
+
11
+
-- Create a function to calculate comprehensive artist similarity
12
+
CREATE OR REPLACE FUNCTION calculate_artist_similarity(
13
+
input_name TEXT,
14
+
existing_name TEXT,
15
+
input_album TEXT DEFAULT NULL,
16
+
existing_album TEXT DEFAULT NULL
17
+
) RETURNS FLOAT AS $$
18
+
DECLARE
19
+
name_similarity FLOAT;
20
+
album_similarity FLOAT := 0.0;
21
+
final_score FLOAT;
22
+
BEGIN
23
+
-- Calculate trigram similarity for artist names
24
+
name_similarity := similarity(LOWER(TRIM(input_name)), LOWER(TRIM(existing_name)));
25
+
26
+
-- Boost for exact matches after normalization
27
+
IF LOWER(TRIM(regexp_replace(input_name, '[^a-zA-Z0-9\s]', '', 'g'))) =
28
+
LOWER(TRIM(regexp_replace(existing_name, '[^a-zA-Z0-9\s]', '', 'g'))) THEN
29
+
name_similarity := GREATEST(name_similarity, 0.95);
30
+
END IF;
31
+
32
+
-- Factor in album similarity if both are provided
33
+
IF input_album IS NOT NULL AND existing_album IS NOT NULL THEN
34
+
album_similarity := similarity(LOWER(TRIM(input_album)), LOWER(TRIM(existing_album)));
35
+
-- Weight: 80% name, 20% album
36
+
final_score := (name_similarity * 0.8) + (album_similarity * 0.2);
37
+
ELSE
38
+
final_score := name_similarity;
39
+
END IF;
40
+
41
+
RETURN final_score;
42
+
END;
43
+
$$ LANGUAGE plpgsql IMMUTABLE;
44
+
45
+
-- Create a view for fuzzy artist matching with confidence scores
46
+
CREATE VIEW fuzzy_artist_matches AS
47
+
SELECT DISTINCT
48
+
ae1.id as query_artist_id,
49
+
ae1.name as query_artist_name,
50
+
ae1.mbid_type as query_mbid_type,
51
+
ae2.id as match_artist_id,
52
+
ae2.name as match_artist_name,
53
+
ae2.mbid as match_mbid,
54
+
ae2.mbid_type as match_mbid_type,
55
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as name_similarity,
56
+
CASE
57
+
WHEN ae2.mbid_type = 'musicbrainz' THEN 'upgrade_to_mb'
58
+
WHEN ae1.mbid_type = 'musicbrainz' AND ae2.mbid_type = 'synthetic' THEN 'consolidate_to_mb'
59
+
ELSE 'merge_synthetic'
60
+
END as match_action
61
+
FROM artists_extended ae1
62
+
CROSS JOIN artists_extended ae2
63
+
WHERE ae1.id != ae2.id
64
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) > 0.8
65
+
AND (
66
+
ae1.mbid_type = 'synthetic' OR ae2.mbid_type = 'musicbrainz'
67
+
);
68
+
69
+
-- Add comments
70
+
COMMENT ON EXTENSION pg_trgm IS 'Trigram extension for fuzzy text matching';
71
+
COMMENT ON INDEX idx_artists_extended_name_trgm IS 'GIN index for trigram similarity on artist names';
72
+
COMMENT ON FUNCTION calculate_artist_similarity IS 'Calculates similarity score between artists considering name and optional album context';
73
+
COMMENT ON VIEW fuzzy_artist_matches IS 'Shows potential artist matches with confidence scores and recommended actions';
74
+
75
+
-- Create a function to suggest artist consolidations
76
+
CREATE OR REPLACE FUNCTION suggest_artist_consolidations(min_similarity FLOAT DEFAULT 0.9)
77
+
RETURNS TABLE(
78
+
action TEXT,
79
+
synthetic_artist TEXT,
80
+
target_artist TEXT,
81
+
similarity_score FLOAT,
82
+
synthetic_plays INTEGER,
83
+
target_plays INTEGER
84
+
) AS $$
85
+
BEGIN
86
+
RETURN QUERY
87
+
SELECT
88
+
fam.match_action as action,
89
+
fam.query_artist_name as synthetic_artist,
90
+
fam.match_artist_name as target_artist,
91
+
fam.name_similarity as similarity_score,
92
+
(SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.query_artist_id) as synthetic_plays,
93
+
(SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.match_artist_id) as target_plays
94
+
FROM fuzzy_artist_matches fam
95
+
WHERE fam.name_similarity >= min_similarity
96
+
AND fam.match_action = 'upgrade_to_mb'
97
+
ORDER BY fam.name_similarity DESC, synthetic_plays DESC;
98
+
END;
99
+
$$ LANGUAGE plpgsql;
100
+
101
+
COMMENT ON FUNCTION suggest_artist_consolidations IS 'Returns suggestions for consolidating synthetic artists with MusicBrainz artists based on similarity';
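A sketch of how these pieces fit together (the names and the 0.9 threshold are illustrative): trigram similarity scores candidate pairs, and the helper function surfaces synthetic artists that look like existing MusicBrainz entries.

```sql
-- Raw trigram similarity (0..1) between two artist names.
SELECT similarity('The Beatles', 'Beatles') AS name_similarity;

-- Combined score: 80% artist name, 20% optional album context.
SELECT calculate_artist_similarity('The Beatles', 'Beatles',
                                   'Abbey Road', 'Abbey Road (2019 Mix)') AS score;

-- Candidate consolidations of synthetic artists into MusicBrainz ones.
SELECT * FROM suggest_artist_consolidations(0.9);
```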
+138
migrations/20241220000006_discriminant_fields.sql
+138
migrations/20241220000006_discriminant_fields.sql
···
1
+
-- Migration to add discriminant fields for track and release variants
2
+
-- This enables proper handling of different versions while maintaining grouping capabilities
3
+
4
+
-- Add discriminant fields to plays table
5
+
ALTER TABLE plays ADD COLUMN track_discriminant TEXT;
6
+
ALTER TABLE plays ADD COLUMN release_discriminant TEXT;
7
+
8
+
-- Add discriminant field to releases table
9
+
ALTER TABLE releases ADD COLUMN discriminant TEXT;
10
+
11
+
-- Add discriminant field to recordings table
12
+
ALTER TABLE recordings ADD COLUMN discriminant TEXT;
13
+
14
+
-- Create indexes for efficient searching and filtering
15
+
CREATE INDEX idx_plays_track_discriminant ON plays (track_discriminant);
16
+
CREATE INDEX idx_plays_release_discriminant ON plays (release_discriminant);
17
+
CREATE INDEX idx_releases_discriminant ON releases (discriminant);
18
+
CREATE INDEX idx_recordings_discriminant ON recordings (discriminant);
19
+
20
+
-- Create composite indexes for grouping by base name + discriminant
21
+
CREATE INDEX idx_plays_track_name_discriminant ON plays (track_name, track_discriminant);
22
+
CREATE INDEX idx_plays_release_name_discriminant ON plays (release_name, release_discriminant);
23
+
24
+
-- Update materialized views to include discriminant information
25
+
DROP MATERIALIZED VIEW IF EXISTS mv_release_play_counts;
26
+
CREATE MATERIALIZED VIEW mv_release_play_counts AS
27
+
SELECT
28
+
r.mbid AS release_mbid,
29
+
r.name AS release_name,
30
+
r.discriminant AS release_discriminant,
31
+
COUNT(p.uri) AS play_count
32
+
FROM
33
+
releases r
34
+
LEFT JOIN plays p ON p.release_mbid = r.mbid
35
+
GROUP BY
36
+
r.mbid, r.name, r.discriminant;
37
+
38
+
CREATE UNIQUE INDEX idx_mv_release_play_counts_discriminant ON mv_release_play_counts (release_mbid);
39
+
40
+
DROP MATERIALIZED VIEW IF EXISTS mv_recording_play_counts;
41
+
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
42
+
SELECT
43
+
rec.mbid AS recording_mbid,
44
+
rec.name AS recording_name,
45
+
rec.discriminant AS recording_discriminant,
46
+
COUNT(p.uri) AS play_count
47
+
FROM
48
+
recordings rec
49
+
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
50
+
GROUP BY
51
+
rec.mbid, rec.name, rec.discriminant;
52
+
53
+
CREATE UNIQUE INDEX idx_mv_recording_play_counts_discriminant ON mv_recording_play_counts (recording_mbid);
54
+
55
+
-- Create views for analyzing track/release variants
56
+
CREATE VIEW track_variants AS
57
+
SELECT
58
+
track_name,
59
+
track_discriminant,
60
+
COUNT(*) AS play_count,
61
+
COUNT(DISTINCT did) AS unique_listeners,
62
+
COUNT(DISTINCT recording_mbid) AS unique_recordings
63
+
FROM plays
64
+
WHERE track_name IS NOT NULL
65
+
GROUP BY track_name, track_discriminant
66
+
ORDER BY track_name, play_count DESC;
67
+
68
+
CREATE VIEW release_variants AS
69
+
SELECT
70
+
release_name,
71
+
release_discriminant,
72
+
COUNT(*) AS play_count,
73
+
COUNT(DISTINCT did) AS unique_listeners,
74
+
COUNT(DISTINCT release_mbid) AS unique_releases
75
+
FROM plays
76
+
WHERE release_name IS NOT NULL
77
+
GROUP BY release_name, release_discriminant
78
+
ORDER BY release_name, play_count DESC;
79
+
80
+
-- Create function to extract potential discriminants from existing names
81
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
82
+
DECLARE
83
+
discriminant_patterns TEXT[] := ARRAY[
84
+
'\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\)',
85
+
'\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\]',
86
+
'\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\}'
87
+
];
88
+
pattern TEXT;
89
+
match_result TEXT;
90
+
BEGIN
91
+
-- Try each pattern to find discriminant information
92
+
FOREACH pattern IN ARRAY discriminant_patterns
93
+
LOOP
94
+
SELECT substring(name_text FROM pattern) INTO match_result;
95
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
96
+
RETURN trim(match_result);
97
+
END IF;
98
+
END LOOP;
99
+
100
+
RETURN NULL;
101
+
END;
102
+
$$ LANGUAGE plpgsql IMMUTABLE;
103
+
104
+
-- Create function to get base name without discriminant
105
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
106
+
DECLARE
107
+
cleanup_patterns TEXT[] := ARRAY[
108
+
'\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\)\s*',
109
+
'\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\]\s*',
110
+
'\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\}\s*'
111
+
];
112
+
pattern TEXT;
113
+
result_text TEXT := name_text;
114
+
BEGIN
115
+
-- Remove discriminant patterns to get base name
116
+
FOREACH pattern IN ARRAY cleanup_patterns
117
+
LOOP
118
+
result_text := regexp_replace(result_text, pattern, ' ', 'gi');
119
+
END LOOP;
120
+
121
+
-- Clean up extra whitespace
122
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
123
+
124
+
RETURN result_text;
125
+
END;
126
+
$$ LANGUAGE plpgsql IMMUTABLE;
127
+
128
+
-- Add comments explaining the discriminant system
129
+
COMMENT ON COLUMN plays.track_discriminant IS 'Distinguishing information for track variants (e.g., "Acoustic Version", "Live at Wembley", "Radio Edit")';
130
+
COMMENT ON COLUMN plays.release_discriminant IS 'Distinguishing information for release variants (e.g., "Deluxe Edition", "Remastered", "2023 Remaster")';
131
+
COMMENT ON COLUMN releases.discriminant IS 'Distinguishing information for release variants to enable proper grouping';
132
+
COMMENT ON COLUMN recordings.discriminant IS 'Distinguishing information for recording variants to enable proper grouping';
133
+
134
+
COMMENT ON VIEW track_variants IS 'Shows all variants of tracks with their play counts and unique listeners';
135
+
COMMENT ON VIEW release_variants IS 'Shows all variants of releases with their play counts and unique listeners';
136
+
137
+
COMMENT ON FUNCTION extract_discriminant IS 'Extracts discriminant information from track/release names for migration purposes';
138
+
COMMENT ON FUNCTION get_base_name IS 'Returns the base name without discriminant information for grouping purposes';
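A sketch of how the helpers are intended to be used. Note that the extraction patterns above match lowercase markers only, which is what the later case-sensitivity migration addresses; the sample name is a placeholder.

```sql
-- Pull the variant label out of a name, and the grouping key without it.
SELECT extract_discriminant('some album (2011 remaster)') AS discriminant;
SELECT get_base_name('some album (2011 remaster)')        AS base_name;

-- Compare the variants of one base track captured in plays.
SELECT * FROM track_variants WHERE track_name = 'Some Track';
```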
+276
migrations/20241220000007_enhanced_discriminant_extraction.sql
+276
migrations/20241220000007_enhanced_discriminant_extraction.sql
···
1
+
-- Enhanced discriminant extraction with comprehensive edition/version patterns
2
+
-- This migration improves the auto-population of discriminants for better metadata handling
3
+
4
+
-- Drop existing functions to replace them with enhanced versions
5
+
DROP FUNCTION IF EXISTS extract_discriminant(TEXT);
6
+
DROP FUNCTION IF EXISTS get_base_name(TEXT);
7
+
8
+
-- Enhanced function to extract discriminants with comprehensive patterns
9
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
10
+
DECLARE
11
+
-- Comprehensive patterns for discriminant extraction
12
+
discriminant_patterns TEXT[] := ARRAY[
13
+
-- Parentheses patterns
14
+
'\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)',
15
+
'\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)',
16
+
'\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)',
17
+
'\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)',
18
+
'\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)',
19
+
20
+
-- Brackets patterns
21
+
'\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]',
22
+
'\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]',
23
+
'\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]',
24
+
'\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]',
25
+
'\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]',
26
+
27
+
-- Braces patterns
28
+
'\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}',
29
+
'\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}',
30
+
'\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}',
31
+
'\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}',
32
+
'\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}',
33
+
34
+
-- Dash/hyphen patterns (common for editions)
35
+
'[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$',
36
+
'[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
37
+
38
+
-- Colon patterns (common for subtitles and versions)
39
+
':\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$',
40
+
':\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
41
+
];
42
+
43
+
pattern TEXT;
44
+
match_result TEXT;
45
+
BEGIN
46
+
-- Return early if input is null or empty
47
+
IF name_text IS NULL OR trim(name_text) = '' THEN
48
+
RETURN NULL;
49
+
END IF;
50
+
51
+
-- Try each pattern to find discriminant information
52
+
FOREACH pattern IN ARRAY discriminant_patterns
53
+
LOOP
54
+
SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
55
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
56
+
-- Clean up the match result
57
+
match_result := trim(match_result);
58
+
-- Remove leading/trailing punctuation
59
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
60
+
-- Ensure it's not just whitespace or empty after cleanup
61
+
IF length(trim(match_result)) > 0 THEN
62
+
RETURN match_result;
63
+
END IF;
64
+
END IF;
65
+
END LOOP;
66
+
67
+
RETURN NULL;
68
+
END;
69
+
$$ LANGUAGE plpgsql IMMUTABLE;
70
+
71
+
-- Enhanced function to get base name without discriminant
72
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
73
+
DECLARE
74
+
-- Comprehensive cleanup patterns matching the extraction patterns
75
+
cleanup_patterns TEXT[] := ARRAY[
76
+
-- Remove parentheses content
77
+
'\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*',
78
+
'\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*',
79
+
'\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*',
80
+
'\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*',
81
+
'\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*',
82
+
83
+
-- Remove brackets content
84
+
'\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*',
85
+
'\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*',
86
+
'\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*',
87
+
'\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*',
88
+
'\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*',
89
+
90
+
-- Remove braces content
91
+
'\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*',
92
+
'\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*',
93
+
'\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*',
94
+
'\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*',
95
+
'\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*',
96
+
97
+
-- Remove dash/hyphen patterns
98
+
'\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$',
99
+
'\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
100
+
101
+
-- Remove colon patterns
102
+
'\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$',
103
+
'\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
104
+
];
105
+
106
+
pattern TEXT;
107
+
result_text TEXT := name_text;
108
+
BEGIN
109
+
-- Return early if input is null or empty
110
+
IF name_text IS NULL OR trim(name_text) = '' THEN
111
+
RETURN name_text;
112
+
END IF;
113
+
114
+
-- Remove discriminant patterns to get base name
115
+
FOREACH pattern IN ARRAY cleanup_patterns
116
+
LOOP
117
+
result_text := regexp_replace(result_text, pattern, ' ', 'gi');
118
+
END LOOP;
119
+
120
+
-- Clean up extra whitespace and normalize
121
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
122
+
123
+
-- Remove trailing punctuation that might be left after removal
124
+
result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
125
+
result_text := trim(result_text);
126
+
127
+
-- Ensure we don't return an empty string
128
+
IF length(result_text) = 0 THEN
129
+
RETURN name_text;
130
+
END IF;
131
+
132
+
RETURN result_text;
133
+
END;
134
+
$$ LANGUAGE plpgsql IMMUTABLE;
135
+
136
+
-- Create function to extract discriminant specifically for editions and versions
137
+
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
138
+
DECLARE
139
+
-- Focused patterns for edition/version extraction
140
+
edition_patterns TEXT[] := ARRAY[
141
+
-- Edition patterns
142
+
'\(([^)]*edition[^)]*)\)',
143
+
'\[([^]]*edition[^]]*)\]',
144
+
'\{([^}]*edition[^}]*)\}',
145
+
'[-–—]\s*([^-–—]*edition[^-–—]*)$',
146
+
':\s*([^:]*edition[^:]*)$',
147
+
148
+
-- Version patterns
149
+
'\(([^)]*version[^)]*)\)',
150
+
'\[([^]]*version[^]]*)\]',
151
+
'\{([^}]*version[^}]*)\}',
152
+
'[-–—]\s*([^-–—]*version[^-–—]*)$',
153
+
':\s*([^:]*version[^:]*)$',
154
+
155
+
-- Remaster patterns
156
+
'\(([^)]*remaster[^)]*)\)',
157
+
'\[([^]]*remaster[^]]*)\]',
158
+
'\{([^}]*remaster[^}]*)\}',
159
+
'[-–—]\s*([^-–—]*remaster[^-–—]*)$',
160
+
':\s*([^:]*remaster[^:]*)$',
161
+
162
+
-- Year-based patterns
163
+
'\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
164
+
'\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
165
+
'\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
166
+
];
167
+
168
+
pattern TEXT;
169
+
match_result TEXT;
170
+
BEGIN
171
+
-- Return early if input is null or empty
172
+
IF name_text IS NULL OR trim(name_text) = '' THEN
173
+
RETURN NULL;
174
+
END IF;
175
+
176
+
-- Try edition-specific patterns first
177
+
FOREACH pattern IN ARRAY edition_patterns
178
+
LOOP
179
+
SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
180
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
181
+
match_result := trim(match_result);
182
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
183
+
IF length(trim(match_result)) > 0 THEN
184
+
RETURN match_result;
185
+
END IF;
186
+
END IF;
187
+
END LOOP;
188
+
189
+
RETURN NULL;
190
+
END;
191
+
$$ LANGUAGE plpgsql IMMUTABLE;
192
+
193
+
-- Update recordings table to populate discriminants from existing names
194
+
UPDATE recordings
195
+
SET discriminant = extract_discriminant(name)
196
+
WHERE discriminant IS NULL
197
+
AND extract_discriminant(name) IS NOT NULL;
198
+
199
+
-- Update releases table to populate discriminants from existing names
200
+
UPDATE releases
201
+
SET discriminant = extract_discriminant(name)
202
+
WHERE discriminant IS NULL
203
+
AND extract_discriminant(name) IS NOT NULL;
204
+
205
+
-- Update plays table to populate discriminants from existing names where not already set
206
+
UPDATE plays
207
+
SET track_discriminant = extract_discriminant(track_name)
208
+
WHERE track_discriminant IS NULL
209
+
AND extract_discriminant(track_name) IS NOT NULL;
210
+
211
+
UPDATE plays
212
+
SET release_discriminant = extract_discriminant(release_name)
213
+
WHERE release_discriminant IS NULL
214
+
AND release_name IS NOT NULL
215
+
AND extract_discriminant(release_name) IS NOT NULL;
216
+
217
+
-- Create indexes for efficient discriminant queries
218
+
CREATE INDEX IF NOT EXISTS idx_recordings_name_discriminant ON recordings (name, discriminant);
219
+
CREATE INDEX IF NOT EXISTS idx_releases_name_discriminant ON releases (name, discriminant);
220
+
221
+
-- Add comments for the new function
222
+
COMMENT ON FUNCTION extract_discriminant IS 'Enhanced discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
223
+
COMMENT ON FUNCTION get_base_name IS 'Enhanced base name extraction removing comprehensive discriminant patterns to enable proper grouping';
224
+
COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized function for extracting edition and version discriminants with focused patterns';
225
+
226
+
-- Create a view to show discriminant extraction results for analysis
227
+
CREATE OR REPLACE VIEW discriminant_analysis AS
228
+
SELECT
229
+
'recordings' as table_name,
230
+
name as original_name,
231
+
discriminant,
232
+
get_base_name(name) as base_name,
233
+
extract_discriminant(name) as extracted_discriminant,
234
+
extract_edition_discriminant(name) as edition_discriminant
235
+
FROM recordings
236
+
WHERE name IS NOT NULL
237
+
UNION ALL
238
+
SELECT
239
+
'releases' as table_name,
240
+
name as original_name,
241
+
discriminant,
242
+
get_base_name(name) as base_name,
243
+
extract_discriminant(name) as extracted_discriminant,
244
+
extract_edition_discriminant(name) as edition_discriminant
245
+
FROM releases
246
+
WHERE name IS NOT NULL;
247
+
248
+
COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing discriminant extraction results for quality assessment and debugging';
249
+
250
+
-- Refresh materialized views to include discriminant information
251
+
REFRESH MATERIALIZED VIEW mv_release_play_counts;
252
+
REFRESH MATERIALIZED VIEW mv_recording_play_counts;
253
+
254
+
-- Create summary statistics for discriminant usage
255
+
CREATE OR REPLACE VIEW discriminant_stats AS
256
+
SELECT
257
+
'recordings' as entity_type,
258
+
COUNT(*) as total_count,
259
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
260
+
COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
261
+
ROUND(
262
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2
263
+
) as discriminant_percentage
264
+
FROM recordings
265
+
UNION ALL
266
+
SELECT
267
+
'releases' as entity_type,
268
+
COUNT(*) as total_count,
269
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
270
+
COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
271
+
ROUND(
272
+
COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2
273
+
) as discriminant_percentage
274
+
FROM releases;
275
+
276
+
COMMENT ON VIEW discriminant_stats IS 'Statistics showing discriminant usage and extraction potential across entity types';
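The two views give a quick way to audit extraction quality before and after the backfill above; a sketch of the intended checks:

```sql
-- Coverage summary: how many recordings/releases carry a discriminant,
-- and how many more could still be extracted from their names.
SELECT * FROM discriminant_stats;

-- Spot-check rows where the stored value disagrees with what the
-- current patterns would extract.
SELECT table_name, original_name, discriminant, extracted_discriminant
FROM discriminant_analysis
WHERE discriminant IS DISTINCT FROM extracted_discriminant
LIMIT 20;
```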
+252
migrations/20241220000008_fix_discriminant_case_sensitivity.sql
+252
migrations/20241220000008_fix_discriminant_case_sensitivity.sql
···
1
+
-- Fix case sensitivity in discriminant extraction patterns
2
+
-- This migration updates the discriminant extraction functions to properly handle case-insensitive matching
3
+
4
+
-- Drop dependent views first, then functions, then recreate everything
5
+
DROP VIEW IF EXISTS discriminant_analysis CASCADE;
6
+
DROP VIEW IF EXISTS discriminant_stats CASCADE;
7
+
8
+
-- Drop existing functions to replace with case-insensitive versions
9
+
DROP FUNCTION IF EXISTS extract_discriminant(TEXT) CASCADE;
10
+
DROP FUNCTION IF EXISTS get_base_name(TEXT) CASCADE;
11
+
DROP FUNCTION IF EXISTS extract_edition_discriminant(TEXT) CASCADE;
12
+
13
+
-- Enhanced function to extract discriminants with case-insensitive matching
14
+
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
15
+
DECLARE
16
+
-- Comprehensive patterns for discriminant extraction with case-insensitive flags
17
+
discriminant_patterns TEXT[] := ARRAY[
18
+
-- Parentheses patterns
19
+
'(?i)\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)',
20
+
'(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)',
21
+
'(?i)\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)',
22
+
'(?i)\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)',
23
+
'(?i)\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)',
24
+
25
+
-- Brackets patterns
26
+
'(?i)\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]',
27
+
'(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]',
28
+
'(?i)\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]',
29
+
'(?i)\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]',
30
+
'(?i)\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]',
31
+
32
+
-- Braces patterns
33
+
'(?i)\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}',
34
+
'(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}',
35
+
'(?i)\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}',
36
+
'(?i)\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}',
37
+
'(?i)\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}',
38
+
39
+
-- Dash/hyphen patterns (common for editions)
40
+
'(?i)[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$',
41
+
'(?i)[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
42
+
43
+
-- Colon patterns (common for subtitles and versions)
44
+
'(?i):\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$',
45
+
'(?i):\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
46
+
];
47
+
48
+
pattern TEXT;
49
+
match_result TEXT;
50
+
BEGIN
51
+
-- Return early if input is null or empty
52
+
IF name_text IS NULL OR trim(name_text) = '' THEN
53
+
RETURN NULL;
54
+
END IF;
55
+
56
+
-- Try each pattern to find discriminant information
57
+
FOREACH pattern IN ARRAY discriminant_patterns
58
+
LOOP
59
+
SELECT substring(name_text FROM pattern) INTO match_result;
60
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
61
+
-- Clean up the match result
62
+
match_result := trim(match_result);
63
+
-- Remove leading/trailing punctuation
64
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
65
+
-- Ensure it's not just whitespace or empty after cleanup
66
+
IF length(trim(match_result)) > 0 THEN
67
+
RETURN match_result;
68
+
END IF;
69
+
END IF;
70
+
END LOOP;
71
+
72
+
RETURN NULL;
73
+
END;
74
+
$$ LANGUAGE plpgsql IMMUTABLE;
75
+
76
+
-- Enhanced function to get base name without discriminant with case-insensitive matching
77
+
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
78
+
DECLARE
79
+
-- Comprehensive cleanup patterns matching the extraction patterns
80
+
cleanup_patterns TEXT[] := ARRAY[
81
+
-- Remove parentheses content
82
+
'(?i)\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*',
83
+
'(?i)\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*',
84
+
'(?i)\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*',
85
+
'(?i)\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*',
86
+
'(?i)\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*',
87
+
88
+
-- Remove brackets content
89
+
'(?i)\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*',
90
+
'(?i)\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*',
91
+
'(?i)\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*',
92
+
'(?i)\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*',
93
+
'(?i)\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*',
94
+
95
+
-- Remove braces content
96
+
'(?i)\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*',
97
+
'(?i)\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*',
98
+
'(?i)\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*',
99
+
'(?i)\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*',
100
+
'(?i)\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*',
101
+
102
+
-- Remove dash/hyphen patterns
103
+
'(?i)\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$',
104
+
'(?i)\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$',
105
+
106
+
-- Remove colon patterns
107
+
'(?i)\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$',
108
+
'(?i)\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$'
109
+
];
110
+
111
+
pattern TEXT;
112
+
result_text TEXT := name_text;
113
+
BEGIN
114
+
-- Return early if input is null or empty
115
+
IF name_text IS NULL OR trim(name_text) = '' THEN
116
+
RETURN name_text;
117
+
END IF;
118
+
119
+
-- Remove discriminant patterns to get base name
120
+
FOREACH pattern IN ARRAY cleanup_patterns
121
+
LOOP
122
+
result_text := regexp_replace(result_text, pattern, ' ', 'g');
123
+
END LOOP;
124
+
125
+
-- Clean up extra whitespace and normalize
126
+
result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
127
+
128
+
-- Remove trailing punctuation that might be left after removal
129
+
result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
130
+
result_text := trim(result_text);
131
+
132
+
-- Ensure we don't return an empty string
133
+
IF length(result_text) = 0 THEN
134
+
RETURN name_text;
135
+
END IF;
136
+
137
+
RETURN result_text;
138
+
END;
139
+
$$ LANGUAGE plpgsql IMMUTABLE;
140
+
141
+
-- Enhanced function to extract discriminant specifically for editions and versions with case-insensitive matching
142
+
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
143
+
DECLARE
144
+
-- Focused patterns for edition/version extraction with case-insensitive flags
145
+
edition_patterns TEXT[] := ARRAY[
146
+
-- Edition patterns
147
+
'(?i)\(([^)]*edition[^)]*)\)',
148
+
'(?i)\[([^]]*edition[^]]*)\]',
149
+
'(?i)\{([^}]*edition[^}]*)\}',
150
+
'(?i)[-–—]\s*([^-–—]*edition[^-–—]*)$',
151
+
'(?i):\s*([^:]*edition[^:]*)$',
152
+
153
+
-- Version patterns
154
+
'(?i)\(([^)]*version[^)]*)\)',
155
+
'(?i)\[([^]]*version[^]]*)\]',
156
+
'(?i)\{([^}]*version[^}]*)\}',
157
+
'(?i)[-–—]\s*([^-–—]*version[^-–—]*)$',
158
+
'(?i):\s*([^:]*version[^:]*)$',
159
+
160
+
-- Remaster patterns
161
+
'(?i)\(([^)]*remaster[^)]*)\)',
162
+
'(?i)\[([^]]*remaster[^]]*)\]',
163
+
'(?i)\{([^}]*remaster[^}]*)\}',
164
+
'(?i)[-–—]\s*([^-–—]*remaster[^-–—]*)$',
165
+
'(?i):\s*([^:]*remaster[^:]*)$',
166
+
167
+
-- Year-based patterns
168
+
'(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
169
+
'(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
170
+
'(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
171
+
];
172
+
173
+
pattern TEXT;
174
+
match_result TEXT;
175
+
BEGIN
176
+
-- Return early if input is null or empty
177
+
IF name_text IS NULL OR trim(name_text) = '' THEN
178
+
RETURN NULL;
179
+
END IF;
180
+
181
+
-- Try edition-specific patterns first
182
+
FOREACH pattern IN ARRAY edition_patterns
183
+
LOOP
184
+
SELECT substring(name_text FROM pattern) INTO match_result;
185
+
IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
186
+
match_result := trim(match_result);
187
+
match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
188
+
IF length(trim(match_result)) > 0 THEN
189
+
RETURN match_result;
190
+
END IF;
191
+
END IF;
192
+
END LOOP;
193
+
194
+
RETURN NULL;
195
+
END;
196
+
$$ LANGUAGE plpgsql IMMUTABLE;
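As a quick sanity check of the three functions defined above, the calls below were traced by hand through the patterns; the expected results are illustrative, not output captured from a live database:

```sql
-- Illustrative calls against the functions defined in this migration
SELECT extract_discriminant('Abbey Road (2019 Remaster)');          -- '2019 Remaster'
SELECT get_base_name('Abbey Road (2019 Remaster)');                 -- 'Abbey Road'
SELECT extract_edition_discriminant('Hybrid Theory [20th Anniversary Edition]');
                                                                     -- '20th Anniversary Edition'
SELECT extract_discriminant('Plain Title');                          -- NULL (no pattern matches)
```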
197
+
198
+
-- Update existing records with newly extracted discriminants (case-insensitive)
199
+
UPDATE recordings
200
+
SET discriminant = extract_discriminant(name)
201
+
WHERE discriminant IS NULL
202
+
AND extract_discriminant(name) IS NOT NULL;
203
+
204
+
UPDATE releases
205
+
SET discriminant = extract_discriminant(name)
206
+
WHERE discriminant IS NULL
207
+
AND extract_discriminant(name) IS NOT NULL;
208
+
209
+
UPDATE plays
210
+
SET track_discriminant = extract_discriminant(track_name)
211
+
WHERE track_discriminant IS NULL
212
+
AND extract_discriminant(track_name) IS NOT NULL;
213
+
214
+
UPDATE plays
215
+
SET release_discriminant = extract_discriminant(release_name)
216
+
WHERE release_discriminant IS NULL
217
+
AND release_name IS NOT NULL
218
+
AND extract_discriminant(release_name) IS NOT NULL;
219
+
220
+
-- Update comments for the enhanced functions
221
+
COMMENT ON FUNCTION extract_discriminant IS 'Enhanced case-insensitive discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
222
+
COMMENT ON FUNCTION get_base_name IS 'Enhanced case-insensitive base name extraction removing comprehensive discriminant patterns to enable proper grouping';
223
+
COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized case-insensitive function for extracting edition and version discriminants with focused patterns';
224
+
225
+
-- Refresh materialized views to reflect the case-insensitive improvements
226
+
REFRESH MATERIALIZED VIEW mv_release_play_counts;
227
+
REFRESH MATERIALIZED VIEW mv_recording_play_counts;
228
+
229
+
-- Update discriminant analysis view to include case-insensitive results
230
+
DROP VIEW IF EXISTS discriminant_analysis;
231
+
CREATE OR REPLACE VIEW discriminant_analysis AS
232
+
SELECT
233
+
'recordings' as table_name,
234
+
name as original_name,
235
+
discriminant,
236
+
get_base_name(name) as base_name,
237
+
extract_discriminant(name) as extracted_discriminant,
238
+
extract_edition_discriminant(name) as edition_discriminant
239
+
FROM recordings
240
+
WHERE name IS NOT NULL
241
+
UNION ALL
242
+
SELECT
243
+
'releases' as table_name,
244
+
name as original_name,
245
+
discriminant,
246
+
get_base_name(name) as base_name,
247
+
extract_discriminant(name) as extracted_discriminant,
248
+
extract_edition_discriminant(name) as edition_discriminant
249
+
FROM releases
250
+
WHERE name IS NOT NULL;
251
+
252
+
COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing case-insensitive discriminant extraction results for quality assessment and debugging';
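Since the view above exists for quality assessment, a spot-check query like the following (illustrative, using only the columns defined in the view) highlights rows where the stored discriminant disagrees with what the new case-insensitive extractor would produce:

```sql
-- Illustrative: rows where stored and freshly extracted discriminants differ
SELECT table_name, original_name, discriminant, extracted_discriminant
FROM discriminant_analysis
WHERE extracted_discriminant IS DISTINCT FROM discriminant
LIMIT 20;
```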
+12
-3
package.json
+12
-3
package.json
···
7
7
"dev": "turbo dev",
8
8
"build": "pnpm turbo run build --filter='./packages/*' --filter='./apps/*'",
9
9
"build:rust": "turbo run build:rust",
10
-
"typecheck": "pnpm -r exec tsc --noEmit",
10
+
"typecheck": "pnpm -r --filter='!./vendor/*' exec tsc --noEmit",
11
11
"test": "turbo run test test:rust",
12
-
"rust:fmt": "cd services && cargo fmt",
13
-
"rust:clippy": "cd services && cargo clippy",
12
+
"rust:fmt": "pnpm rust:fmt:services && pnpm rust:fmt:apps",
13
+
"rust:clippy": "pnpm rust:clippy:services && pnpm rust:clippy:apps",
14
+
"rust:fmt:services": "cd services && cargo fmt",
15
+
"rust:clippy:services": "cd services && cargo clippy -- -D warnings",
16
+
"rust:fmt:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Formatting $dir\" && cd \"$dir\" && cargo fmt && cd ../..; fi; done",
17
+
"rust:clippy:apps": "for dir in apps/*/; do if [ -f \"$dir/Cargo.toml\" ]; then echo \"Linting $dir\" && cd \"$dir\" && cargo clippy -- -D warnings && cd ../..; fi; done",
14
18
"fix": "biome lint --apply . && biome format --write . && biome check . --apply",
19
+
"hooks:install": "./scripts/install-git-hooks.sh",
20
+
"hooks:install-precommit": "pre-commit install",
21
+
"postinstall": "pnpm lex:gen-server",
15
22
"nuke": "rimraf node_modules */*/node_modules",
16
23
"lex:gen-server": "turbo lex:gen-server",
17
24
"format": "prettier --write .",
···
19
26
"lex:watch": "cd tools/lexicon-cli && node dist/index.js watch",
20
27
"lex:validate": "cd tools/lexicon-cli && node dist/index.js validate",
21
28
"lex:diff": "cd tools/lexicon-cli && node dist/index.js diff",
29
+
"lex:build-amethyst": "pnpm lex:gen-server && pnpm turbo build --filter=@teal/amethyst",
30
+
"lex:dev": "pnpm lex:gen-server && pnpm turbo dev --filter=@teal/amethyst",
22
31
"db:migrate": "cd services && sqlx migrate run",
23
32
"db:migrate:revert": "cd services && sqlx migrate revert",
24
33
"db:create": "cd services && sqlx database create",
+25
packages/lexicons/lex-gen.sh
+25
packages/lexicons/lex-gen.sh
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Navigate to the lexicons directory and find all .json files
5
+
cd ../../lexicons
6
+
json_files=$(find . -name "*.json" -type f)
7
+
8
+
# Go back to the lexicons package directory
9
+
cd ../packages/lexicons
10
+
11
+
# Check if we found any lexicon files
12
+
if [ -z "$json_files" ]; then
13
+
echo "No lexicon files found in ../../lexicons/"
14
+
exit 1
15
+
fi
16
+
17
+
# Convert the file list to absolute paths
18
+
lexicon_paths=""
19
+
for file in $json_files; do
20
+
lexicon_paths="$lexicon_paths ../../lexicons/$file"
21
+
done
22
+
23
+
# Generate lexicons
24
+
echo "Generating lexicons from: $lexicon_paths"
25
+
lex gen-server ./src $lexicon_paths --yes
+14
packages/lexicons/package.json
+14
packages/lexicons/package.json
···
1
+
{
2
+
"name": "@teal/lexicons",
3
+
"type": "module",
4
+
"main": "./index.ts",
5
+
"dependencies": {
6
+
"@atproto/lex-cli": "^0.5.4",
7
+
"@atproto/lexicon": "^0.4.2",
8
+
"@atproto/xrpc-server": "^0.7.4",
9
+
"@teal/tsconfig": "workspace:*"
10
+
},
11
+
"scripts": {
12
+
"lex:gen-server": "bash ./lex-gen.sh"
13
+
}
14
+
}
-4
pnpm-lock.yaml
-4
pnpm-lock.yaml
+100
scripts/install-git-hooks.sh
+100
scripts/install-git-hooks.sh
···
1
+
#!/bin/bash
2
+
3
+
# Install git hooks for the Teal project
4
+
# This script sets up pre-commit hooks for code formatting and linting
5
+
6
+
set -e
7
+
8
+
# Colors for output
9
+
RED='\033[0;31m'
10
+
GREEN='\033[0;32m'
11
+
YELLOW='\033[1;33m'
12
+
BLUE='\033[0;34m'
13
+
NC='\033[0m' # No Color
14
+
15
+
print_status() {
16
+
echo -e "${BLUE}[INFO]${NC} $1"
17
+
}
18
+
19
+
print_success() {
20
+
echo -e "${GREEN}[SUCCESS]${NC} $1"
21
+
}
22
+
23
+
print_error() {
24
+
echo -e "${RED}[ERROR]${NC} $1"
25
+
}
26
+
27
+
print_warning() {
28
+
echo -e "${YELLOW}[WARNING]${NC} $1"
29
+
}
30
+
31
+
# Check if we're in a git repository
32
+
if [ ! -d ".git" ]; then
33
+
print_error "This script must be run from the root of a git repository"
34
+
exit 1
35
+
fi
36
+
37
+
print_status "Installing git hooks for Teal project..."
38
+
39
+
# Create hooks directory if it doesn't exist
40
+
mkdir -p .git/hooks
41
+
42
+
# Install pre-commit hook
43
+
if [ -f "scripts/pre-commit-hook.sh" ]; then
44
+
print_status "Installing pre-commit hook..."
45
+
cp scripts/pre-commit-hook.sh .git/hooks/pre-commit
46
+
chmod +x .git/hooks/pre-commit
47
+
print_success "Pre-commit hook installed"
48
+
else
49
+
print_error "Pre-commit hook script not found at scripts/pre-commit-hook.sh"
50
+
exit 1
51
+
fi
52
+
53
+
# Optional: Install other hooks
54
+
# You can add more hooks here if needed
55
+
56
+
print_status "Testing hook installation..."
57
+
58
+
# Test if the hook is executable
59
+
if [ -x ".git/hooks/pre-commit" ]; then
60
+
print_success "Pre-commit hook is executable"
61
+
else
62
+
print_error "Pre-commit hook is not executable"
63
+
exit 1
64
+
fi
65
+
66
+
# Check if required tools are available
67
+
print_status "Checking required tools..."
68
+
69
+
MISSING_TOOLS=""
70
+
71
+
if ! command -v pnpm >/dev/null 2>&1; then
72
+
MISSING_TOOLS="$MISSING_TOOLS pnpm"
73
+
fi
74
+
75
+
if ! command -v node >/dev/null 2>&1; then
76
+
MISSING_TOOLS="$MISSING_TOOLS node"
77
+
fi
78
+
79
+
if ! command -v cargo >/dev/null 2>&1; then
80
+
MISSING_TOOLS="$MISSING_TOOLS cargo"
81
+
fi
82
+
83
+
if [ -n "$MISSING_TOOLS" ]; then
84
+
print_warning "Some tools are missing:$MISSING_TOOLS"
85
+
print_warning "The git hooks may not work properly without these tools"
86
+
else
87
+
print_success "All required tools are available"
88
+
fi
89
+
90
+
print_success "Git hooks installation complete! ๐"
91
+
print_status "The following hooks have been installed:"
92
+
echo " - pre-commit: Runs formatting and linting checks before commits"
93
+
94
+
print_status "To test the pre-commit hook, try making a commit with staged files"
95
+
print_status "To temporarily skip hooks, use: git commit --no-verify"
96
+
97
+
# Optional: Show hook status
98
+
echo ""
99
+
print_status "Installed hooks:"
100
+
ls -la .git/hooks/ | grep -v sample | grep -v "^d" | sed 's/^/ /'
+213
scripts/pre-commit-hook.sh
+213
scripts/pre-commit-hook.sh
···
1
+
#!/bin/bash
2
+
3
+
# Pre-commit hook for Teal project
4
+
# This script runs code formatting and linting checks before allowing commits
5
+
6
+
set -e
7
+
8
+
echo "๐ Running pre-commit checks..."
9
+
10
+
# Colors for output
11
+
RED='\033[0;31m'
12
+
GREEN='\033[0;32m'
13
+
YELLOW='\033[1;33m'
14
+
BLUE='\033[0;34m'
15
+
NC='\033[0m' # No Color
16
+
17
+
# Function to print colored output
18
+
print_status() {
19
+
echo -e "${BLUE}[INFO]${NC} $1"
20
+
}
21
+
22
+
print_success() {
23
+
echo -e "${GREEN}[SUCCESS]${NC} $1"
24
+
}
25
+
26
+
print_warning() {
27
+
echo -e "${YELLOW}[WARNING]${NC} $1"
28
+
}
29
+
30
+
print_error() {
31
+
echo -e "${RED}[ERROR]${NC} $1"
32
+
}
33
+
34
+
# Get list of staged files
35
+
STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM)
36
+
37
+
if [ -z "$STAGED_FILES" ]; then
38
+
print_warning "No staged files found"
39
+
exit 0
40
+
fi
41
+
42
+
# Check if we have TypeScript/JavaScript files
43
+
TS_JS_FILES=$(echo "$STAGED_FILES" | grep -E '\.(ts|tsx|js|jsx)$' || true)
44
+
# Check if we have Rust files
45
+
RUST_FILES=$(echo "$STAGED_FILES" | grep -E '\.rs$' || true)
46
+
# Check if we have lexicon files
47
+
LEXICON_FILES=$(echo "$STAGED_FILES" | grep -E 'lexicons/.*\.json$' || true)
48
+
49
+
print_status "Staged files to check:"
50
+
echo "$STAGED_FILES" | sed 's/^/ - /'
51
+
52
+
# 1. TypeScript/JavaScript checks
53
+
if [ -n "$TS_JS_FILES" ]; then
54
+
print_status "Running TypeScript/JavaScript checks..."
55
+
56
+
# Check if biome is available and run it
57
+
if command -v pnpm >/dev/null 2>&1; then
58
+
print_status "Running Biome formatting and linting..."
59
+
if ! pnpm biome check . --apply --no-errors-on-unmatched 2>/dev/null; then
60
+
print_error "Biome check failed. Please fix the issues and try again."
61
+
exit 1
62
+
fi
63
+
64
+
print_status "Running Prettier formatting..."
65
+
if ! pnpm prettier --write $TS_JS_FILES 2>/dev/null; then
66
+
print_error "Prettier formatting failed. Please fix the issues and try again."
67
+
exit 1
68
+
fi
69
+
70
+
# TypeScript checking temporarily disabled due to vendor compilation issues
71
+
# Re-enable once vendor code is fixed
72
+
else
73
+
print_warning "pnpm not found, skipping JS/TS checks"
74
+
fi
75
+
fi
76
+
77
+
# 2. Rust checks
78
+
if [ -n "$RUST_FILES" ]; then
79
+
print_status "Running Rust checks..."
80
+
81
+
if command -v cargo >/dev/null 2>&1; then
82
+
RUST_ERRORS=0
83
+
84
+
# Check services workspace
85
+
if [ -f "services/Cargo.toml" ]; then
86
+
print_status "Running cargo fmt on services workspace..."
87
+
if ! (cd services && cargo fmt --check) 2>/dev/null; then
88
+
print_status "Auto-formatting Rust code in services..."
89
+
(cd services && cargo fmt) 2>/dev/null || true
90
+
fi
91
+
92
+
print_status "Running cargo clippy on services workspace..."
93
+
if (cd services && cargo check); then
94
+
if ! (cd services && cargo clippy -- -D warnings); then
95
+
print_warning "Cargo clippy found issues in services workspace. Please fix the warnings."
96
+
print_warning "Run 'pnpm rust:clippy:services' to see detailed errors."
97
+
# Don't fail the commit for clippy warnings, just warn
98
+
fi
99
+
else
100
+
print_warning "Services workspace has compilation errors. Skipping clippy."
101
+
print_warning "Run 'pnpm rust:clippy:services' to see detailed errors."
102
+
fi
103
+
fi
104
+
105
+
# Check individual Rust projects outside services
106
+
CHECKED_DIRS=""
107
+
for rust_file in $RUST_FILES; do
108
+
rust_dir=$(dirname "$rust_file")
109
+
# Find the nearest Cargo.toml going up the directory tree
110
+
check_dir="$rust_dir"
111
+
while [ "$check_dir" != "." ] && [ "$check_dir" != "/" ]; do
112
+
if [ -f "$check_dir/Cargo.toml" ] && [ "$check_dir" != "services" ]; then
113
+
# Skip if we already checked this directory
114
+
if echo "$CHECKED_DIRS" | grep -q "$check_dir"; then
115
+
break
116
+
fi
117
+
CHECKED_DIRS="$CHECKED_DIRS $check_dir"
118
+
119
+
# Found a Cargo.toml outside services workspace
120
+
print_status "Running cargo fmt on $check_dir..."
121
+
if ! (cd "$check_dir" && cargo fmt --check) 2>/dev/null; then
122
+
print_status "Auto-formatting Rust code in $check_dir..."
123
+
(cd "$check_dir" && cargo fmt) 2>/dev/null || true
124
+
fi
125
+
126
+
print_status "Running cargo clippy on $check_dir..."
127
+
if (cd "$check_dir" && cargo check); then
128
+
if ! (cd "$check_dir" && cargo clippy -- -D warnings); then
129
+
print_error "Cargo clippy found issues in $check_dir. Please fix the warnings and try again."
130
+
RUST_ERRORS=1
131
+
fi
132
+
else
133
+
print_warning "Project $check_dir has compilation errors. Skipping clippy."
134
+
print_warning "Run 'cd $check_dir && cargo check' to see detailed errors."
135
+
fi
136
+
break
137
+
fi
138
+
check_dir=$(dirname "$check_dir")
139
+
done
140
+
done
141
+
142
+
if [ $RUST_ERRORS -eq 1 ]; then
143
+
exit 1
144
+
fi
145
+
else
146
+
print_warning "Cargo not found, skipping Rust checks"
147
+
fi
148
+
fi
149
+
150
+
# 3. Lexicon checks
151
+
if [ -n "$LEXICON_FILES" ]; then
152
+
print_status "Lexicon files changed, validating and regenerating..."
153
+
154
+
if command -v pnpm >/dev/null 2>&1; then
155
+
print_status "Validating lexicons..."
156
+
if ! pnpm lex:validate 2>/dev/null; then
157
+
print_error "Lexicon validation failed. Please fix the lexicon files and try again."
158
+
exit 1
159
+
fi
160
+
161
+
print_status "Regenerating lexicons..."
162
+
if ! pnpm lex:gen-server 2>/dev/null; then
163
+
print_error "Lexicon generation failed. Please check the lexicon files and try again."
164
+
exit 1
165
+
fi
166
+
167
+
# Note: Generated lexicon files are ignored by .gitignore and not added to staging
168
+
print_status "Generated lexicon files are ignored by .gitignore (as intended)"
169
+
else
170
+
print_warning "pnpm not found, skipping lexicon checks"
171
+
fi
172
+
fi
173
+
174
+
# 4. Re-add files that might have been formatted
175
+
FORMATTED_FILES=""
176
+
for file in $STAGED_FILES; do
177
+
if [ -f "$file" ]; then
178
+
# Check if file was modified by formatters
179
+
if [ -n "$(git diff "$file")" ]; then
180
+
FORMATTED_FILES="$FORMATTED_FILES $file"
181
+
git add "$file"
182
+
fi
183
+
fi
184
+
done
185
+
186
+
if [ -n "$FORMATTED_FILES" ]; then
187
+
print_success "Auto-formatted files have been re-staged:"
188
+
echo "$FORMATTED_FILES" | tr ' ' '\n' | sed 's/^/ - /'
189
+
fi
190
+
191
+
# 5. Final validation - ensure no syntax errors in staged files
192
+
print_status "Running final validation..."
193
+
194
+
# Check for common issues
195
+
for file in $TS_JS_FILES; do
196
+
if [ -f "$file" ]; then
197
+
# Check for console.log statements (optional - remove if you want to allow them)
198
+
if grep -n "console\.log" "$file" >/dev/null 2>&1; then
199
+
print_warning "Found console.log statements in $file! yooo!!!"
200
+
# Uncomment the next two lines if you want to block commits with console.log
201
+
# print_error "Please remove console.log statements before committing"
202
+
# exit 1
203
+
fi
204
+
205
+
# Check for TODO/FIXME comments in committed code (optional)
206
+
if grep -n -i "TODO\|FIXME" "$file" >/dev/null 2>&1; then
207
+
print_warning "Found TODO/FIXME comments in $file"
208
+
fi
209
+
fi
210
+
done
211
+
212
+
print_success "All pre-commit checks passed! ๐"
213
+
exit 0
+66
scripts/setup-lexicons.sh
+66
scripts/setup-lexicons.sh
···
1
+
#!/bin/bash
2
+
# scripts/setup-lexicons.sh
3
+
# Setup script for ATProto lexicons submodule and symbolic links
4
+
5
+
set -e
6
+
7
+
echo "Setting up lexicons..."
8
+
9
+
# Check if we're in the right directory
10
+
if [ ! -f "package.json" ] || [ ! -d "lexicons" ]; then
11
+
echo "Error: This script must be run from the project root directory"
12
+
exit 1
13
+
fi
14
+
15
+
# Initialize submodules
16
+
echo "Initializing submodules..."
17
+
git submodule update --init --recursive
18
+
19
+
# Check if vendor/atproto exists
20
+
if [ ! -d "vendor/atproto" ]; then
21
+
echo "Error: vendor/atproto submodule not found"
22
+
exit 1
23
+
fi
24
+
25
+
# Create symbolic links if they don't exist
26
+
echo "Creating symbolic links..."
27
+
cd lexicons
28
+
29
+
if [ ! -L app ]; then
30
+
ln -s ../vendor/atproto/lexicons/app app
31
+
echo "Created symlink: lexicons/app"
32
+
else
33
+
echo "Symlink already exists: lexicons/app"
34
+
fi
35
+
36
+
if [ ! -L chat ]; then
37
+
ln -s ../vendor/atproto/lexicons/chat chat
38
+
echo "Created symlink: lexicons/chat"
39
+
else
40
+
echo "Symlink already exists: lexicons/chat"
41
+
fi
42
+
43
+
if [ ! -L com ]; then
44
+
ln -s ../vendor/atproto/lexicons/com com
45
+
echo "Created symlink: lexicons/com"
46
+
else
47
+
echo "Symlink already exists: lexicons/com"
48
+
fi
49
+
50
+
if [ ! -L tools ]; then
51
+
ln -s ../vendor/atproto/lexicons/tools tools
52
+
echo "Created symlink: lexicons/tools"
53
+
else
54
+
echo "Symlink already exists: lexicons/tools"
55
+
fi
56
+
57
+
cd ..
58
+
59
+
echo "Lexicons setup complete!"
60
+
echo ""
61
+
echo "You should now have access to:"
62
+
echo " - lexicons/app -> ATProto app lexicons"
63
+
echo " - lexicons/chat -> ATProto chat lexicons"
64
+
echo " - lexicons/com -> ATProto protocol lexicons"
65
+
echo " - lexicons/tools -> ATProto tools lexicons"
66
+
echo " - lexicons/fm.teal.alpha -> Custom Teal lexicons"
+69
scripts/setup-sqlx-offline.sh
+69
scripts/setup-sqlx-offline.sh
···
1
+
#!/bin/bash
2
+
3
+
# Script to copy .sqlx files to all Rust projects that use SQLx
4
+
# This is needed for offline SQLx builds (SQLX_OFFLINE=true)
5
+
6
+
set -e
7
+
8
+
# Get the script directory (should be in teal/scripts/)
9
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
10
+
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
11
+
12
+
# Source .sqlx directory
13
+
SQLX_SOURCE="$PROJECT_ROOT/.sqlx"
14
+
15
+
# List of projects that use SQLx (relative to project root)
16
+
SQLX_PROJECTS=(
17
+
"apps/aqua"
18
+
"services/cadet"
19
+
"services/satellite"
20
+
)
21
+
22
+
echo "๐ง Setting up SQLx offline files..."
23
+
24
+
# Check if source .sqlx directory exists
25
+
if [ ! -d "$SQLX_SOURCE" ]; then
26
+
echo "โ Source .sqlx directory not found at: $SQLX_SOURCE"
27
+
echo " Make sure you've run 'cargo sqlx prepare' from the services directory first."
28
+
exit 1
29
+
fi
30
+
31
+
# Copy .sqlx files to each project that needs them
32
+
for project in "${SQLX_PROJECTS[@]}"; do
33
+
project_path="$PROJECT_ROOT/$project"
34
+
target_sqlx="$project_path/.sqlx"
35
+
36
+
if [ ! -d "$project_path" ]; then
37
+
echo "โ ๏ธ Project directory not found: $project_path (skipping)"
38
+
continue
39
+
fi
40
+
41
+
# Check if project actually uses SQLx
42
+
if [ ! -f "$project_path/Cargo.toml" ]; then
43
+
echo "โ ๏ธ No Cargo.toml found in $project (skipping)"
44
+
continue
45
+
fi
46
+
47
+
if ! grep -q "sqlx" "$project_path/Cargo.toml"; then
48
+
echo "โ ๏ธ Project $project doesn't appear to use SQLx (skipping)"
49
+
continue
50
+
fi
51
+
52
+
echo "๐ฆ Copying .sqlx files to $project..."
53
+
54
+
# Remove existing .sqlx directory if it exists
55
+
if [ -d "$target_sqlx" ]; then
56
+
rm -rf "$target_sqlx"
57
+
fi
58
+
59
+
# Copy the .sqlx directory
60
+
cp -r "$SQLX_SOURCE" "$target_sqlx"
61
+
62
+
echo " โ
Copied $(ls -1 "$target_sqlx" | wc -l) query files"
63
+
done
64
+
65
+
echo "โ
SQLx offline setup complete!"
66
+
echo ""
67
+
echo "Note: If you add new SQL queries or modify existing ones, you'll need to:"
68
+
echo "1. Run 'cargo sqlx prepare' from the services directory"
69
+
echo "2. Run this script again to update all project copies"
+62
scripts/update-lexicons.sh
+62
scripts/update-lexicons.sh
···
1
+
#!/bin/bash
2
+
# scripts/update-lexicons.sh
3
+
# Update script for ATProto lexicons from upstream
4
+
5
+
set -e
6
+
7
+
echo "Updating ATProto lexicons..."
8
+
9
+
# Check if we're in the right directory
10
+
if [ ! -f "package.json" ] || [ ! -d "vendor/atproto" ]; then
11
+
echo "Error: This script must be run from the project root directory"
12
+
echo "Make sure vendor/atproto submodule exists"
13
+
exit 1
14
+
fi
15
+
16
+
# Save current directory
17
+
PROJECT_ROOT=$(pwd)
18
+
19
+
# Update the submodule
20
+
echo "Fetching latest changes from atproto repository..."
21
+
cd vendor/atproto
22
+
23
+
# Fetch latest changes
24
+
git fetch origin
25
+
26
+
# Get current commit
27
+
CURRENT_COMMIT=$(git rev-parse HEAD)
28
+
CURRENT_SHORT=$(git rev-parse --short HEAD)
29
+
30
+
# Get latest commit on main
31
+
LATEST_COMMIT=$(git rev-parse origin/main)
32
+
LATEST_SHORT=$(git rev-parse --short origin/main)
33
+
34
+
if [ "$CURRENT_COMMIT" = "$LATEST_COMMIT" ]; then
35
+
echo "Already up to date (${CURRENT_SHORT})"
36
+
cd "$PROJECT_ROOT"
37
+
exit 0
38
+
fi
39
+
40
+
echo "Updating from ${CURRENT_SHORT} to ${LATEST_SHORT}..."
41
+
42
+
# Pull latest changes
43
+
git pull origin main
44
+
45
+
# Go back to project root
46
+
cd "$PROJECT_ROOT"
47
+
48
+
# Stage the submodule update
49
+
git add vendor/atproto
50
+
51
+
# Show what changed
52
+
echo ""
53
+
echo "Submodule updated successfully!"
54
+
echo "Changes:"
55
+
git diff --cached --submodule=log vendor/atproto
56
+
57
+
echo ""
58
+
echo "To complete the update, commit the changes:"
59
+
echo " git commit -m \"Update atproto lexicons to ${LATEST_SHORT}\""
60
+
echo ""
61
+
echo "Or to see what lexicon files changed:"
62
+
echo " cd vendor/atproto && git log --oneline ${CURRENT_SHORT}..${LATEST_SHORT} -- lexicons/"
-6
services/.sqlx/.sqlxrc
-6
services/.sqlx/.sqlxrc
+96
-366
services/Cargo.lock
+96
-366
services/Cargo.lock
···
60
60
]
61
61
62
62
[[package]]
63
-
name = "anstream"
64
-
version = "0.6.19"
65
-
source = "registry+https://github.com/rust-lang/crates.io-index"
66
-
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
67
-
dependencies = [
68
-
"anstyle",
69
-
"anstyle-parse",
70
-
"anstyle-query",
71
-
"anstyle-wincon",
72
-
"colorchoice",
73
-
"is_terminal_polyfill",
74
-
"utf8parse",
75
-
]
76
-
77
-
[[package]]
78
-
name = "anstyle"
79
-
version = "1.0.11"
80
-
source = "registry+https://github.com/rust-lang/crates.io-index"
81
-
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
82
-
83
-
[[package]]
84
-
name = "anstyle-parse"
85
-
version = "0.2.7"
86
-
source = "registry+https://github.com/rust-lang/crates.io-index"
87
-
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
88
-
dependencies = [
89
-
"utf8parse",
90
-
]
91
-
92
-
[[package]]
93
-
name = "anstyle-query"
94
-
version = "1.1.3"
95
-
source = "registry+https://github.com/rust-lang/crates.io-index"
96
-
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
97
-
dependencies = [
98
-
"windows-sys 0.59.0",
99
-
]
100
-
101
-
[[package]]
102
-
name = "anstyle-wincon"
103
-
version = "3.0.9"
104
-
source = "registry+https://github.com/rust-lang/crates.io-index"
105
-
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
106
-
dependencies = [
107
-
"anstyle",
108
-
"once_cell_polyfill",
109
-
"windows-sys 0.59.0",
110
-
]
111
-
112
-
[[package]]
113
63
name = "anyhow"
114
64
version = "1.0.98"
115
65
source = "registry+https://github.com/rust-lang/crates.io-index"
116
66
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
117
67
118
68
[[package]]
119
-
name = "aqua"
120
-
version = "0.1.0"
121
-
dependencies = [
122
-
"anyhow",
123
-
"async-trait",
124
-
"atrium-api",
125
-
"axum",
126
-
"base64",
127
-
"chrono",
128
-
"clap",
129
-
"dotenvy",
130
-
"iroh-car",
131
-
"redis",
132
-
"reqwest",
133
-
"serde",
134
-
"serde_json",
135
-
"sqlx",
136
-
"sys-info",
137
-
"time",
138
-
"tokio",
139
-
"tower-http",
140
-
"tracing",
141
-
"tracing-subscriber",
142
-
"types",
143
-
"url",
144
-
"uuid",
145
-
"vergen",
146
-
"vergen-gitcl",
147
-
]
148
-
149
-
[[package]]
150
69
name = "arc-swap"
151
70
version = "1.7.1"
152
71
source = "registry+https://github.com/rust-lang/crates.io-index"
···
187
106
]
188
107
189
108
[[package]]
109
+
name = "atmst"
110
+
version = "0.0.1"
111
+
source = "registry+https://github.com/rust-lang/crates.io-index"
112
+
checksum = "aeb2a4631a64a242ae62c3ceb140adfa2a8bdacb1b22a6549db5de2ce3389c1d"
113
+
dependencies = [
114
+
"async-trait",
115
+
"bytes",
116
+
"cid 0.11.1",
117
+
"dashmap",
118
+
"futures",
119
+
"ipld-core",
120
+
"iroh-car 0.5.1",
121
+
"log",
122
+
"multihash 0.19.3",
123
+
"serde",
124
+
"serde_ipld_dagcbor",
125
+
"serde_ipld_dagjson",
126
+
"sha2",
127
+
"thiserror 1.0.69",
128
+
"tokio",
129
+
]
130
+
131
+
[[package]]
190
132
name = "atoi"
191
133
version = "2.0.0"
192
134
source = "registry+https://github.com/rust-lang/crates.io-index"
···
287
229
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
288
230
dependencies = [
289
231
"axum-core",
290
-
"axum-macros",
291
232
"bytes",
292
233
"form_urlencoded",
293
234
"futures-util",
···
300
241
"matchit",
301
242
"memchr",
302
243
"mime",
303
-
"multer",
304
244
"percent-encoding",
305
245
"pin-project-lite",
306
246
"rustversion",
···
337
277
]
338
278
339
279
[[package]]
340
-
name = "axum-macros"
341
-
version = "0.5.0"
342
-
source = "registry+https://github.com/rust-lang/crates.io-index"
343
-
checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
344
-
dependencies = [
345
-
"proc-macro2",
346
-
"quote",
347
-
"syn 2.0.104",
348
-
]
349
-
350
-
[[package]]
351
280
name = "backtrace"
352
281
version = "0.3.75"
353
282
source = "registry+https://github.com/rust-lang/crates.io-index"
···
504
433
version = "1.10.1"
505
434
source = "registry+https://github.com/rust-lang/crates.io-index"
506
435
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
436
+
dependencies = [
437
+
"serde",
438
+
]
507
439
508
440
[[package]]
509
441
name = "cadet"
···
511
443
dependencies = [
512
444
"anyhow",
513
445
"async-trait",
446
+
"atmst",
514
447
"atrium-api",
515
448
"base64",
516
449
"chrono",
517
450
"cid 0.11.1",
518
451
"dotenvy",
519
452
"flume",
520
-
"iroh-car",
453
+
"futures",
454
+
"iroh-car 0.4.0",
521
455
"libipld",
522
456
"metrics 0.23.1",
523
457
"metrics-exporter-prometheus",
···
528
462
"reqwest",
529
463
"rocketman",
530
464
"serde",
465
+
"serde_ipld_dagcbor",
531
466
"serde_json",
532
467
"sqlx",
533
468
"time",
···
541
476
]
542
477
543
478
[[package]]
544
-
name = "camino"
545
-
version = "1.1.10"
546
-
source = "registry+https://github.com/rust-lang/crates.io-index"
547
-
checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab"
548
-
dependencies = [
549
-
"serde",
550
-
]
551
-
552
-
[[package]]
553
-
name = "cargo-platform"
554
-
version = "0.1.9"
555
-
source = "registry+https://github.com/rust-lang/crates.io-index"
556
-
checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea"
557
-
dependencies = [
558
-
"serde",
559
-
]
560
-
561
-
[[package]]
562
-
name = "cargo_metadata"
563
-
version = "0.19.2"
564
-
source = "registry+https://github.com/rust-lang/crates.io-index"
565
-
checksum = "dd5eb614ed4c27c5d706420e4320fbe3216ab31fa1c33cd8246ac36dae4479ba"
566
-
dependencies = [
567
-
"camino",
568
-
"cargo-platform",
569
-
"semver",
570
-
"serde",
571
-
"serde_json",
572
-
"thiserror 2.0.12",
573
-
]
574
-
575
-
[[package]]
576
479
name = "cbor4ii"
577
480
version = "0.2.14"
578
481
source = "registry+https://github.com/rust-lang/crates.io-index"
···
661
564
]
662
565
663
566
[[package]]
664
-
name = "clap"
665
-
version = "4.5.41"
666
-
source = "registry+https://github.com/rust-lang/crates.io-index"
667
-
checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
668
-
dependencies = [
669
-
"clap_builder",
670
-
"clap_derive",
671
-
]
672
-
673
-
[[package]]
674
-
name = "clap_builder"
675
-
version = "4.5.41"
676
-
source = "registry+https://github.com/rust-lang/crates.io-index"
677
-
checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
678
-
dependencies = [
679
-
"anstream",
680
-
"anstyle",
681
-
"clap_lex",
682
-
"strsim",
683
-
]
684
-
685
-
[[package]]
686
-
name = "clap_derive"
687
-
version = "4.5.41"
688
-
source = "registry+https://github.com/rust-lang/crates.io-index"
689
-
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
690
-
dependencies = [
691
-
"heck",
692
-
"proc-macro2",
693
-
"quote",
694
-
"syn 2.0.104",
695
-
]
696
-
697
-
[[package]]
698
-
name = "clap_lex"
699
-
version = "0.7.5"
700
-
source = "registry+https://github.com/rust-lang/crates.io-index"
701
-
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
702
-
703
-
[[package]]
704
567
name = "cmake"
705
568
version = "0.1.54"
706
569
source = "registry+https://github.com/rust-lang/crates.io-index"
···
708
571
dependencies = [
709
572
"cc",
710
573
]
711
-
712
-
[[package]]
713
-
name = "colorchoice"
714
-
version = "1.0.4"
715
-
source = "registry+https://github.com/rust-lang/crates.io-index"
716
-
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
717
574
718
575
[[package]]
719
576
name = "combine"
···
1296
1153
"libc",
1297
1154
"log",
1298
1155
"rustversion",
1299
-
"windows 0.61.3",
1156
+
"windows",
1300
1157
]
1301
1158
1302
1159
[[package]]
···
1568
1425
"js-sys",
1569
1426
"log",
1570
1427
"wasm-bindgen",
1571
-
"windows-core 0.61.2",
1428
+
"windows-core",
1572
1429
]
1573
1430
1574
1431
[[package]]
···
1757
1614
]
1758
1615
1759
1616
[[package]]
1760
-
name = "is_terminal_polyfill"
1761
-
version = "1.70.1"
1617
+
name = "iroh-car"
1618
+
version = "0.5.1"
1762
1619
source = "registry+https://github.com/rust-lang/crates.io-index"
1763
-
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
1620
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
1621
+
dependencies = [
1622
+
"anyhow",
1623
+
"cid 0.11.1",
1624
+
"futures",
1625
+
"serde",
1626
+
"serde_ipld_dagcbor",
1627
+
"thiserror 1.0.69",
1628
+
"tokio",
1629
+
"unsigned-varint 0.7.2",
1630
+
]
1764
1631
1765
1632
[[package]]
1766
1633
name = "itertools"
···
2150
2017
]
2151
2018
2152
2019
[[package]]
2153
-
name = "multer"
2154
-
version = "3.1.0"
2155
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2156
-
checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b"
2157
-
dependencies = [
2158
-
"bytes",
2159
-
"encoding_rs",
2160
-
"futures-util",
2161
-
"http",
2162
-
"httparse",
2163
-
"memchr",
2164
-
"mime",
2165
-
"spin",
2166
-
"version_check",
2167
-
]
2168
-
2169
-
[[package]]
2170
2020
name = "multibase"
2171
2021
version = "0.9.1"
2172
2022
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2300
2150
]
2301
2151
2302
2152
[[package]]
2303
-
name = "ntapi"
2304
-
version = "0.4.1"
2305
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2306
-
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
2307
-
dependencies = [
2308
-
"winapi",
2309
-
]
2310
-
2311
-
[[package]]
2312
2153
name = "nu-ansi-term"
2313
2154
version = "0.46.0"
2314
2155
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2383
2224
]
2384
2225
2385
2226
[[package]]
2386
-
name = "num_threads"
2387
-
version = "0.1.7"
2388
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2389
-
checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
2390
-
dependencies = [
2391
-
"libc",
2392
-
]
2393
-
2394
-
[[package]]
2395
-
name = "objc2-core-foundation"
2396
-
version = "0.3.1"
2397
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2398
-
checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166"
2399
-
dependencies = [
2400
-
"bitflags 2.9.1",
2401
-
]
2402
-
2403
-
[[package]]
2404
2227
name = "object"
2405
2228
version = "0.36.7"
2406
2229
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2414
2237
version = "1.21.3"
2415
2238
source = "registry+https://github.com/rust-lang/crates.io-index"
2416
2239
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
2417
-
2418
-
[[package]]
2419
-
name = "once_cell_polyfill"
2420
-
version = "1.70.1"
2421
-
source = "registry+https://github.com/rust-lang/crates.io-index"
2422
-
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
2423
2240
2424
2241
[[package]]
2425
2242
name = "openssl"
···
3019
2836
dependencies = [
3020
2837
"aws-lc-rs",
3021
2838
"once_cell",
2839
+
"ring",
3022
2840
"rustls-pki-types",
3023
2841
"rustls-webpki",
3024
2842
"subtle",
···
3150
2968
version = "1.0.26"
3151
2969
source = "registry+https://github.com/rust-lang/crates.io-index"
3152
2970
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
3153
-
dependencies = [
3154
-
"serde",
3155
-
]
3156
2971
3157
2972
[[package]]
3158
2973
name = "serde"
···
3209
3024
]
3210
3025
3211
3026
[[package]]
3027
+
name = "serde_ipld_dagjson"
3028
+
version = "0.2.0"
3029
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3030
+
checksum = "3359b47ba7f4a306ef5984665e10539e212e97217afa489437d533208eecda36"
3031
+
dependencies = [
3032
+
"ipld-core",
3033
+
"serde",
3034
+
"serde_json",
3035
+
]
3036
+
3037
+
[[package]]
3212
3038
name = "serde_json"
3213
3039
version = "1.0.141"
3214
3040
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3296
3122
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3297
3123
3298
3124
[[package]]
3125
+
name = "signal-hook-registry"
3126
+
version = "1.4.5"
3127
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3128
+
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
3129
+
dependencies = [
3130
+
"libc",
3131
+
]
3132
+
3133
+
[[package]]
3299
3134
name = "signature"
3300
3135
version = "2.2.0"
3301
3136
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3402
3237
"memchr",
3403
3238
"once_cell",
3404
3239
"percent-encoding",
3240
+
"rustls",
3405
3241
"serde",
3406
3242
"serde_json",
3407
3243
"sha2",
···
3413
3249
"tracing",
3414
3250
"url",
3415
3251
"uuid",
3252
+
"webpki-roots 0.26.11",
3416
3253
]
3417
3254
3418
3255
[[package]]
···
3662
3499
]
3663
3500
3664
3501
[[package]]
3665
-
name = "sys-info"
3666
-
version = "0.9.1"
3667
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3668
-
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
3669
-
dependencies = [
3670
-
"cc",
3671
-
"libc",
3672
-
]
3673
-
3674
-
[[package]]
3675
-
name = "sysinfo"
3676
-
version = "0.34.2"
3677
-
source = "registry+https://github.com/rust-lang/crates.io-index"
3678
-
checksum = "a4b93974b3d3aeaa036504b8eefd4c039dced109171c1ae973f1dc63b2c7e4b2"
3679
-
dependencies = [
3680
-
"libc",
3681
-
"memchr",
3682
-
"ntapi",
3683
-
"objc2-core-foundation",
3684
-
"windows 0.57.0",
3685
-
]
3686
-
3687
-
[[package]]
3688
3502
name = "system-configuration"
3689
3503
version = "0.6.1"
3690
3504
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3781
3595
dependencies = [
3782
3596
"deranged",
3783
3597
"itoa",
3784
-
"libc",
3785
3598
"num-conv",
3786
-
"num_threads",
3787
3599
"powerfmt",
3788
3600
"serde",
3789
3601
"time-core",
···
3842
3654
"io-uring",
3843
3655
"libc",
3844
3656
"mio",
3657
+
"parking_lot",
3845
3658
"pin-project-lite",
3659
+
"signal-hook-registry",
3846
3660
"slab",
3847
3661
"socket2 0.5.10",
3848
3662
"tokio-macros",
···
4133
3947
"serde_ipld_dagcbor",
4134
3948
"serde_json",
4135
3949
"thiserror 2.0.12",
4136
-
"uuid",
4137
3950
]
4138
3951
4139
3952
[[package]]
···
4211
4024
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
4212
4025
4213
4026
[[package]]
4214
-
name = "utf8parse"
4215
-
version = "0.2.2"
4216
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4217
-
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
4218
-
4219
-
[[package]]
4220
4027
name = "uuid"
4221
4028
version = "1.17.0"
4222
4029
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4241
4048
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
4242
4049
4243
4050
[[package]]
4244
-
name = "vergen"
4245
-
version = "9.0.6"
4246
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4247
-
checksum = "6b2bf58be11fc9414104c6d3a2e464163db5ef74b12296bda593cac37b6e4777"
4248
-
dependencies = [
4249
-
"anyhow",
4250
-
"cargo_metadata",
4251
-
"derive_builder",
4252
-
"regex",
4253
-
"rustc_version",
4254
-
"rustversion",
4255
-
"sysinfo",
4256
-
"time",
4257
-
"vergen-lib",
4258
-
]
4259
-
4260
-
[[package]]
4261
-
name = "vergen-gitcl"
4262
-
version = "1.0.8"
4263
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4264
-
checksum = "b9dfc1de6eb2e08a4ddf152f1b179529638bedc0ea95e6d667c014506377aefe"
4265
-
dependencies = [
4266
-
"anyhow",
4267
-
"derive_builder",
4268
-
"rustversion",
4269
-
"time",
4270
-
"vergen",
4271
-
"vergen-lib",
4272
-
]
4273
-
4274
-
[[package]]
4275
-
name = "vergen-lib"
4276
-
version = "0.1.6"
4277
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4278
-
checksum = "9b07e6010c0f3e59fcb164e0163834597da68d1f864e2b8ca49f74de01e9c166"
4279
-
dependencies = [
4280
-
"anyhow",
4281
-
"derive_builder",
4282
-
"rustversion",
4283
-
]
4284
-
4285
-
[[package]]
4286
4051
name = "version_check"
4287
4052
version = "0.9.5"
4288
4053
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4410
4175
]
4411
4176
4412
4177
[[package]]
4178
+
name = "webpki-roots"
4179
+
version = "0.26.11"
4180
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4181
+
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
4182
+
dependencies = [
4183
+
"webpki-roots 1.0.2",
4184
+
]
4185
+
4186
+
[[package]]
4187
+
name = "webpki-roots"
4188
+
version = "1.0.2"
4189
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4190
+
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
4191
+
dependencies = [
4192
+
"rustls-pki-types",
4193
+
]
4194
+
4195
+
[[package]]
4413
4196
name = "which"
4414
4197
version = "4.4.2"
4415
4198
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4455
4238
4456
4239
[[package]]
4457
4240
name = "windows"
4458
-
version = "0.57.0"
4459
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4460
-
checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143"
4461
-
dependencies = [
4462
-
"windows-core 0.57.0",
4463
-
"windows-targets 0.52.6",
4464
-
]
4465
-
4466
-
[[package]]
4467
-
name = "windows"
4468
4241
version = "0.61.3"
4469
4242
source = "registry+https://github.com/rust-lang/crates.io-index"
4470
4243
checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
4471
4244
dependencies = [
4472
4245
"windows-collections",
4473
-
"windows-core 0.61.2",
4246
+
"windows-core",
4474
4247
"windows-future",
4475
4248
"windows-link",
4476
4249
"windows-numerics",
···
4482
4255
source = "registry+https://github.com/rust-lang/crates.io-index"
4483
4256
checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
4484
4257
dependencies = [
4485
-
"windows-core 0.61.2",
4486
-
]
4487
-
4488
-
[[package]]
4489
-
name = "windows-core"
4490
-
version = "0.57.0"
4491
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4492
-
checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d"
4493
-
dependencies = [
4494
-
"windows-implement 0.57.0",
4495
-
"windows-interface 0.57.0",
4496
-
"windows-result 0.1.2",
4497
-
"windows-targets 0.52.6",
4258
+
"windows-core",
4498
4259
]
4499
4260
4500
4261
[[package]]
···
4503
4264
source = "registry+https://github.com/rust-lang/crates.io-index"
4504
4265
checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
4505
4266
dependencies = [
4506
-
"windows-implement 0.60.0",
4507
-
"windows-interface 0.59.1",
4267
+
"windows-implement",
4268
+
"windows-interface",
4508
4269
"windows-link",
4509
-
"windows-result 0.3.4",
4270
+
"windows-result",
4510
4271
"windows-strings",
4511
4272
]
4512
4273
···
4516
4277
source = "registry+https://github.com/rust-lang/crates.io-index"
4517
4278
checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
4518
4279
dependencies = [
4519
-
"windows-core 0.61.2",
4280
+
"windows-core",
4520
4281
"windows-link",
4521
4282
"windows-threading",
4522
4283
]
4523
4284
4524
4285
[[package]]
4525
4286
name = "windows-implement"
4526
-
version = "0.57.0"
4527
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4528
-
checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
4529
-
dependencies = [
4530
-
"proc-macro2",
4531
-
"quote",
4532
-
"syn 2.0.104",
4533
-
]
4534
-
4535
-
[[package]]
4536
-
name = "windows-implement"
4537
4287
version = "0.60.0"
4538
4288
source = "registry+https://github.com/rust-lang/crates.io-index"
4539
4289
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
···
4545
4295
4546
4296
[[package]]
4547
4297
name = "windows-interface"
4548
-
version = "0.57.0"
4549
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4550
-
checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
4551
-
dependencies = [
4552
-
"proc-macro2",
4553
-
"quote",
4554
-
"syn 2.0.104",
4555
-
]
4556
-
4557
-
[[package]]
4558
-
name = "windows-interface"
4559
4298
version = "0.59.1"
4560
4299
source = "registry+https://github.com/rust-lang/crates.io-index"
4561
4300
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
···
4577
4316
source = "registry+https://github.com/rust-lang/crates.io-index"
4578
4317
checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
4579
4318
dependencies = [
4580
-
"windows-core 0.61.2",
4319
+
"windows-core",
4581
4320
"windows-link",
4582
4321
]
4583
4322
···
4588
4327
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
4589
4328
dependencies = [
4590
4329
"windows-link",
4591
-
"windows-result 0.3.4",
4330
+
"windows-result",
4592
4331
"windows-strings",
4593
-
]
4594
-
4595
-
[[package]]
4596
-
name = "windows-result"
4597
-
version = "0.1.2"
4598
-
source = "registry+https://github.com/rust-lang/crates.io-index"
4599
-
checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
4600
-
dependencies = [
4601
-
"windows-targets 0.52.6",
4602
4332
]
4603
4333
4604
4334
[[package]]
+5
-4
services/Cargo.toml
+5
-4
services/Cargo.toml
···
1
1
[workspace]
2
-
members = ["aqua", "cadet", "rocketman", "satellite", "types"]
2
+
members = ["cadet", "satellite", "types"]
3
3
resolver = "2"
4
4
5
5
[workspace.dependencies]
···
12
12
"postgres",
13
13
"uuid",
14
14
"chrono",
15
+
"tls-rustls",
15
16
] }
16
17
serde = { version = "1.0", features = ["derive"] }
17
18
anyhow = "1.0"
···
19
20
tracing = "0.1"
20
21
tracing-subscriber = "0.3"
21
22
metrics = "0.23"
22
-
reqwest = { version = "0.12", features = ["json"] }
23
+
reqwest.workspace = true
23
24
url = "2.5"
24
25
rand = "0.8"
25
26
flume = "0.11"
26
27
async-trait = "0.1"
27
28
time = "0.3"
28
29
dotenvy = "0.15"
29
-
tokio-tungstenite = "0.24"
30
+
tokio-tungstenite.workspace = true
30
31
atrium-api = "0.25"
31
32
chrono = { version = "0.4", features = ["serde"] }
32
33
uuid = { version = "1.0", features = ["v4", "serde"] }
33
34
types = { path = "types" }
34
-
rocketman = { path = "rocketman" }
35
+
rocketman = "0.2.5"
35
36
36
37
# CAR and IPLD dependencies
37
38
iroh-car = "0.4"
+20
services/Cross.toml
+20
services/Cross.toml
···
1
+
[build.env]
2
+
passthrough = [
3
+
"CARGO_HOME",
4
+
"CARGO_TARGET_DIR",
5
+
"SQLX_OFFLINE",
6
+
"PKG_CONFIG_ALLOW_CROSS",
7
+
]
8
+
9
+
[target.aarch64-unknown-linux-gnu]
10
+
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main"
11
+
12
+
[target.aarch64-unknown-linux-gnu.env]
13
+
passthrough = ["CARGO_HOME", "CARGO_TARGET_DIR", "SQLX_OFFLINE"]
14
+
# Allow cross-compilation of native dependencies
15
+
PKG_CONFIG_ALLOW_CROSS = "1"
16
+
# Use static linking to reduce runtime dependencies
17
+
RUSTFLAGS = "-C target-feature=+crt-static -C link-arg=-s"
18
+
# Cross-compiler toolchain for native dependencies (e.g., OpenSSL)
19
+
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
20
+
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
+4
services/cadet/Cargo.toml
+4
services/cadet/Cargo.toml
···
3
3
version = "0.1.0"
4
4
edition = "2021"
5
5
6
+
6
7
[dependencies]
7
8
atrium-api.workspace = true
8
9
tokio.workspace = true
···
32
33
libipld.workspace = true
33
34
cid.workspace = true
34
35
base64.workspace = true
36
+
atmst = "0.0.1"
37
+
serde_ipld_dagcbor = "0.6"
38
+
futures = "0.3"
35
39
36
40
# Redis for job queues
37
41
redis.workspace = true
+61
-1
services/cadet/Dockerfile
+61
-1
services/cadet/Dockerfile
···
1
+
# Docker build args for cross-platform builds (must be at the top)
2
+
ARG TARGETPLATFORM
3
+
ARG BUILDPLATFORM
4
+
ARG TARGETARCH
5
+
ARG TARGETOS
6
+
1
7
FROM --platform=${BUILDPLATFORM} rust:latest AS buildah
2
8
3
9
# Create appuser
···
15
21
16
22
WORKDIR /buildah
17
23
24
+
# Re-declare ARGs after FROM (Docker requirement)
25
+
ARG TARGETPLATFORM
26
+
ARG BUILDPLATFORM
27
+
ARG TARGETARCH
28
+
ARG TARGETOS
29
+
30
+
# Debug platform detection before copying files
31
+
RUN echo "DEBUG Before copy: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH BUILDPLATFORM=$BUILDPLATFORM"
32
+
18
33
COPY ./ .
19
34
20
-
RUN . ./target.sh && touch src/main.rs && echo "Building for $TARGET_ARCH" && cargo build --release --target $RUST_TARGET && cp target/$RUST_TARGET/release/cadet target/cadet
35
+
# Setup lexicons and install dependencies
36
+
RUN ./scripts/setup-lexicons.sh
37
+
38
+
# Install Node.js and pnpm for lexicon generation
39
+
RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/*
40
+
RUN npm install -g pnpm
41
+
42
+
# Install dependencies and generate lexicons
43
+
RUN pnpm install
44
+
RUN cd tools/lexicon-cli && pnpm build
45
+
RUN pnpm lex:gen
46
+
47
+
# Install cross-compilation toolchains
48
+
RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu
49
+
50
+
# Enable ARM64 architecture and install cross-compilation tools
51
+
RUN dpkg --add-architecture arm64 && \
52
+
apt-get update && \
53
+
apt-get install -y \
54
+
gcc-aarch64-linux-gnu \
55
+
libssl-dev:arm64 \
56
+
libssl-dev \
57
+
pkg-config \
58
+
&& rm -rf /var/lib/apt/lists/*
59
+
60
+
# Set up cross-compilation environment
61
+
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
62
+
ENV PKG_CONFIG_ALLOW_CROSS=1
63
+
ENV PKG_CONFIG_PATH_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu/pkgconfig
64
+
ENV OPENSSL_DIR_aarch64_unknown_linux_gnu=/usr
65
+
ENV OPENSSL_LIB_DIR_aarch64_unknown_linux_gnu=/usr/lib/aarch64-linux-gnu
66
+
ENV OPENSSL_INCLUDE_DIR_aarch64_unknown_linux_gnu=/usr/include/openssl
67
+
68
+
# Force SQLx to use offline mode with workspace cache
69
+
ENV SQLX_OFFLINE=true
70
+
71
+
# copy sqlx in
72
+
COPY ./.sqlx ./services/cadet/.sqlx
73
+
74
+
# Debug platform detection and run build
75
+
RUN echo "DEBUG Before target.sh: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH" && \
76
+
. ./target.sh && \
77
+
touch services/cadet/src/main.rs && \
78
+
echo "Building for $TARGET_ARCH" && \
79
+
cargo build --release --target $RUST_TARGET --package cadet && \
80
+
cp target/$RUST_TARGET/release/cadet target/cadet
21
81
22
82
FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc
23
83
+240
services/cadet/src/ingestors/car/README.md
+240
services/cadet/src/ingestors/car/README.md
···
1
+
# CAR Import System with `atmst`
2
+
3
+
This directory contains the implementation of Teal's CAR (Content Addressable aRchive) import functionality, now powered by the `atmst` library for proper AT Protocol-style Merkle Search Tree handling.
4
+
5
+
## Overview
6
+
7
+
The CAR import system allows Teal to ingest historical music listening data from AT Protocol repositories. Previously, this was done with manual IPLD parsing, but we've now migrated to use the specialized `atmst` library for more accurate and robust CAR file processing.
8
+
9
+
## Key Components
10
+
11
+
### `CarImportIngestor`
12
+
13
+
The main entry point for CAR file processing. This ingestor:
14
+
15
+
1. **Accepts CAR data** via the `LexiconIngestor` interface (base64 or URL)
16
+
2. **Uses `atmst::CarImporter`** to parse CAR files with proper MST handling
17
+
3. **Converts to MST structure** for tree traversal and record extraction
18
+
4. **Delegates to existing ingestors** for Teal record types (play, profile, status)
19
+
20
+
### Migration from `iroh-car` to `atmst`
21
+
22
+
**Previous Implementation:**
23
+
- Used `iroh-car` for basic CAR parsing
24
+
- Manual IPLD block decoding with `libipld`
25
+
- Complex two-pass processing to extract rkey mappings from commit operations
26
+
- Error-prone MST parsing that could miss records
27
+
28
+
**New Implementation:**
29
+
- Uses `atmst::CarImporter` for specialized AT Protocol CAR handling
30
+
- Built-in MST structure understanding
31
+
- Proper tree traversal with guaranteed rkey extraction
32
+
- More reliable and maintainable code
33
+
34
+
## Usage
35
+
36
+
### As a LexiconIngestor
37
+
38
+
The CAR importer integrates seamlessly with Teal's existing ingestion pipeline:
39
+
40
+
```rust
41
+
// CAR data in a record
42
+
{
43
+
"$type": "com.teal.car.import",
44
+
"carData": "base64-encoded-car-file-here"
45
+
}
46
+
47
+
// Or as a URL reference
48
+
{
49
+
"$type": "com.teal.car.import",
50
+
"carData": {
51
+
"url": "https://example.com/repo.car"
52
+
}
53
+
}
54
+
```
55
+
56
+
### Direct Import
57
+
58
+
```rust
59
+
let ingestor = CarImportIngestor::new(db_pool);
60
+
61
+
// Import from bytes
62
+
let import_id = ingestor.import_car_bytes(&car_data, "did:plc:example").await?;
63
+
64
+
// Import from PDS
65
+
let import_id = ingestor.fetch_and_process_identity_car("user.bsky.social").await?;
66
+
```
67
+
68
+
## Supported Record Types
69
+
70
+
The CAR importer automatically detects and processes these Teal record types:
71
+
72
+
- **`fm.teal.alpha.feed.play`** - Music play records
73
+
- **`fm.teal.alpha.actor.profile`** - User profile data
74
+
- **`fm.teal.alpha.actor.status`** - User status updates
75
+
76
+
Records are processed using the same logic as real-time Jetstream ingestion, ensuring data consistency.
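Conceptually the routing is just a match on the collection portion of the MST key. A minimal, self-contained sketch is below; the real `process_extracted_record` in `car_import.rs` delegates to the existing Play/Profile/Status ingestors instead of printing:

```rust
use serde_json::Value;

/// Illustrative only: route an extracted record by its collection NSID.
/// The real implementation hands the JSON to the matching Teal ingestor.
fn route_record(collection: &str, rkey: &str, _data: &Value) -> anyhow::Result<()> {
    match collection {
        "fm.teal.alpha.feed.play" => println!("store play record {rkey}"),
        "fm.teal.alpha.actor.profile" => println!("store profile record {rkey}"),
        "fm.teal.alpha.actor.status" => println!("store status record {rkey}"),
        other => eprintln!("skipping unknown collection: {other}"),
    }
    Ok(())
}
```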
77
+
78
+
## Architecture
79
+
80
+
### MST Processing Flow
81
+
82
+
1. **CAR Import**: `atmst::CarImporter` loads and validates the CAR file
83
+
2. **MST Conversion**: CAR data is converted to an `atmst::Mst` structure
84
+
3. **Tree Traversal**: MST is traversed depth-first to find all records
85
+
4. **Record Extraction**: Each MST entry is examined for Teal record types
86
+
5. **Delegation**: Valid records are passed to existing Teal ingestors
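In code, the traversal is a short async loop over the MST entry stream. A minimal sketch, assuming the same `atmst` calls used by `extract_records_from_mst` in `car_import.rs` (`Mst::iter().into_stream()` and `CarImporter::decode_cbor`):

```rust
use atmst::{mst::Mst, CarImporter};
use futures::StreamExt;

/// Illustrative sketch: walk every (key, cid) entry and decode Teal records.
async fn walk_teal_records(mst: &Mst, importer: &CarImporter) -> anyhow::Result<()> {
    let mut entries = mst.iter().into_stream();
    while let Some(entry) = entries.next().await {
        let (key, cid) = entry.map_err(|e| anyhow::anyhow!("MST iteration error: {e}"))?;
        // MST keys look like "fm.teal.alpha.feed.play/<rkey>"
        if let Some((collection, rkey)) = key.rsplit_once('/') {
            if collection.starts_with("fm.teal.alpha.") {
                // Decode the referenced block; the real code converts the IPLD
                // to JSON and hands it to the matching ingestor.
                let _record = importer
                    .decode_cbor(&cid)
                    .map_err(|e| anyhow::anyhow!("decode failed for {cid}: {e}"))?;
                println!("found {collection} record with rkey {rkey}");
            }
        }
    }
    Ok(())
}
```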
87
+
88
+
### Key Benefits
89
+
90
+
- **Proper rkey handling**: MST structure ensures correct record key extraction
91
+
- **AT Protocol compliance**: Uses specialized library designed for AT Protocol
92
+
- **Maintainable code**: Eliminates complex manual MST parsing
93
+
- **Better error handling**: More robust than previous implementation
94
+
95
+
## Current Status
96
+
97
+
### ✅ Completed
98
+
- Basic `atmst` integration
99
+
- MST structure setup and conversion
100
+
- Record type detection and routing
101
+
- Integration with existing Teal ingestors
102
+
- Error handling and logging
103
+
104
+
### 🚧 In Progress
105
+
- **Block data access**: Full implementation of record data extraction from MST
106
+
- **MST traversal**: Complete iteration through MST entries
107
+
- **Testing**: Comprehensive test suite with real CAR files
108
+
109
+
### 📋 TODO
110
+
- Complete `get_record_from_mst()` implementation
111
+
- Add MST entry iteration logic
112
+
- Performance optimization for large CAR files
113
+
- Comprehensive integration tests
114
+
115
+
## Implementation Notes
116
+
117
+
### Block Data Access
118
+
119
+
The current implementation has a placeholder for accessing actual record data from the MST:
120
+
121
+
```rust
122
+
fn get_record_from_mst(&self, cid: &atmst::Cid, mst: &Mst) -> Option<Value> {
123
+
// TODO: Implement proper block data access using atmst API
124
+
// This requires understanding how to extract IPLD data for a given CID
125
+
// from the MST's internal block storage
126
+
None
127
+
}
128
+
```
129
+
130
+
This is the key missing piece; completing it will require consulting the `atmst` library documentation.
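That said, the rest of this change already reads blocks by CID through `CarImporter::decode_cbor` (see `get_record_data` in `car_import.rs`), so a completed version could plausibly take the shape below, assuming the importer that loaded the CAR is kept available alongside the MST:

```rust
use atmst::{CarImporter, Ipld};

/// Hypothetical shape of the missing piece: look the record up by CID in the
/// importer that loaded the CAR and decode it as DAG-CBOR. JSON conversion
/// would reuse the `ipld_to_json` helper from car_import.rs.
fn get_record_from_car(cid: &atmst::Cid, importer: &CarImporter) -> Option<Ipld> {
    importer.decode_cbor(cid).ok()
}
```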
131
+
132
+
### MST Traversal
133
+
134
+
Similarly, the MST traversal logic needs completion:
135
+
136
+
```rust
137
+
// TODO: Implement proper MST iteration
138
+
// for (cid, node) in mst.iter() {
139
+
// // Process MST entries
140
+
// }
141
+
```
142
+
143
+
### Error Handling
144
+
145
+
The system is designed to be resilient:
146
+
- Invalid records are logged and skipped
147
+
- Network errors during PDS fetching are properly reported
148
+
- Database errors are propagated with context
149
+
150
+
## Testing
151
+
152
+
### Test Structure
153
+
154
+
```bash
155
+
# Unit tests (no database required)
156
+
cargo test test_parse_teal_key
157
+
cargo test test_is_teal_record_key
158
+
159
+
# Integration tests (requires database)
160
+
cargo test test_atmst_car_import --ignored
161
+
162
+
# CLI testing
163
+
cd tools/teal-cli
164
+
cargo run -- car analyze path/to/file.car
165
+
```
166
+
167
+
### Test Data
168
+
169
+
Test CAR files should be placed in `services/cadet/` for integration testing:
170
+
- `test.car` - Basic test file with Teal records
171
+
- `large.car` - Performance testing file
172
+
- `empty.car` - Edge case testing
173
+
174
+
## Dependencies
175
+
176
+
### Key Dependencies
177
+
- **`atmst`**: AT Protocol MST library (v0.0.1)
178
+
- **`serde_json`**: JSON serialization for record processing
179
+
- **`anyhow`**: Error handling
180
+
- **`uuid`**: Import ID generation
181
+
- **`reqwest`**: HTTP client for PDS fetching
182
+
183
+
### Workspace Dependencies
184
+
The implementation uses existing Teal workspace dependencies for database access, logging, and record processing.
185
+
186
+
## Configuration
187
+
188
+
No additional configuration is required. The CAR importer uses the same database connection and logging setup as other Teal ingestors.
189
+
190
+
## Monitoring
191
+
192
+
The CAR importer provides detailed logging:
193
+
194
+
- **Info**: Successful imports, record counts, processing progress
195
+
- **Warn**: Skipped records, missing data, network issues
196
+
- **Error**: Database failures, invalid CAR files, processing errors
197
+
198
+
Metrics are integrated with Teal's existing observability stack.
199
+
200
+
## Performance
201
+
202
+
### Optimization Strategies
203
+
204
+
1. **Streaming processing**: Records are processed as they're discovered
205
+
2. **Batch database operations**: Multiple records can be inserted in batches
206
+
3. **Memory management**: Large CAR files are processed without loading entirely into memory
207
+
4. **Parallel processing**: Future enhancement for concurrent record processing
208
+
209
+
### Benchmarks
210
+
211
+
Performance testing should be conducted with:
212
+
- Small CAR files (< 1MB, ~100 records)
213
+
- Medium CAR files (1-50MB, ~10K records)
214
+
- Large CAR files (> 50MB, ~100K+ records)
215
+
216
+
## Future Enhancements
217
+
218
+
### Planned Features
219
+
- **Incremental imports**: Support for delta/since-based CAR fetching
220
+
- **Batch processing**: Queue-based processing for multiple CAR files
221
+
- **Validation**: Pre-import validation of CAR file integrity
222
+
- **Metrics**: Detailed import statistics and performance monitoring
223
+
224
+
### Integration Opportunities
225
+
- **Admin API**: Trigger imports via HTTP API
226
+
- **Scheduled imports**: Cron-based periodic imports from known users
227
+
- **Real-time sync**: Hybrid approach combining Jetstream + CAR imports
228
+
229
+
---
230
+
231
+
## Contributing
232
+
233
+
When working on the CAR import system:
234
+
235
+
1. **Test thoroughly**: Use both unit and integration tests
236
+
2. **Document changes**: Update this README for significant modifications
237
+
3. **Monitor performance**: Large CAR files can impact system performance
238
+
4. **Handle errors gracefully**: Network and parsing errors are expected
239
+
240
+
For questions about `atmst` integration or MST processing, refer to the library documentation or consider reaching out to the `atmst` maintainers.
+677
-421
services/cadet/src/ingestors/car/car_import.rs
+677
-421
services/cadet/src/ingestors/car/car_import.rs
···
1
+
//! CAR (Content Addressable aRchive) Import Ingestor using atmst
2
+
//!
3
+
//! This module handles importing Teal records from CAR files using the atmst library,
4
+
//! which provides proper AT Protocol-style Merkle Search Tree handling. The CAR import process:
5
+
//!
6
+
//! 1. Receives CAR data via the LexiconIngestor interface (base64 encoded or URL)
7
+
//! 2. Uses atmst::CarImporter to parse the CAR file and extract MST structure
8
+
//! 3. Converts the CarImporter to an MST for proper tree traversal
9
+
//! 4. Iterates through MST nodes to find Teal record types (play, profile, status)
10
+
//! 5. Delegates to existing Teal ingestors using the actual DID and proper rkey
11
+
//!
12
+
//! ## Usage Example
13
+
//!
14
+
//! ```rust,ignore
15
+
//! // CAR data can be provided in a record like:
16
+
//! {
17
+
//! "carData": "base64-encoded-car-file-here"
18
+
//! }
19
+
//!
20
+
//! // Or as a URL reference:
21
+
//! {
22
+
//! "carData": {
23
+
//! "url": "https://example.com/my-archive.car"
24
+
//! }
25
+
//! }
26
+
//! ```
27
+
//!
28
+
//! The ingestor will automatically detect record types and store them using the
29
+
//! same logic as real-time Jetstream ingestion, ensuring data consistency.
30
+
//! All imported records will be attributed to the DID that initiated the import
31
+
//! and use the original rkey from the AT Protocol MST structure.
32
+
33
+
use crate::ingestors::car::jobs::{queue_keys, CarImportJob};
34
+
use crate::redis_client::RedisClient;
1
35
use anyhow::{anyhow, Result};
2
36
use async_trait::async_trait;
3
-
use base64::{engine::general_purpose, Engine as _};
4
-
use chrono;
5
-
use cid::Cid;
6
-
use iroh_car::{CarHeader, CarReader};
7
-
use libipld::cbor::DagCborCodec;
8
-
use libipld::{Block, Cid as LibipldCid, Ipld};
9
-
use reqwest;
37
+
use atmst::{mst::Mst, Bytes, CarImporter};
38
+
use base64::Engine;
39
+
use futures::StreamExt;
40
+
use redis::AsyncCommands;
10
41
use rocketman::{ingestion::LexiconIngestor, types::event::Event};
11
42
use serde_json::Value;
12
43
use sqlx::PgPool;
13
-
use std::io::Cursor;
14
44
use tracing::{info, warn};
15
-
use url;
16
45
46
+
/// Helper struct for extracted records
47
+
#[derive(Debug)]
48
+
pub struct ExtractedRecord {
49
+
pub collection: String,
50
+
pub rkey: String,
51
+
pub data: serde_json::Value,
52
+
}
53
+
54
+
/// CAR Import Ingestor handles importing Teal records from CAR files using atmst
17
55
pub struct CarImportIngestor {
18
56
sql: PgPool,
19
57
}
20
58
21
59
impl CarImportIngestor {
60
+
/// Create a new CAR import ingestor with database connection
22
61
pub fn new(sql: PgPool) -> Self {
23
62
Self { sql }
24
63
}
25
64
26
-
/// Process a CAR file from bytes
27
-
async fn process_car_data(&self, car_data: &[u8], import_id: &str) -> Result<()> {
28
-
info!("Starting CAR file processing for import {}", import_id);
65
+
/// Helper to get a Redis connection for job queueing
66
+
pub async fn get_redis_connection(&self) -> Result<redis::aio::MultiplexedConnection> {
67
+
let redis_url =
68
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
69
+
let client = RedisClient::new(&redis_url)?;
70
+
client
71
+
.get_connection()
72
+
.await
73
+
.map_err(|e| anyhow!("Redis connection error: {}", e))
74
+
}
75
+
76
+
/// Process CAR file data using atmst library and extract Teal records
77
+
async fn process_car_data(&self, car_data: &[u8], import_id: &str, did: &str) -> Result<()> {
78
+
info!(
79
+
"Starting CAR file processing with atmst for import {} (DID: {})",
80
+
import_id, did
81
+
);
82
+
83
+
// Convert to Bytes for atmst
84
+
let car_bytes: Bytes = Bytes::from(car_data.to_vec());
85
+
86
+
// Create CarImporter and import the CAR data
87
+
let mut car_importer = CarImporter::new();
88
+
car_importer
89
+
.import_from_bytes(car_bytes.clone())
90
+
.await
91
+
.map_err(|e| anyhow!("Failed to import CAR with atmst: {}", e))?;
29
92
30
-
let cursor = Cursor::new(car_data);
31
-
let mut reader = CarReader::new(cursor).await?;
93
+
info!(
94
+
"CAR imported successfully. Root CIDs: {:?}, Total blocks: {}",
95
+
car_importer.roots(),
96
+
car_importer.len()
97
+
);
98
+
99
+
// Convert CarImporter to MST for proper tree traversal
100
+
let mst = Mst::from_car_importer(car_importer)
101
+
.await
102
+
.map_err(|e| anyhow!("Failed to convert CAR to MST: {}", e))?;
32
103
33
-
// Read the header
34
-
let header = reader.header();
35
-
info!("CAR header: {} root CIDs", header.roots().len());
104
+
info!("MST conversion successful, starting record extraction");
105
+
106
+
// Create a new CarImporter for data access since the previous one was consumed
107
+
let mut data_importer = CarImporter::new();
108
+
data_importer
109
+
.import_from_bytes(car_bytes)
110
+
.await
111
+
.map_err(|e| anyhow!("Failed to re-import CAR for data access: {}", e))?;
36
112
37
-
// Track import metadata
38
-
// self.store_import_metadata(import_id, header).await?;
113
+
// Extract all records from the MST
114
+
let records = self
115
+
.extract_records_from_mst(&mst, &data_importer, did)
116
+
.await?;
39
117
40
-
// Process blocks
41
-
let mut block_count = 0;
42
-
while let Some((cid, block_data)) = reader.next_block().await? {
43
-
// Convert iroh-car CID to our CID type for processing
44
-
let our_cid: Cid = cid.to_string().parse()?;
45
-
self.process_car_block(&our_cid, &block_data, import_id)
46
-
.await?;
47
-
block_count += 1;
118
+
info!("Extracted {} records from MST", records.len());
48
119
49
-
if block_count % 100 == 0 {
50
-
info!("Processed {} blocks for import {}", block_count, import_id);
120
+
// Process each record through the appropriate ingestor
121
+
let mut processed_count = 0;
122
+
for record in records {
123
+
match self.process_extracted_record(&record, import_id, did).await {
124
+
Ok(()) => {
125
+
processed_count += 1;
126
+
if processed_count % 10 == 0 {
127
+
info!("Processed {} records so far", processed_count);
128
+
}
129
+
}
130
+
Err(e) => {
131
+
warn!("Failed to process record {}: {}", record.rkey, e);
132
+
// Continue processing other records
133
+
}
51
134
}
52
135
}
53
136
54
137
info!(
55
-
"Completed CAR file processing: {} blocks for import {}",
56
-
block_count, import_id
138
+
"Completed CAR file processing: {} records processed for import {}",
139
+
processed_count, import_id
57
140
);
58
-
// self.mark_import_complete(import_id, block_count).await?;
59
141
60
142
Ok(())
61
143
}
62
144
63
-
/// Process an individual IPLD block from the CAR file
64
-
async fn process_car_block(&self, cid: &Cid, block_data: &[u8], import_id: &str) -> Result<()> {
65
-
// Store the raw block first
66
-
// self.store_raw_block(cid, block_data, import_id).await?;
145
+
/// Extract all Teal records from the MST
146
+
async fn extract_records_from_mst(
147
+
&self,
148
+
mst: &Mst,
149
+
car_importer: &CarImporter,
150
+
_did: &str,
151
+
) -> Result<Vec<ExtractedRecord>> {
152
+
let mut records = Vec::new();
153
+
154
+
// Use the MST iterator to traverse all entries
155
+
let mut stream = mst.iter().into_stream();
67
156
68
-
// Try to decode as IPLD and extract meaningful data
69
-
match self.decode_and_extract_data(cid, block_data).await {
70
-
Ok(Some(extracted_data)) => {
71
-
self.process_extracted_data(&extracted_data, cid, import_id)
72
-
.await?;
73
-
}
74
-
Ok(None) => {
75
-
// Block doesn't contain extractable data, just stored raw
76
-
}
77
-
Err(e) => {
78
-
warn!("Failed to decode block {}: {}", cid, e);
79
-
// Continue processing other blocks
157
+
while let Some(result) = stream.next().await {
158
+
match result {
159
+
Ok((key, record_cid)) => {
160
+
// Check if this is a Teal record based on the key pattern
161
+
if self.is_teal_record_key(&key) {
162
+
info!("๐ต Found Teal record: {} -> {}", key, record_cid);
163
+
if let Some((collection, rkey)) = self.parse_teal_key(&key) {
164
+
info!(" Collection: {}, rkey: {}", collection, rkey);
165
+
// Get the actual record data using the CID
166
+
match self.get_record_data(&record_cid, car_importer).await {
167
+
Ok(Some(data)) => {
168
+
info!(" โ
Successfully got record data for {}", record_cid);
169
+
records.push(ExtractedRecord {
170
+
collection,
171
+
rkey,
172
+
data,
173
+
});
174
+
}
175
+
Ok(None) => {
176
+
warn!(" โ No data found for record CID: {}", record_cid);
177
+
}
178
+
Err(e) => {
179
+
warn!(
180
+
" โ Failed to get record data for {}: {}",
181
+
record_cid, e
182
+
);
183
+
}
184
+
}
185
+
} else {
186
+
warn!(" โ Failed to parse Teal key: {}", key);
187
+
}
188
+
}
189
+
}
190
+
Err(e) => {
191
+
warn!("Error iterating MST: {}", e);
192
+
// Continue with other entries
193
+
}
80
194
}
81
195
}
82
196
83
-
Ok(())
197
+
Ok(records)
84
198
}
85
199
86
-
/// Decode IPLD block and extract AT Protocol data if present
87
-
async fn decode_and_extract_data(
200
+
/// Get record data from the CAR importer using a CID
201
+
async fn get_record_data(
88
202
&self,
89
-
cid: &Cid,
90
-
block_data: &[u8],
91
-
) -> Result<Option<ExtractedData>> {
92
-
// Create IPLD block (convert CID types)
93
-
let libipld_cid: LibipldCid = cid.to_string().parse()?;
94
-
let block: Block<libipld::DefaultParams> = Block::new(libipld_cid, block_data.to_vec())?;
95
-
96
-
// Decode to IPLD (try to decode as DAG-CBOR, which is common in AT Protocol)
97
-
let ipld: Ipld = match block.decode::<DagCborCodec, Ipld>() {
98
-
Ok(ipld) => ipld,
99
-
Err(_) => {
100
-
// If DAG-CBOR fails, try as raw data
101
-
return Ok(None);
102
-
}
103
-
};
104
-
105
-
// Check if this looks like AT Protocol data
106
-
if let Ipld::Map(map) = &ipld {
107
-
// Look for AT Protocol patterns
108
-
if let Some(collection) = map.get("$type").and_then(|v| {
109
-
if let Ipld::String(s) = v {
110
-
Some(s.as_str())
111
-
} else {
112
-
None
203
+
cid: &atmst::Cid,
204
+
car_importer: &CarImporter,
205
+
) -> Result<Option<Value>> {
206
+
// Try to decode the block as CBOR IPLD directly with atmst::Cid
207
+
info!("๐ Attempting to decode CBOR for CID: {}", cid);
208
+
match car_importer.decode_cbor(cid) {
209
+
Ok(ipld) => {
210
+
info!(" โ
Successfully decoded CBOR for CID: {}", cid);
211
+
// Convert IPLD to JSON for processing by existing ingestors
212
+
match self.ipld_to_json(&ipld) {
213
+
Ok(json) => {
214
+
info!(" โ
Successfully converted IPLD to JSON for CID: {}", cid);
215
+
Ok(Some(json))
216
+
}
217
+
Err(e) => {
218
+
warn!(
219
+
" โ Failed to convert IPLD to JSON for CID {}: {}",
220
+
cid, e
221
+
);
222
+
Ok(None)
223
+
}
113
224
}
114
-
}) {
115
-
return Ok(Some(ExtractedData {
116
-
collection: collection.to_string(),
117
-
data: ipld,
118
-
cid: cid.clone(),
119
-
}));
120
225
}
121
-
122
-
// Check for commit structures
123
-
if map.contains_key("ops") && map.contains_key("prev") {
124
-
return Ok(Some(ExtractedData {
125
-
collection: "commit".to_string(),
126
-
data: ipld,
127
-
cid: cid.clone(),
128
-
}));
226
+
Err(e) => {
227
+
warn!(" โ Failed to decode CBOR for CID {}: {}", cid, e);
228
+
Ok(None)
129
229
}
130
230
}
131
-
132
-
Ok(None)
133
231
}
134
232
135
-
/// Process extracted AT Protocol data
136
-
async fn process_extracted_data(
233
+
/// Process a single extracted record through the appropriate ingestor
234
+
async fn process_extracted_record(
137
235
&self,
138
-
data: &ExtractedData,
139
-
cid: &Cid,
140
-
import_id: &str,
236
+
record: &ExtractedRecord,
237
+
_import_id: &str,
238
+
did: &str,
141
239
) -> Result<()> {
142
-
match data.collection.as_str() {
240
+
info!(
241
+
"Processing {} record with rkey: {}",
242
+
record.collection, record.rkey
243
+
);
244
+
245
+
info!(
246
+
"๐ Processing {} record: {}",
247
+
record.collection, record.rkey
248
+
);
249
+
match record.collection.as_str() {
143
250
"fm.teal.alpha.feed.play" => {
144
-
self.process_play_record(&data.data, cid, import_id).await?;
251
+
info!(" ๐ Processing play record...");
252
+
let result = self
253
+
.process_play_record(&record.data, did, &record.rkey)
254
+
.await;
255
+
if result.is_ok() {
256
+
info!(" โ
Successfully processed play record");
257
+
} else {
258
+
warn!(" โ Failed to process play record: {:?}", result);
259
+
}
260
+
result
145
261
}
146
262
"fm.teal.alpha.actor.profile" => {
147
-
self.process_profile_record(&data.data, cid, import_id)
148
-
.await?;
263
+
info!(" ๐ค Processing profile record...");
264
+
let result = self
265
+
.process_profile_record(&record.data, did, &record.rkey)
266
+
.await;
267
+
if result.is_ok() {
268
+
info!(" โ
Successfully processed profile record");
269
+
} else {
270
+
warn!(" โ Failed to process profile record: {:?}", result);
271
+
}
272
+
result
149
273
}
150
274
"fm.teal.alpha.actor.status" => {
151
-
self.process_status_record(&data.data, cid, import_id)
152
-
.await?;
153
-
}
154
-
"commit" => {
155
-
self.process_commit_record(&data.data, cid, import_id)
156
-
.await?;
275
+
info!(" ๐ข Processing status record...");
276
+
let result = self
277
+
.process_status_record(&record.data, did, &record.rkey)
278
+
.await;
279
+
if result.is_ok() {
280
+
info!(" โ
Successfully processed status record");
281
+
} else {
282
+
warn!(" โ Failed to process status record: {:?}", result);
283
+
}
284
+
result
157
285
}
158
286
_ => {
159
-
info!("Unhandled collection type: {}", data.collection);
287
+
warn!("โ Unknown Teal collection: {}", record.collection);
288
+
Ok(())
160
289
}
161
290
}
291
+
}
162
292
163
-
Ok(())
293
+
/// Check if a key represents a Teal record
294
+
fn is_teal_record_key(&self, key: &str) -> bool {
295
+
key.starts_with("fm.teal.alpha.") && key.contains("/")
164
296
}
165
297
166
-
/// Process a Teal play record from IPLD data
167
-
async fn process_play_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
168
-
// Convert IPLD to JSON value for processing by existing ingestors
169
-
let json_value = ipld_to_json(ipld)?;
298
+
/// Parse a Teal MST key to extract collection and rkey
299
+
fn parse_teal_key(&self, key: &str) -> Option<(String, String)> {
300
+
if let Some(slash_pos) = key.rfind('/') {
301
+
let collection = key[..slash_pos].to_string();
302
+
let rkey = key[slash_pos + 1..].to_string();
303
+
Some((collection, rkey))
304
+
} else {
305
+
None
306
+
}
307
+
}
170
308
171
-
// Delegate to existing play ingestor logic
172
-
if let Ok(play_record) =
173
-
serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(json_value)
309
+
/// Process a play record using the existing PlayIngestor
310
+
async fn process_play_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> {
311
+
match serde_json::from_value::<types::fm::teal::alpha::feed::play::RecordData>(data.clone())
174
312
{
175
-
info!("Importing play record from CAR: {}", play_record.track_name);
313
+
Ok(play_record) => {
314
+
let play_ingestor =
315
+
super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
316
+
let uri = super::super::teal::assemble_at_uri(did, "fm.teal.alpha.feed.play", rkey);
176
317
177
-
// Use existing play ingestor for consistency
178
-
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
318
+
play_ingestor
319
+
.insert_play(
320
+
&play_record,
321
+
&uri,
322
+
&format!("car-import-{}", uuid::Uuid::new_v4()),
323
+
did,
324
+
rkey,
325
+
)
326
+
.await?;
179
327
180
-
// Create a synthetic AT URI for the imported record
181
-
let synthetic_did = format!("car-import:{}", import_id);
182
-
let rkey = cid.to_string();
183
-
let uri = super::super::teal::assemble_at_uri(
184
-
&synthetic_did,
185
-
"fm.teal.alpha.feed.play",
186
-
&rkey,
187
-
);
328
+
info!(
329
+
"Successfully stored play record: {} by {:?}",
330
+
play_record.track_name, play_record.artist_names
331
+
);
332
+
Ok(())
333
+
}
334
+
Err(e) => {
335
+
warn!("Failed to deserialize play record data: {}", e);
336
+
Err(anyhow!("Invalid play record format: {}", e))
337
+
}
338
+
}
339
+
}
188
340
189
-
// Store using existing logic
190
-
play_ingestor
191
-
.insert_play(&play_record, &uri, &cid.to_string(), &synthetic_did, &rkey)
192
-
.await?;
341
+
/// Process a profile record using the existing ActorProfileIngestor
342
+
async fn process_profile_record(&self, data: &Value, did: &str, _rkey: &str) -> Result<()> {
343
+
match serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>(
344
+
data.clone(),
345
+
) {
346
+
Ok(profile_record) => {
347
+
let profile_ingestor =
348
+
super::super::teal::actor_profile::ActorProfileIngestor::new(self.sql.clone());
349
+
let did_typed = atrium_api::types::string::Did::new(did.to_string())
350
+
.map_err(|e| anyhow!("Failed to create Did: {}", e))?;
193
351
194
-
// Track the extracted record
195
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.feed.play", Some(&uri)).await?;
352
+
profile_ingestor
353
+
.insert_profile(did_typed, &profile_record)
354
+
.await?;
355
+
356
+
info!(
357
+
"Successfully stored profile record: {:?}",
358
+
profile_record.display_name
359
+
);
360
+
Ok(())
361
+
}
362
+
Err(e) => {
363
+
warn!("Failed to deserialize profile record data: {}", e);
364
+
Err(anyhow!("Invalid profile record format: {}", e))
365
+
}
196
366
}
367
+
}
197
368
198
-
Ok(())
369
+
/// Process a status record using the existing ActorStatusIngestor
370
+
async fn process_status_record(&self, data: &Value, did: &str, rkey: &str) -> Result<()> {
371
+
match serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>(
372
+
data.clone(),
373
+
) {
374
+
Ok(status_record) => {
375
+
let status_ingestor =
376
+
super::super::teal::actor_status::ActorStatusIngestor::new(self.sql.clone());
377
+
let did_typed = atrium_api::types::string::Did::new(did.to_string())
378
+
.map_err(|e| anyhow!("Failed to create Did: {}", e))?;
379
+
380
+
status_ingestor
381
+
.insert_status(
382
+
did_typed,
383
+
rkey,
384
+
&format!("car-import-{}", uuid::Uuid::new_v4()),
385
+
&status_record,
386
+
)
387
+
.await?;
388
+
389
+
info!("Successfully stored status record from CAR import");
390
+
Ok(())
391
+
}
392
+
Err(e) => {
393
+
warn!("Failed to deserialize status record data: {}", e);
394
+
Err(anyhow!("Invalid status record format: {}", e))
395
+
}
396
+
}
199
397
}
200
398
201
-
/// Process a Teal profile record from IPLD data
202
-
async fn process_profile_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
203
-
let json_value = ipld_to_json(ipld)?;
399
+
/// Fetch and process a CAR file from a PDS for a given identity
400
+
pub async fn fetch_and_process_identity_car(&self, handle_or_did: &str) -> Result<String> {
401
+
info!("Fetching CAR file for identity: {}", handle_or_did);
402
+
403
+
// Resolve to DID if needed
404
+
let did = if handle_or_did.starts_with("did:") {
405
+
handle_or_did.to_string()
406
+
} else {
407
+
self.resolve_handle_to_did(handle_or_did).await?
408
+
};
409
+
410
+
// Resolve DID to PDS
411
+
let pds_url = self.resolve_did_to_pds(&did).await?;
412
+
info!("Resolved {} to PDS: {}", did, pds_url);
204
413
205
-
if let Ok(profile_record) =
206
-
serde_json::from_value::<types::fm::teal::alpha::actor::profile::RecordData>(json_value)
207
-
{
208
-
info!(
209
-
"Importing profile record from CAR: {:?}",
210
-
profile_record.display_name
211
-
);
414
+
// Fetch CAR file
415
+
let car_data = self.fetch_car_from_pds(&pds_url, &did).await?;
212
416
213
-
// For now, just log until we have public methods on profile ingestor
214
-
info!(
215
-
"Would store profile record from CAR import {} with CID {}",
216
-
import_id, cid
217
-
);
417
+
// Generate import ID
418
+
let import_id = uuid::Uuid::new_v4().to_string();
218
419
219
-
// Track the extracted record
220
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.profile", None).await?;
221
-
}
420
+
// Process the CAR data
421
+
self.process_car_data(&car_data, &import_id, &did).await?;
222
422
223
-
Ok(())
423
+
Ok(import_id)
224
424
}
225
425
226
-
/// Process a Teal status record from IPLD data
227
-
async fn process_status_record(&self, ipld: &Ipld, cid: &Cid, import_id: &str) -> Result<()> {
228
-
let json_value = ipld_to_json(ipld)?;
426
+
/// Resolve handle to DID
427
+
async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> {
428
+
let url = format!(
429
+
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
430
+
handle
431
+
);
432
+
let response: Value = reqwest::get(&url).await?.json().await?;
229
433
230
-
if let Ok(_status_record) =
231
-
serde_json::from_value::<types::fm::teal::alpha::actor::status::RecordData>(json_value)
232
-
{
233
-
info!("Importing status record from CAR");
434
+
response["did"]
435
+
.as_str()
436
+
.map(|s| s.to_string())
437
+
.ok_or_else(|| anyhow!("Failed to resolve handle to DID"))
438
+
}
234
439
235
-
// For now, just log until we have public methods on status ingestor
236
-
info!(
237
-
"Would store status record from CAR import {} with CID {}",
238
-
import_id, cid
239
-
);
440
+
/// Resolve DID to PDS URL
441
+
async fn resolve_did_to_pds(&self, did: &str) -> Result<String> {
442
+
let url = format!("https://plc.directory/{}", did);
443
+
let response: Value = reqwest::get(&url).await?.json().await?;
240
444
241
-
// Track the extracted record
242
-
// self.store_extracted_record(import_id, cid, "fm.teal.alpha.actor.status", None).await?;
445
+
if let Some(services) = response["service"].as_array() {
446
+
for service in services {
447
+
if service["id"] == "#atproto_pds" {
448
+
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
449
+
return Ok(endpoint.to_string());
450
+
}
451
+
}
452
+
}
243
453
}
244
454
245
-
Ok(())
455
+
Err(anyhow!("Could not resolve PDS for DID: {}", did))
246
456
}
247
457
248
-
/// Process a commit record from IPLD data
249
-
async fn process_commit_record(
250
-
&self,
251
-
_ipld: &Ipld,
252
-
_cid: &Cid,
253
-
_import_id: &str,
254
-
) -> Result<()> {
255
-
info!("Processing commit record from CAR import");
458
+
/// Fetch CAR file from PDS
459
+
async fn fetch_car_from_pds(&self, pds_url: &str, did: &str) -> Result<Vec<u8>> {
460
+
let url = format!("{}/xrpc/com.atproto.sync.getRepo?did={}", pds_url, did);
461
+
let response = reqwest::get(&url).await?;
256
462
257
-
// Store commit metadata for tracking
258
-
// self.store_commit_metadata(ipld, cid, import_id).await?;
463
+
if !response.status().is_success() {
464
+
return Err(anyhow!(
465
+
"Failed to fetch CAR file: HTTP {}",
466
+
response.status()
467
+
));
468
+
}
469
+
470
+
let car_data = response.bytes().await?.to_vec();
471
+
info!("Fetched CAR file: {} bytes", car_data.len());
259
472
260
-
Ok(())
473
+
Ok(car_data)
261
474
}
262
475
263
-
/// Store CAR import metadata
264
-
async fn store_import_metadata(&self, _import_id: &str, _header: &CarHeader) -> Result<()> {
265
-
// TODO: Implement when database tables are ready
266
-
Ok(())
476
+
/// Helper: Convert IPLD to JSON
477
+
#[allow(clippy::only_used_in_recursion)]
478
+
fn ipld_to_json(&self, ipld: &atmst::Ipld) -> Result<Value> {
479
+
use atmst::Ipld;
480
+
481
+
match ipld {
482
+
Ipld::Null => Ok(Value::Null),
483
+
Ipld::Bool(b) => Ok(Value::Bool(*b)),
484
+
Ipld::Integer(i) => {
485
+
if let Ok(i64_val) = i64::try_from(*i) {
486
+
Ok(Value::Number(i64_val.into()))
487
+
} else {
488
+
Ok(Value::String(i.to_string()))
489
+
}
490
+
}
491
+
Ipld::Float(f) => {
492
+
if let Some(num) = serde_json::Number::from_f64(*f) {
493
+
Ok(Value::Number(num))
494
+
} else {
495
+
Err(anyhow!("Invalid float value"))
496
+
}
497
+
}
498
+
Ipld::String(s) => Ok(Value::String(s.clone())),
499
+
Ipld::Bytes(b) => Ok(Value::String(
500
+
base64::engine::general_purpose::STANDARD.encode(b),
501
+
)),
502
+
Ipld::List(list) => {
503
+
let json_array: Result<Vec<Value>> =
504
+
list.iter().map(|v| self.ipld_to_json(v)).collect();
505
+
Ok(Value::Array(json_array?))
506
+
}
507
+
Ipld::Map(map) => {
508
+
let mut json_map = serde_json::Map::new();
509
+
for (key, value) in map {
510
+
json_map.insert(key.clone(), self.ipld_to_json(value)?);
511
+
}
512
+
Ok(Value::Object(json_map))
513
+
}
514
+
Ipld::Link(cid) => Ok(Value::String(cid.to_string())),
515
+
}
267
516
}
517
+
}
268
518
269
-
/// Mark import as complete
270
-
async fn mark_import_complete(&self, _import_id: &str, _block_count: i32) -> Result<()> {
271
-
// TODO: Implement when database tables are ready
519
+
#[async_trait]
520
+
impl LexiconIngestor for CarImportIngestor {
521
+
async fn ingest(&self, message: Event<Value>) -> Result<()> {
522
+
let commit = message
523
+
.commit
524
+
.as_ref()
525
+
.ok_or_else(|| anyhow!("CarImportIngestor requires a commit event"))?;
526
+
527
+
let record = commit
528
+
.record
529
+
.as_ref()
530
+
.ok_or_else(|| anyhow!("CarImportIngestor requires a record in the commit"))?;
531
+
532
+
// Enqueue CAR import job into Redis
533
+
let job = CarImportJob {
534
+
request_id: uuid::Uuid::new_v4(),
535
+
identity: record
536
+
.get("identity")
537
+
.and_then(|v| v.as_str())
538
+
.ok_or_else(|| anyhow!("Missing identity in record"))?
539
+
.to_string(),
540
+
since: None,
541
+
created_at: chrono::Utc::now(),
542
+
description: None,
543
+
};
544
+
let job_payload = serde_json::to_string(&job)?;
545
+
let mut conn = self.get_redis_connection().await?;
546
+
// Specify the expected return type to avoid FromRedisValue fallback issues in edition 2024
547
+
let _: () = conn.lpush(queue_keys::CAR_IMPORT_JOBS, job_payload).await?;
548
+
tracing::info!("Enqueued CAR import job: {}", job.request_id);
549
+
272
550
Ok(())
273
551
}
552
+
}
274
553
275
-
/// Store raw IPLD block
276
-
async fn store_raw_block(
277
-
&self,
278
-
_cid: &Cid,
279
-
_block_data: &[u8],
280
-
_import_id: &str,
281
-
) -> Result<()> {
282
-
// TODO: Implement when database tables are ready
283
-
Ok(())
554
+
#[allow(dead_code)]
555
+
impl CarImportIngestor {
556
+
/// Download CAR file from URL
557
+
async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> {
558
+
let response = reqwest::get(url).await?;
559
+
Ok(response.bytes().await?.to_vec())
284
560
}
285
561
286
-
/// Store commit metadata
287
-
async fn store_commit_metadata(&self, _ipld: &Ipld, _cid: &Cid, import_id: &str) -> Result<()> {
288
-
info!("Would store commit metadata from CAR import {}", import_id);
289
-
Ok(())
562
+
/// Import CAR data from bytes (public interface)
563
+
pub async fn import_car_bytes(&self, car_data: &[u8], did: &str) -> Result<String> {
564
+
let import_id = uuid::Uuid::new_v4().to_string();
565
+
self.process_car_data(car_data, &import_id, did).await?;
566
+
Ok(import_id)
290
567
}
291
568
292
-
/// Store extracted record tracking
293
-
async fn store_extracted_record(
294
-
&self,
295
-
_import_id: &str,
296
-
_cid: &Cid,
297
-
_collection: &str,
298
-
_record_uri: Option<&str>,
299
-
) -> Result<()> {
300
-
// TODO: Implement when database tables are ready
301
-
Ok(())
569
+
/// Consolidate synthetic artists with MusicBrainz artists
570
+
pub async fn consolidate_synthetic_artists(&self, min_confidence: f64) -> Result<usize> {
571
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
572
+
play_ingestor
573
+
.consolidate_synthetic_artists(min_confidence)
574
+
.await
302
575
}
303
576
304
-
/// Fetch and process CAR file for a given identity (handle or DID)
305
-
pub async fn fetch_and_process_identity_car(&self, identity: &str) -> Result<String> {
306
-
info!(
307
-
"Starting CAR fetch and processing for identity: {}",
308
-
identity
309
-
);
577
+
/// Consolidate duplicate releases
578
+
pub async fn consolidate_duplicate_releases(&self, min_confidence: f64) -> Result<usize> {
579
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
580
+
play_ingestor
581
+
.consolidate_duplicate_releases(min_confidence)
582
+
.await
583
+
}
310
584
311
-
// Resolve identity to DID and PDS
312
-
let (user_did, pds_host) = self.resolve_user_to_pds(identity).await?;
313
-
info!(
314
-
"Resolved {} to DID {} on PDS {}",
315
-
identity, user_did, pds_host
316
-
);
585
+
/// Consolidate duplicate recordings
586
+
pub async fn consolidate_duplicate_recordings(&self, min_confidence: f64) -> Result<usize> {
587
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
588
+
play_ingestor
589
+
.consolidate_duplicate_recordings(min_confidence)
590
+
.await
591
+
}
317
592
318
-
// Fetch CAR file from PDS
319
-
let car_data = self.fetch_car_from_pds(&pds_host, &user_did, None).await?;
320
-
info!(
321
-
"Successfully fetched CAR file for {} ({} bytes)",
322
-
user_did,
323
-
car_data.len()
324
-
);
593
+
/// Preview consolidation candidates before running consolidation
594
+
pub async fn preview_consolidation_candidates(&self, min_confidence: f64) -> Result<()> {
595
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
596
+
play_ingestor
597
+
.preview_consolidation_candidates(min_confidence)
598
+
.await
599
+
}
325
600
326
-
// Generate import ID
327
-
let import_id = format!(
328
-
"pds-{}-{}",
329
-
user_did.replace(":", "-"),
330
-
chrono::Utc::now().timestamp()
331
-
);
601
+
/// Run full batch consolidation for all entity types
602
+
pub async fn run_full_consolidation(&self) -> Result<()> {
603
+
let play_ingestor = super::super::teal::feed_play::PlayIngestor::new(self.sql.clone());
604
+
play_ingestor.run_full_consolidation().await
605
+
}
606
+
}
332
607
333
-
// Process through existing pipeline
334
-
self.process_car_data(&car_data, &import_id).await?;
608
+
// Removed unused helper struct for extracted records.
335
609
336
-
info!("โ
CAR import completed successfully for {}", identity);
337
-
Ok(import_id)
338
-
}
610
+
#[cfg(test)]
611
+
mod tests {
612
+
use super::*;
613
+
use atmst::{CarBuilder, Ipld};
614
+
use std::collections::BTreeMap;
339
615
340
-
/// Resolve a user identifier (DID or handle) to their DID and PDS host
341
-
async fn resolve_user_to_pds(&self, user_identifier: &str) -> Result<(String, String)> {
342
-
if user_identifier.starts_with("did:") {
343
-
// User provided a DID directly, resolve to PDS
344
-
let pds_host = self.resolve_did_to_pds(user_identifier).await?;
345
-
Ok((user_identifier.to_string(), pds_host))
346
-
} else {
347
-
// User provided a handle, resolve to DID then PDS
348
-
let user_did = self.resolve_handle_to_did(user_identifier).await?;
349
-
let pds_host = self.resolve_did_to_pds(&user_did).await?;
350
-
Ok((user_did, pds_host))
351
-
}
616
+
fn create_mock_teal_play_record() -> Ipld {
617
+
let mut record = BTreeMap::new();
618
+
record.insert(
619
+
"$type".to_string(),
620
+
Ipld::String("fm.teal.alpha.feed.play".to_string()),
621
+
);
622
+
record.insert(
623
+
"track_name".to_string(),
624
+
Ipld::String("Test Song".to_string()),
625
+
);
626
+
record.insert(
627
+
"artist_names".to_string(),
628
+
Ipld::List(vec![Ipld::String("Test Artist".to_string())]),
629
+
);
630
+
record.insert("duration".to_string(), Ipld::Integer(180000));
631
+
record.insert(
632
+
"created_at".to_string(),
633
+
Ipld::String("2024-01-01T00:00:00Z".to_string()),
634
+
);
635
+
Ipld::Map(record)
352
636
}
353
637
354
-
/// Resolve a handle to a DID using com.atproto.identity.resolveHandle
355
-
async fn resolve_handle_to_did(&self, handle: &str) -> Result<String> {
356
-
let url = format!(
357
-
"https://bsky.social/xrpc/com.atproto.identity.resolveHandle?handle={}",
358
-
handle
638
+
fn create_mock_teal_profile_record() -> Ipld {
639
+
let mut record = BTreeMap::new();
640
+
record.insert(
641
+
"$type".to_string(),
642
+
Ipld::String("fm.teal.alpha.actor.profile".to_string()),
359
643
);
644
+
record.insert(
645
+
"display_name".to_string(),
646
+
Ipld::String("Test User".to_string()),
647
+
);
648
+
record.insert(
649
+
"description".to_string(),
650
+
Ipld::String("Music lover".to_string()),
651
+
);
652
+
Ipld::Map(record)
653
+
}
360
654
361
-
let response = reqwest::get(&url).await?;
362
-
if !response.status().is_success() {
363
-
return Err(anyhow!(
364
-
"Failed to resolve handle {}: {}",
365
-
handle,
366
-
response.status()
367
-
));
368
-
}
655
+
async fn create_test_car_with_teal_records() -> Result<Bytes> {
656
+
let mut builder = CarBuilder::new();
369
657
370
-
let json: serde_json::Value = response.json().await?;
371
-
let did = json["did"]
372
-
.as_str()
373
-
.ok_or_else(|| anyhow!("No DID found in response for handle {}", handle))?;
658
+
// Create test Teal records
659
+
let play_record = create_mock_teal_play_record();
660
+
let profile_record = create_mock_teal_profile_record();
374
661
375
-
Ok(did.to_string())
376
-
}
662
+
// Add records to CAR
663
+
let play_cid = builder.add_cbor(&play_record)?;
664
+
let profile_cid = builder.add_cbor(&profile_record)?;
377
665
378
-
/// Resolve a DID to their PDS host using DID document
379
-
async fn resolve_did_to_pds(&self, did: &str) -> Result<String> {
380
-
// For DID:plc, use the PLC directory
381
-
if did.starts_with("did:plc:") {
382
-
let url = format!("https://plc.directory/{}", did);
666
+
// Add roots (in a real MST, these would be MST nodes, but for testing this is sufficient)
667
+
builder.add_root(play_cid);
668
+
builder.add_root(profile_cid);
383
669
384
-
let response = reqwest::get(&url).await?;
385
-
if !response.status().is_success() {
386
-
return Err(anyhow!(
387
-
"Failed to resolve DID {}: {}",
388
-
did,
389
-
response.status()
390
-
));
391
-
}
670
+
let importer = builder.build();
671
+
importer
672
+
.export_to_bytes()
673
+
.await
674
+
.map_err(|e| anyhow!("Failed to export CAR: {}", e))
675
+
}
392
676
393
-
let doc: serde_json::Value = response.json().await?;
677
+
#[test]
678
+
fn test_parse_teal_key() {
679
+
// This test doesn't need a database connection or async
680
+
let key = "fm.teal.alpha.feed.play/3k2akjdlkjsf";
394
681
395
-
// Find the PDS service endpoint
396
-
if let Some(services) = doc["service"].as_array() {
397
-
for service in services {
398
-
if service["id"].as_str() == Some("#atproto_pds") {
399
-
if let Some(endpoint) = service["serviceEndpoint"].as_str() {
400
-
// Extract hostname from URL
401
-
let parsed_url = url::Url::parse(endpoint)?;
402
-
let host = parsed_url
403
-
.host_str()
404
-
.ok_or_else(|| anyhow!("Invalid PDS endpoint URL: {}", endpoint))?;
405
-
return Ok(host.to_string());
406
-
}
407
-
}
408
-
}
409
-
}
682
+
// Test the parsing logic directly
683
+
if let Some(slash_pos) = key.rfind('/') {
684
+
let collection = key[..slash_pos].to_string();
685
+
let rkey = key[slash_pos + 1..].to_string();
410
686
411
-
Err(anyhow!("No PDS service found in DID document for {}", did))
687
+
assert_eq!(collection, "fm.teal.alpha.feed.play");
688
+
assert_eq!(rkey, "3k2akjdlkjsf");
412
689
} else {
413
-
Err(anyhow!("Unsupported DID method: {}", did))
690
+
panic!("Should have found slash in key");
414
691
}
415
692
}
416
693
417
-
/// Fetch CAR file from PDS using com.atproto.sync.getRepo
418
-
async fn fetch_car_from_pds(
419
-
&self,
420
-
pds_host: &str,
421
-
did: &str,
422
-
since: Option<&str>,
423
-
) -> Result<Vec<u8>> {
424
-
let mut url = format!(
425
-
"https://{}/xrpc/com.atproto.sync.getRepo?did={}",
426
-
pds_host, did
427
-
);
428
-
429
-
if let Some(since_rev) = since {
430
-
url.push_str(&format!("&since={}", since_rev));
694
+
#[test]
695
+
fn test_is_teal_record_key() {
696
+
// Test the logic directly without needing an ingestor instance
697
+
fn is_teal_record_key(key: &str) -> bool {
698
+
key.starts_with("fm.teal.alpha.") && key.contains("/")
431
699
}
432
700
433
-
info!("Fetching CAR file from: {}", url);
701
+
assert!(is_teal_record_key("fm.teal.alpha.feed.play/abc123"));
702
+
assert!(is_teal_record_key("fm.teal.alpha.profile/def456"));
703
+
assert!(!is_teal_record_key("app.bsky.feed.post/xyz789"));
704
+
assert!(!is_teal_record_key("fm.teal.alpha.feed.play")); // No rkey
705
+
}
434
706
435
-
let response = reqwest::get(&url).await?;
436
-
if !response.status().is_success() {
437
-
return Err(anyhow!(
438
-
"Failed to fetch CAR from PDS {}: {}",
439
-
pds_host,
440
-
response.status()
441
-
));
442
-
}
707
+
#[test]
708
+
fn test_ipld_to_json_conversion() {
709
+
// Test IPLD to JSON conversion logic directly
710
+
use atmst::Ipld;
711
+
use std::collections::BTreeMap;
443
712
444
-
// Verify content type
445
-
let content_type = response
446
-
.headers()
447
-
.get("content-type")
448
-
.and_then(|h| h.to_str().ok())
449
-
.unwrap_or("");
713
+
let mut record = BTreeMap::new();
714
+
record.insert(
715
+
"$type".to_string(),
716
+
Ipld::String("fm.teal.alpha.feed.play".to_string()),
717
+
);
718
+
record.insert(
719
+
"track_name".to_string(),
720
+
Ipld::String("Test Song".to_string()),
721
+
);
722
+
record.insert("duration".to_string(), Ipld::Integer(180000));
723
+
let play_record = Ipld::Map(record);
450
724
451
-
if !content_type.contains("application/vnd.ipld.car") {
452
-
return Err(anyhow!("Unexpected content type: {}", content_type));
725
+
// Test the conversion logic inline
726
+
fn ipld_to_json(ipld: &Ipld) -> Result<Value> {
727
+
match ipld {
728
+
Ipld::Null => Ok(Value::Null),
729
+
Ipld::Bool(b) => Ok(Value::Bool(*b)),
730
+
Ipld::Integer(i) => {
731
+
if let Ok(i64_val) = i64::try_from(*i) {
732
+
Ok(Value::Number(i64_val.into()))
733
+
} else {
734
+
Ok(Value::String(i.to_string()))
735
+
}
736
+
}
737
+
Ipld::String(s) => Ok(Value::String(s.clone())),
738
+
Ipld::Map(map) => {
739
+
let mut json_map = serde_json::Map::new();
740
+
for (key, value) in map {
741
+
json_map.insert(key.clone(), ipld_to_json(value)?);
742
+
}
743
+
Ok(Value::Object(json_map))
744
+
}
745
+
_ => Ok(Value::Null), // Simplified for test
746
+
}
453
747
}
454
748
455
-
let car_data = response.bytes().await?;
456
-
Ok(car_data.to_vec())
749
+
let json_result = ipld_to_json(&play_record);
750
+
assert!(json_result.is_ok());
751
+
let json = json_result.unwrap();
752
+
assert_eq!(json["$type"], "fm.teal.alpha.feed.play");
753
+
assert_eq!(json["track_name"], "Test Song");
754
+
assert_eq!(json["duration"], 180000);
457
755
}
458
-
}
459
756
460
-
#[async_trait]
461
-
impl LexiconIngestor for CarImportIngestor {
462
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
463
-
// For CAR imports, we expect the message to contain CAR file data
464
-
// This could be a file path, URL, or base64 encoded data
757
+
#[tokio::test]
758
+
async fn test_car_creation_and_basic_parsing() -> Result<()> {
759
+
// Test that we can create a CAR file with Teal records and parse it
760
+
let car_bytes = create_test_car_with_teal_records().await?;
465
761
466
-
if let Some(commit) = &message.commit {
467
-
if let Some(record) = &commit.record {
468
-
// Check if this is a CAR import request
469
-
if let Some(car_data_field) = record.get("carData") {
470
-
let import_id = format!("{}:{}", message.did, commit.rkey);
762
+
// Verify we can import the CAR with atmst
763
+
let mut importer = CarImporter::new();
764
+
importer.import_from_bytes(car_bytes).await?;
765
+
766
+
assert!(!importer.is_empty());
767
+
assert!(importer.len() >= 2); // Should have at least our 2 test records
471
768
472
-
match car_data_field {
473
-
Value::String(base64_data) => {
474
-
// Decode base64 CAR data
475
-
if let Ok(car_bytes) = general_purpose::STANDARD.decode(base64_data) {
476
-
self.process_car_data(&car_bytes, &import_id).await?;
477
-
} else {
478
-
return Err(anyhow!("Invalid base64 CAR data"));
479
-
}
480
-
}
481
-
Value::Object(obj) => {
482
-
// Handle different CAR data formats (URL, file path, etc.)
483
-
if let Some(Value::String(url)) = obj.get("url") {
484
-
// Download and process CAR from URL
485
-
let car_bytes = self.download_car_file(url).await?;
486
-
self.process_car_data(&car_bytes, &import_id).await?;
487
-
}
488
-
}
489
-
_ => {
490
-
return Err(anyhow!("Unsupported CAR data format"));
491
-
}
492
-
}
493
-
} else {
494
-
return Err(anyhow!("No CAR data found in record"));
769
+
// Test that we can decode the records
770
+
for cid in importer.cids() {
771
+
if let Ok(Ipld::Map(map)) = importer.decode_cbor(&cid) {
772
+
if let Some(Ipld::String(record_type)) = map.get("$type") {
773
+
assert!(record_type.starts_with("fm.teal.alpha."));
774
+
println!("Found Teal record: {}", record_type);
495
775
}
496
776
}
497
777
}
498
778
499
779
Ok(())
500
780
}
501
-
}
781
+
782
+
#[tokio::test]
783
+
#[ignore = "requires database connection"]
784
+
async fn test_full_car_import_integration() -> Result<()> {
785
+
// This test requires a real database connection
786
+
let database_url = std::env::var("DATABASE_URL")
787
+
.unwrap_or_else(|_| "postgresql://localhost/teal_test".to_string());
788
+
789
+
let pool = sqlx::PgPool::connect(&database_url).await?;
790
+
let ingestor = CarImportIngestor::new(pool);
502
791
503
-
impl CarImportIngestor {
504
-
/// Download CAR file from URL
505
-
async fn download_car_file(&self, url: &str) -> Result<Vec<u8>> {
506
-
let response = reqwest::get(url).await?;
507
-
let bytes = response.bytes().await?;
508
-
Ok(bytes.to_vec())
509
-
}
510
-
}
792
+
// Create test CAR with Teal records
793
+
let car_bytes = create_test_car_with_teal_records().await?;
794
+
795
+
// Test the full import process
796
+
let import_id = uuid::Uuid::new_v4().to_string();
797
+
let test_did = "did:plc:test123";
511
798
512
-
/// Helper struct for extracted AT Protocol data
513
-
#[derive(Debug)]
514
-
struct ExtractedData {
515
-
collection: String,
516
-
data: Ipld,
517
-
cid: Cid,
518
-
}
799
+
// This should work with our new atmst implementation
800
+
let result = ingestor
801
+
.process_car_data(&car_bytes, &import_id, test_did)
802
+
.await;
519
803
520
-
/// Convert IPLD to JSON Value for compatibility with existing ingestors
521
-
fn ipld_to_json(ipld: &Ipld) -> Result<Value> {
522
-
match ipld {
523
-
Ipld::Null => Ok(Value::Null),
524
-
Ipld::Bool(b) => Ok(Value::Bool(*b)),
525
-
Ipld::Integer(i) => {
526
-
// Convert i128 to i64 for JSON compatibility
527
-
if let Ok(i64_val) = i64::try_from(*i) {
528
-
Ok(Value::Number(i64_val.into()))
529
-
} else {
530
-
// Fall back to string representation for very large integers
531
-
Ok(Value::String(i.to_string()))
532
-
}
533
-
}
534
-
Ipld::Float(f) => {
535
-
if let Some(num) = serde_json::Number::from_f64(*f) {
536
-
Ok(Value::Number(num))
537
-
} else {
538
-
Err(anyhow!("Invalid float value"))
804
+
// For now, we expect this to work but records might not actually get stored
805
+
// because the test CAR doesn't have proper MST structure
806
+
match result {
807
+
Ok(()) => {
808
+
println!("โ
CAR import completed successfully");
539
809
}
540
-
}
541
-
Ipld::String(s) => Ok(Value::String(s.clone())),
542
-
Ipld::Bytes(b) => {
543
-
// Convert bytes to base64 string
544
-
Ok(Value::String(general_purpose::STANDARD.encode(b)))
545
-
}
546
-
Ipld::List(list) => {
547
-
let json_array: Result<Vec<Value>> = list.iter().map(ipld_to_json).collect();
548
-
Ok(Value::Array(json_array?))
549
-
}
550
-
Ipld::Map(map) => {
551
-
let mut json_map = serde_json::Map::new();
552
-
for (key, value) in map {
553
-
json_map.insert(key.clone(), ipld_to_json(value)?);
810
+
Err(e) => {
811
+
println!("โ ๏ธ CAR import failed (expected for test data): {}", e);
812
+
// This is expected since our test CAR doesn't have proper MST structure
554
813
}
555
-
Ok(Value::Object(json_map))
556
814
}
557
-
Ipld::Link(cid) => {
558
-
// Convert CID to string representation
559
-
Ok(Value::String(cid.to_string()))
560
-
}
815
+
816
+
Ok(())
561
817
}
562
818
}
+51
services/cadet/src/ingestors/car/jobs.rs
+51
services/cadet/src/ingestors/car/jobs.rs
···
1
+
use chrono::{DateTime, Utc};
2
+
use serde::{Deserialize, Serialize};
3
+
use uuid::Uuid;
4
+
5
+
#[derive(Debug, Clone, Serialize, Deserialize)]
6
+
pub struct CarImportJob {
7
+
pub request_id: Uuid,
8
+
pub identity: String,
9
+
pub since: Option<DateTime<Utc>>,
10
+
pub created_at: DateTime<Utc>,
11
+
pub description: Option<String>,
12
+
}
13
+
14
+
#[derive(Debug, Clone, Serialize, Deserialize)]
15
+
pub struct CarImportJobStatus {
16
+
pub status: JobStatus,
17
+
pub created_at: DateTime<Utc>,
18
+
pub started_at: Option<DateTime<Utc>>,
19
+
pub completed_at: Option<DateTime<Utc>>,
20
+
pub error_message: Option<String>,
21
+
pub progress: Option<JobProgress>,
22
+
}
23
+
24
+
#[derive(Debug, Clone, Serialize, Deserialize)]
25
+
pub enum JobStatus {
26
+
Pending,
27
+
Processing,
28
+
Completed,
29
+
Failed,
30
+
Cancelled,
31
+
}
32
+
33
+
#[derive(Debug, Clone, Serialize, Deserialize)]
34
+
pub struct JobProgress {
35
+
pub step: String,
36
+
pub user_did: Option<String>,
37
+
pub pds_host: Option<String>,
38
+
pub car_size_bytes: Option<u64>,
39
+
pub blocks_processed: Option<u64>,
40
+
}
41
+
42
+
pub mod queue_keys {
43
+
use uuid::Uuid;
44
+
45
+
pub const CAR_IMPORT_JOBS: &str = "car_import_jobs";
46
+
pub const CAR_IMPORT_STATUS_PREFIX: &str = "car_import_status";
47
+
48
+
pub fn job_status_key(job_id: &Uuid) -> String {
49
+
format!("{}:{}", CAR_IMPORT_STATUS_PREFIX, job_id)
50
+
}
51
+
}
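These types only define the queue payload and status bookkeeping; the ingestor enqueues jobs onto `queue_keys::CAR_IMPORT_JOBS` with `LPUSH`. The consuming side is not part of this change, but a worker could pop and dispatch jobs roughly as in this hedged sketch, which only relies on `CarImportJob`, `queue_keys`, and the `fetch_and_process_identity_car` method shown earlier:

```rust
use redis::AsyncCommands;

// Hypothetical worker loop for the CAR import queue (not part of this diff).
// Assumes it lives next to jobs.rs so CarImportJob, queue_keys and
// CarImportIngestor are in scope.
async fn run_car_import_worker(
    mut conn: redis::aio::MultiplexedConnection,
    ingestor: &CarImportIngestor,
) -> anyhow::Result<()> {
    loop {
        // Jobs are enqueued with LPUSH, so RPOP gives FIFO ordering.
        let payload: Option<String> = conn.rpop(queue_keys::CAR_IMPORT_JOBS, None).await?;
        let Some(payload) = payload else {
            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
            continue;
        };
        let job: CarImportJob = serde_json::from_str(&payload)?;
        match ingestor.fetch_and_process_identity_car(&job.identity).await {
            Ok(import_id) => tracing::info!("job {} imported as {}", job.request_id, import_id),
            Err(e) => tracing::warn!("job {} failed: {}", job.request_id, e),
        }
    }
}
```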
+2
-1
services/cadet/src/ingestors/car/mod.rs
+2
-1
services/cadet/src/ingestors/car/mod.rs
+7
-7
services/cadet/src/ingestors/teal/actor_status.rs
+7
-7
services/cadet/src/ingestors/teal/actor_status.rs
···
23
23
status: &types::fm::teal::alpha::actor::status::RecordData,
24
24
) -> anyhow::Result<()> {
25
25
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
26
-
26
+
27
27
let record_json = serde_json::to_value(status)?;
28
-
28
+
29
29
sqlx::query!(
30
30
r#"
31
31
INSERT INTO statii (uri, did, rkey, cid, record)
···
43
43
)
44
44
.execute(&self.sql)
45
45
.await?;
46
-
46
+
47
47
Ok(())
48
48
}
49
49
50
50
pub async fn remove_status(&self, did: Did, rkey: &str) -> anyhow::Result<()> {
51
51
let uri = assemble_at_uri(did.as_str(), "fm.teal.alpha.actor.status", rkey);
52
-
52
+
53
53
sqlx::query!(
54
54
r#"
55
55
DELETE FROM statii WHERE uri = $1
···
58
58
)
59
59
.execute(&self.sql)
60
60
.await?;
61
-
61
+
62
62
Ok(())
63
63
}
64
64
}
···
71
71
let record = serde_json::from_value::<
72
72
types::fm::teal::alpha::actor::status::RecordData,
73
73
>(record.clone())?;
74
-
74
+
75
75
if let Some(ref commit) = message.commit {
76
76
if let Some(ref cid) = commit.cid {
77
77
self.insert_status(
···
98
98
}
99
99
Ok(())
100
100
}
101
-
}
101
+
}
+1132
-62
services/cadet/src/ingestors/teal/feed_play.rs
+1132
-62
services/cadet/src/ingestors/teal/feed_play.rs
···
7
7
8
8
use super::assemble_at_uri;
9
9
10
+
#[derive(Debug, Clone)]
11
+
struct FuzzyMatchCandidate {
12
+
artist_id: i32,
13
+
name: String,
14
+
confidence: f64,
15
+
}
16
+
17
+
struct MusicBrainzCleaner;
18
+
19
+
impl MusicBrainzCleaner {
20
+
/// List of common "guff" words found in parentheses that should be removed
21
+
const GUFF_WORDS: &'static [&'static str] = &[
22
+
"a cappella",
23
+
"acoustic",
24
+
"bonus",
25
+
"censored",
26
+
"clean",
27
+
"club",
28
+
"clubmix",
29
+
"composition",
30
+
"cut",
31
+
"dance",
32
+
"demo",
33
+
"dialogue",
34
+
"dirty",
35
+
"edit",
36
+
"excerpt",
37
+
"explicit",
38
+
"extended",
39
+
"feat",
40
+
"featuring",
41
+
"ft",
42
+
"instrumental",
43
+
"interlude",
44
+
"intro",
45
+
"karaoke",
46
+
"live",
47
+
"long",
48
+
"main",
49
+
"maxi",
50
+
"megamix",
51
+
"mix",
52
+
"mono",
53
+
"official",
54
+
"orchestral",
55
+
"original",
56
+
"outro",
57
+
"outtake",
58
+
"outtakes",
59
+
"piano",
60
+
"quadraphonic",
61
+
"radio",
62
+
"rap",
63
+
"re-edit",
64
+
"reedit",
65
+
"refix",
66
+
"rehearsal",
67
+
"reinterpreted",
68
+
"released",
69
+
"release",
70
+
"remake",
71
+
"remastered",
72
+
"remaster",
73
+
"master",
74
+
"remix",
75
+
"remixed",
76
+
"remode",
77
+
"reprise",
78
+
"rework",
79
+
"reworked",
80
+
"rmx",
81
+
"session",
82
+
"short",
83
+
"single",
84
+
"skit",
85
+
"stereo",
86
+
"studio",
87
+
"take",
88
+
"takes",
89
+
"tape",
90
+
"track",
91
+
"tryout",
92
+
"uncensored",
93
+
"unknown",
94
+
"unplugged",
95
+
"untitled",
96
+
"version",
97
+
"ver",
98
+
"video",
99
+
"vocal",
100
+
"vs",
101
+
"with",
102
+
"without",
103
+
];
104
+
105
+
/// Clean artist name by removing common variations and guff
106
+
fn clean_artist_name(name: &str) -> String {
107
+
let mut cleaned = name.trim().to_string();
108
+
109
+
// Remove common featuring patterns
110
+
if let Some(pos) = cleaned.to_lowercase().find(" feat") {
111
+
cleaned = cleaned[..pos].trim().to_string();
112
+
}
113
+
if let Some(pos) = cleaned.to_lowercase().find(" ft.") {
114
+
cleaned = cleaned[..pos].trim().to_string();
115
+
}
116
+
if let Some(pos) = cleaned.to_lowercase().find(" featuring") {
117
+
cleaned = cleaned[..pos].trim().to_string();
118
+
}
119
+
120
+
// Remove parenthetical content if it looks like guff
121
+
if let Some(start) = cleaned.find('(') {
122
+
if let Some(end) = cleaned.find(')') {
123
+
let paren_content = &cleaned[start + 1..end].to_lowercase();
124
+
if Self::is_likely_guff(paren_content) {
125
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
126
+
.trim()
127
+
.to_string();
128
+
}
129
+
}
130
+
}
131
+
132
+
// Remove brackets with guff
133
+
if let Some(start) = cleaned.find('[') {
134
+
if let Some(end) = cleaned.find(']') {
135
+
let bracket_content = &cleaned[start + 1..end].to_lowercase();
136
+
if Self::is_likely_guff(bracket_content) {
137
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
138
+
.trim()
139
+
.to_string();
140
+
}
141
+
}
142
+
}
143
+
144
+
// Remove common prefixes/suffixes
145
+
if cleaned.to_lowercase().starts_with("the ") && cleaned.len() > 4 {
146
+
let without_the = &cleaned[4..];
147
+
if !without_the.trim().is_empty() {
148
+
return without_the.trim().to_string();
149
+
}
150
+
}
151
+
152
+
cleaned.trim().to_string()
153
+
}
154
+
155
+
/// Clean track name by removing common variations and guff
156
+
fn clean_track_name(name: &str) -> String {
157
+
let mut cleaned = name.trim().to_string();
158
+
159
+
// Remove parenthetical content if it looks like guff
160
+
if let Some(start) = cleaned.find('(') {
161
+
if let Some(end) = cleaned.find(')') {
162
+
let paren_content = &cleaned[start + 1..end].to_lowercase();
163
+
if Self::is_likely_guff(paren_content) {
164
+
cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 1..])
165
+
.trim()
166
+
.to_string();
167
+
}
168
+
}
169
+
}
170
+
171
+
// Remove featuring artists from track titles
172
+
if let Some(pos) = cleaned.to_lowercase().find(" feat") {
173
+
cleaned = cleaned[..pos].trim().to_string();
174
+
}
175
+
if let Some(pos) = cleaned.to_lowercase().find(" ft.") {
176
+
cleaned = cleaned[..pos].trim().to_string();
177
+
}
178
+
179
+
cleaned.trim().to_string()
180
+
}
181
+
182
+
/// Check if parenthetical content is likely "guff" that should be removed
183
+
fn is_likely_guff(content: &str) -> bool {
184
+
let content_lower = content.to_lowercase();
185
+
let words: Vec<&str> = content_lower.split_whitespace().collect();
186
+
187
+
// If most words are guff words, consider it guff
188
+
let guff_word_count = words
189
+
.iter()
190
+
.filter(|word| Self::GUFF_WORDS.contains(word))
191
+
.count();
192
+
193
+
// Also check for years (19XX or 20XX)
194
+
let has_year = content_lower.chars().collect::<String>().contains("19")
195
+
|| content_lower.contains("20");
196
+
197
+
// Consider it guff if >50% are guff words, or if it contains years, or if it's short and common
198
+
guff_word_count > words.len() / 2
199
+
|| has_year
200
+
|| (words.len() <= 2
201
+
&& Self::GUFF_WORDS
202
+
.iter()
203
+
.any(|&guff| content_lower.contains(guff)))
204
+
}
205
+
206
+
/// Normalize text for comparison (remove special chars, lowercase, etc.)
207
+
fn normalize_for_comparison(text: &str) -> String {
208
+
text.chars()
209
+
.filter(|c| c.is_alphanumeric() || c.is_whitespace())
210
+
.collect::<String>()
211
+
.to_lowercase()
212
+
.split_whitespace()
213
+
.collect::<Vec<&str>>()
214
+
.join(" ")
215
+
}
216
+
}
217
+
10
218
pub struct PlayIngestor {
11
219
sql: PgPool,
12
220
}
···
58
266
Self { sql }
59
267
}
60
268
61
-
/// Inserts or updates an artist in the database.
62
-
/// Returns the Uuid of the artist.
63
-
async fn insert_artist(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
64
-
let artist_uuid = Uuid::parse_str(mbid)?;
65
-
let res = sqlx::query!(
269
+
/// Batch consolidate synthetic artists that match existing MusicBrainz artists
270
+
pub async fn consolidate_synthetic_artists(
271
+
&self,
272
+
min_confidence: f64,
273
+
) -> anyhow::Result<usize> {
274
+
tracing::info!(
275
+
"๐ Starting batch consolidation of synthetic artists with confidence >= {:.2}",
276
+
min_confidence
277
+
);
278
+
279
+
let consolidation_candidates = sqlx::query!(
280
+
r#"
281
+
SELECT DISTINCT
282
+
ae1.id as synthetic_id,
283
+
ae1.name as synthetic_name,
284
+
ae2.id as target_id,
285
+
ae2.name as target_name,
286
+
ae2.mbid as target_mbid,
287
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score
288
+
FROM artists_extended ae1
289
+
CROSS JOIN artists_extended ae2
290
+
WHERE ae1.id != ae2.id
291
+
AND ae1.mbid_type = 'synthetic'
292
+
AND ae2.mbid_type = 'musicbrainz'
293
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1
294
+
ORDER BY similarity_score DESC
295
+
"#,
296
+
min_confidence as f32
297
+
)
298
+
.fetch_all(&self.sql)
299
+
.await?;
300
+
301
+
let mut consolidated_count = 0;
302
+
303
+
for candidate in consolidation_candidates {
304
+
let synthetic_id = candidate.synthetic_id;
305
+
let target_id = candidate.target_id;
306
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
307
+
308
+
// Double-check with our improved similarity calculation
309
+
let calculated_similarity =
310
+
Self::calculate_similarity(&candidate.synthetic_name, &candidate.target_name, true);
311
+
312
+
let final_confidence = similarity.max(calculated_similarity);
313
+
314
+
if final_confidence >= min_confidence {
315
+
// Move all play relationships from synthetic artist to MusicBrainz artist
316
+
let moved_plays = sqlx::query!(
317
+
r#"
318
+
UPDATE play_to_artists_extended
319
+
SET artist_id = $1, artist_name = $2
320
+
WHERE artist_id = $3
321
+
AND NOT EXISTS (
322
+
SELECT 1 FROM play_to_artists_extended existing
323
+
WHERE existing.play_uri = play_to_artists_extended.play_uri
324
+
AND existing.artist_id = $1
325
+
)
326
+
"#,
327
+
target_id,
328
+
candidate.target_name,
329
+
synthetic_id
330
+
)
331
+
.execute(&self.sql)
332
+
.await?;
333
+
334
+
// Remove duplicate relationships that couldn't be moved
335
+
sqlx::query!(
336
+
"DELETE FROM play_to_artists_extended WHERE artist_id = $1",
337
+
synthetic_id
338
+
)
339
+
.execute(&self.sql)
340
+
.await?;
341
+
342
+
// Remove the synthetic artist
343
+
sqlx::query!("DELETE FROM artists_extended WHERE id = $1", synthetic_id)
344
+
.execute(&self.sql)
345
+
.await?;
346
+
347
+
consolidated_count += 1;
348
+
349
+
tracing::info!(
350
+
"โ
Consolidated '{}' โ '{}' (confidence: {:.2}, moved {} plays)",
351
+
candidate.synthetic_name,
352
+
candidate.target_name,
353
+
final_confidence,
354
+
moved_plays.rows_affected()
355
+
);
356
+
}
357
+
}
358
+
359
+
// Refresh materialized views after consolidation
360
+
if consolidated_count > 0 {
361
+
tracing::info!("๐ Refreshing materialized views after consolidation");
362
+
sqlx::query!("REFRESH MATERIALIZED VIEW mv_artist_play_counts;")
363
+
.execute(&self.sql)
364
+
.await?;
365
+
}
366
+
367
+
tracing::info!(
368
+
"๐ Batch consolidation complete: {} artists consolidated",
369
+
consolidated_count
370
+
);
371
+
Ok(consolidated_count)
372
+
}
373
+
374
+
/// Find and consolidate duplicate releases/albums (requires matching artist context)
375
+
pub async fn consolidate_duplicate_releases(
376
+
&self,
377
+
min_confidence: f64,
378
+
) -> anyhow::Result<usize> {
379
+
tracing::info!(
380
+
"๐ Starting release consolidation with confidence >= {:.2} (requires artist context)",
381
+
min_confidence
382
+
);
383
+
384
+
// Find releases that have similar names AND share at least one artist
385
+
let release_candidates = sqlx::query!(
66
386
r#"
67
-
INSERT INTO artists (mbid, name) VALUES ($1, $2)
68
-
ON CONFLICT (mbid) DO NOTHING
69
-
RETURNING mbid;
387
+
SELECT DISTINCT
388
+
r1.mbid as release1_mbid,
389
+
r1.name as release1_name,
390
+
r2.mbid as release2_mbid,
391
+
r2.name as release2_name,
392
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
393
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists
394
+
FROM releases r1
395
+
CROSS JOIN releases r2
396
+
INNER JOIN plays p1 ON p1.release_mbid = r1.mbid
397
+
INNER JOIN plays p2 ON p2.release_mbid = r2.mbid
398
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
399
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
400
+
WHERE r1.mbid != r2.mbid
401
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
402
+
AND ptae1.artist_id = ptae2.artist_id -- Same artist
403
+
AND (
404
+
(r1.discriminant IS NULL AND r2.discriminant IS NULL) OR
405
+
(LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))
406
+
) -- Same or no discriminants
407
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
408
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist
409
+
ORDER BY similarity_score DESC, shared_artists DESC
70
410
"#,
71
-
artist_uuid,
72
-
name
411
+
min_confidence as f32
73
412
)
74
413
.fetch_all(&self.sql)
75
414
.await?;
76
415
77
-
if !res.is_empty() {
78
-
// TODO: send request to async scrape data from local MB instance
416
+
let mut consolidated_count = 0;
417
+
418
+
for candidate in release_candidates {
419
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
420
+
let shared_artists = candidate.shared_artists.unwrap_or(0);
421
+
422
+
// Use MusicBrainz-style cleaning for better matching
423
+
let cleaned_similarity = Self::calculate_similarity(
424
+
&candidate.release1_name,
425
+
&candidate.release2_name,
426
+
false, // is_artist = false for releases
427
+
);
428
+
429
+
let final_confidence = similarity.max(cleaned_similarity);
430
+
431
+
// Require high confidence AND shared artists for album consolidation
432
+
if final_confidence >= min_confidence && shared_artists > 0 {
433
+
// Choose the release with more plays as the canonical one
434
+
let r1_plays: i64 = sqlx::query_scalar!(
435
+
"SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
436
+
candidate.release1_mbid
437
+
)
438
+
.fetch_one(&self.sql)
439
+
.await?
440
+
.unwrap_or(0);
441
+
442
+
let r2_plays: i64 = sqlx::query_scalar!(
443
+
"SELECT COUNT(*) FROM plays WHERE release_mbid = $1",
444
+
candidate.release2_mbid
445
+
)
446
+
.fetch_one(&self.sql)
447
+
.await?
448
+
.unwrap_or(0);
449
+
450
+
let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays {
451
+
(
452
+
candidate.release1_mbid,
453
+
candidate.release2_mbid,
454
+
candidate.release1_name.clone(),
455
+
)
456
+
} else {
457
+
(
458
+
candidate.release2_mbid,
459
+
candidate.release1_mbid,
460
+
candidate.release2_name.clone(),
461
+
)
462
+
};
463
+
464
+
// Update plays to use the canonical release
465
+
let updated_plays = sqlx::query!(
466
+
"UPDATE plays SET release_mbid = $1, release_name = $2 WHERE release_mbid = $3",
467
+
keep_mbid,
468
+
keep_name,
469
+
remove_mbid
470
+
)
471
+
.execute(&self.sql)
472
+
.await?;
473
+
474
+
// Remove the duplicate release
475
+
sqlx::query!("DELETE FROM releases WHERE mbid = $1", remove_mbid)
476
+
.execute(&self.sql)
477
+
.await?;
478
+
479
+
consolidated_count += 1;
480
+
481
+
tracing::info!(
482
+
"โ
Consolidated releases: '{}' โ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)",
483
+
if r1_plays >= r2_plays {
484
+
&candidate.release2_name
485
+
} else {
486
+
&candidate.release1_name
487
+
},
488
+
keep_name,
489
+
final_confidence,
490
+
shared_artists,
491
+
updated_plays.rows_affected()
492
+
);
493
+
}
79
494
}
80
495
81
-
Ok(artist_uuid)
496
+
tracing::info!(
497
+
"๐ Release consolidation complete: {} releases consolidated",
498
+
consolidated_count
499
+
);
500
+
Ok(consolidated_count)
501
+
}
502
+
503
+
/// Find and consolidate duplicate recordings/tracks (requires matching artist context)
504
+
pub async fn consolidate_duplicate_recordings(
505
+
&self,
506
+
min_confidence: f64,
507
+
) -> anyhow::Result<usize> {
508
+
tracing::info!(
509
+
"๐ Starting recording consolidation with confidence >= {:.2} (requires artist context)",
510
+
min_confidence
511
+
);
512
+
513
+
// Find recordings that have similar names AND share at least one artist
514
+
let recording_candidates = sqlx::query!(
515
+
r#"
516
+
SELECT DISTINCT
517
+
r1.mbid as recording1_mbid,
518
+
r1.name as recording1_name,
519
+
r2.mbid as recording2_mbid,
520
+
r2.name as recording2_name,
521
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
522
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists
523
+
FROM recordings r1
524
+
CROSS JOIN recordings r2
525
+
INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid
526
+
INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid
527
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
528
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
529
+
WHERE r1.mbid != r2.mbid
530
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
531
+
AND ptae1.artist_id = ptae2.artist_id -- Same artist
532
+
AND (
533
+
(r1.discriminant IS NULL AND r2.discriminant IS NULL) OR
534
+
(LOWER(TRIM(COALESCE(r1.discriminant, ''))) = LOWER(TRIM(COALESCE(r2.discriminant, ''))))
535
+
) -- Same or no discriminants
536
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
537
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0 -- At least one shared artist
538
+
ORDER BY similarity_score DESC, shared_artists DESC
539
+
"#,
540
+
min_confidence as f32
541
+
)
542
+
.fetch_all(&self.sql)
543
+
.await?;
544
+
545
+
let mut consolidated_count = 0;
546
+
547
+
for candidate in recording_candidates {
548
+
let similarity = candidate.similarity_score.unwrap_or(0.0) as f64;
549
+
let shared_artists = candidate.shared_artists.unwrap_or(0);
550
+
551
+
// Use MusicBrainz-style cleaning for track names
552
+
let cleaned_similarity = Self::calculate_similarity(
553
+
&candidate.recording1_name,
554
+
&candidate.recording2_name,
555
+
false, // is_artist = false for recordings
556
+
);
557
+
558
+
let final_confidence = similarity.max(cleaned_similarity);
559
+
560
+
// Require high confidence AND shared artists for track consolidation
561
+
if final_confidence >= min_confidence && shared_artists > 0 {
562
+
// Choose the recording with more plays as canonical
563
+
let r1_plays: i64 = sqlx::query_scalar!(
564
+
"SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
565
+
candidate.recording1_mbid
566
+
)
567
+
.fetch_one(&self.sql)
568
+
.await?
569
+
.unwrap_or(0);
570
+
571
+
let r2_plays: i64 = sqlx::query_scalar!(
572
+
"SELECT COUNT(*) FROM plays WHERE recording_mbid = $1",
573
+
candidate.recording2_mbid
574
+
)
575
+
.fetch_one(&self.sql)
576
+
.await?
577
+
.unwrap_or(0);
578
+
579
+
let (keep_mbid, remove_mbid, keep_name) = if r1_plays >= r2_plays {
580
+
(
581
+
candidate.recording1_mbid,
582
+
candidate.recording2_mbid,
583
+
candidate.recording1_name.clone(),
584
+
)
585
+
} else {
586
+
(
587
+
candidate.recording2_mbid,
588
+
candidate.recording1_mbid,
589
+
candidate.recording2_name.clone(),
590
+
)
591
+
};
592
+
593
+
// Update plays to use the canonical recording
594
+
let updated_plays = sqlx::query!(
595
+
"UPDATE plays SET recording_mbid = $1 WHERE recording_mbid = $2",
596
+
keep_mbid,
597
+
remove_mbid
598
+
)
599
+
.execute(&self.sql)
600
+
.await?;
601
+
602
+
// Remove the duplicate recording
603
+
sqlx::query!("DELETE FROM recordings WHERE mbid = $1", remove_mbid)
604
+
.execute(&self.sql)
605
+
.await?;
606
+
607
+
consolidated_count += 1;
608
+
609
+
tracing::info!(
610
+
"โ
Consolidated recordings: '{}' โ '{}' (confidence: {:.2}, {} shared artists, updated {} plays)",
611
+
if r1_plays >= r2_plays {
612
+
&candidate.recording2_name
613
+
} else {
614
+
&candidate.recording1_name
615
+
},
616
+
keep_name,
617
+
final_confidence,
618
+
shared_artists,
619
+
updated_plays.rows_affected()
620
+
);
621
+
}
622
+
}
623
+
624
+
tracing::info!(
625
+
"๐ Recording consolidation complete: {} recordings consolidated",
626
+
consolidated_count
627
+
);
628
+
Ok(consolidated_count)
629
+
}
630
+
631
+
/// Preview consolidation candidates to show what would be merged
632
+
pub async fn preview_consolidation_candidates(
633
+
&self,
634
+
min_confidence: f64,
635
+
) -> anyhow::Result<()> {
636
+
tracing::info!(
637
+
"๐ Previewing consolidation candidates (confidence >= {:.2})",
638
+
min_confidence
639
+
);
640
+
641
+
// Preview artist consolidations
642
+
let artist_candidates = sqlx::query!(
643
+
r#"
644
+
SELECT DISTINCT
645
+
ae1.name as synthetic_name,
646
+
ae2.name as target_name,
647
+
similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as similarity_score,
648
+
COUNT(ptae1.play_uri) as synthetic_plays,
649
+
COUNT(ptae2.play_uri) as target_plays
650
+
FROM artists_extended ae1
651
+
CROSS JOIN artists_extended ae2
652
+
LEFT JOIN play_to_artists_extended ptae1 ON ae1.id = ptae1.artist_id
653
+
LEFT JOIN play_to_artists_extended ptae2 ON ae2.id = ptae2.artist_id
654
+
WHERE ae1.id != ae2.id
655
+
AND ae1.mbid_type = 'synthetic'
656
+
AND ae2.mbid_type = 'musicbrainz'
657
+
AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) >= $1
658
+
GROUP BY ae1.id, ae1.name, ae2.id, ae2.name, similarity_score
659
+
ORDER BY similarity_score DESC
660
+
LIMIT 10
661
+
"#,
662
+
min_confidence as f32
663
+
)
664
+
.fetch_all(&self.sql)
665
+
.await?;
666
+
667
+
if !artist_candidates.is_empty() {
668
+
tracing::info!("๐ฏ Artist consolidation candidates:");
669
+
for candidate in artist_candidates {
670
+
tracing::info!(
671
+
" '{}' โ '{}' (confidence: {:.2}, {} + {} plays)",
672
+
candidate.synthetic_name,
673
+
candidate.target_name,
674
+
candidate.similarity_score.unwrap_or(0.0),
675
+
candidate.synthetic_plays.unwrap_or(0),
676
+
candidate.target_plays.unwrap_or(0)
677
+
);
678
+
}
679
+
}
680
+
681
+
// Preview release consolidations (with artist context)
682
+
let release_candidates = sqlx::query!(
683
+
r#"
684
+
SELECT DISTINCT
685
+
r1.name as release1_name,
686
+
r2.name as release2_name,
687
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
688
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists,
689
+
STRING_AGG(DISTINCT ae.name, ', ') as artist_names
690
+
FROM releases r1
691
+
CROSS JOIN releases r2
692
+
INNER JOIN plays p1 ON p1.release_mbid = r1.mbid
693
+
INNER JOIN plays p2 ON p2.release_mbid = r2.mbid
694
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
695
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
696
+
INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id
697
+
WHERE r1.mbid != r2.mbid
698
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
699
+
AND ptae1.artist_id = ptae2.artist_id
700
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
701
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0
702
+
ORDER BY similarity_score DESC
703
+
LIMIT 5
704
+
"#,
705
+
min_confidence as f32
706
+
)
707
+
.fetch_all(&self.sql)
708
+
.await?;
709
+
710
+
if !release_candidates.is_empty() {
711
+
tracing::info!("๐ฟ Release consolidation candidates (with artist context):");
712
+
for candidate in release_candidates {
713
+
tracing::info!(
714
+
" '{}' โ '{}' (confidence: {:.2}, {} shared artists: {})",
715
+
candidate.release1_name,
716
+
candidate.release2_name,
717
+
candidate.similarity_score.unwrap_or(0.0),
718
+
candidate.shared_artists.unwrap_or(0),
719
+
candidate.artist_names.unwrap_or_default()
720
+
);
721
+
}
722
+
}
723
+
724
+
// Preview recording consolidations (with artist context)
725
+
let recording_candidates = sqlx::query!(
726
+
r#"
727
+
SELECT DISTINCT
728
+
r1.name as recording1_name,
729
+
r2.name as recording2_name,
730
+
similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) as similarity_score,
731
+
COUNT(DISTINCT ptae1.artist_id) as shared_artists,
732
+
STRING_AGG(DISTINCT ae.name, ', ') as artist_names
733
+
FROM recordings r1
734
+
CROSS JOIN recordings r2
735
+
INNER JOIN plays p1 ON p1.recording_mbid = r1.mbid
736
+
INNER JOIN plays p2 ON p2.recording_mbid = r2.mbid
737
+
INNER JOIN play_to_artists_extended ptae1 ON p1.uri = ptae1.play_uri
738
+
INNER JOIN play_to_artists_extended ptae2 ON p2.uri = ptae2.play_uri
739
+
INNER JOIN artists_extended ae ON ptae1.artist_id = ae.id
740
+
WHERE r1.mbid != r2.mbid
741
+
AND similarity(LOWER(TRIM(r1.name)), LOWER(TRIM(r2.name))) >= $1
742
+
AND ptae1.artist_id = ptae2.artist_id
743
+
GROUP BY r1.mbid, r1.name, r2.mbid, r2.name, similarity_score
744
+
HAVING COUNT(DISTINCT ptae1.artist_id) > 0
745
+
ORDER BY similarity_score DESC
746
+
LIMIT 5
747
+
"#,
748
+
min_confidence as f32
749
+
)
750
+
.fetch_all(&self.sql)
751
+
.await?;
752
+
753
+
if !recording_candidates.is_empty() {
754
+
tracing::info!("๐ต Recording consolidation candidates (with artist context):");
755
+
for candidate in recording_candidates {
756
+
tracing::info!(
757
+
" '{}' โ '{}' (confidence: {:.2}, {} shared artists: {})",
758
+
candidate.recording1_name,
759
+
candidate.recording2_name,
760
+
candidate.similarity_score.unwrap_or(0.0),
761
+
candidate.shared_artists.unwrap_or(0),
762
+
candidate.artist_names.unwrap_or_default()
763
+
);
764
+
}
765
+
}
766
+
767
+
Ok(())
768
+
}
769
+
770
+
/// Run full batch consolidation for all entity types
771
+
pub async fn run_full_consolidation(&self) -> anyhow::Result<()> {
772
+
tracing::info!("๐ Starting full batch consolidation process");
773
+
774
+
// First, preview what we would consolidate
775
+
self.preview_consolidation_candidates(0.92).await?;
776
+
777
+
let artist_count = self.consolidate_synthetic_artists(0.92).await?;
778
+
let release_count = self.consolidate_duplicate_releases(0.92).await?;
779
+
let recording_count = self.consolidate_duplicate_recordings(0.92).await?;
780
+
781
+
tracing::info!(
782
+
"๐ Full consolidation complete! Artists: {}, Releases: {}, Recordings: {}",
783
+
artist_count,
784
+
release_count,
785
+
recording_count
786
+
);
787
+
788
+
Ok(())
789
+
}
790
+
791
+
/// Generate a synthetic MBID for artists without MusicBrainz data using database function
792
+
async fn generate_synthetic_mbid(&self, artist_name: &str) -> anyhow::Result<Uuid> {
793
+
let result = sqlx::query_scalar!("SELECT generate_synthetic_mbid($1)", artist_name)
794
+
.fetch_one(&self.sql)
795
+
.await?;
796
+
797
+
result.ok_or_else(|| anyhow!("Failed to generate synthetic MBID"))
798
+
}
799
+
800
+
/// Generate a fallback artist name for tracks without any artist information
801
+
fn generate_fallback_artist(track_name: &str) -> String {
802
+
format!(
803
+
"Unknown Artist ({})",
804
+
track_name.chars().take(20).collect::<String>()
805
+
)
806
+
}
807
+
808
+
/// Normalize text for fuzzy matching with MusicBrainz-style cleaning
809
+
fn normalize_text(text: &str, is_artist: bool) -> String {
810
+
let cleaned = if is_artist {
811
+
MusicBrainzCleaner::clean_artist_name(text)
812
+
} else {
813
+
MusicBrainzCleaner::clean_track_name(text)
814
+
};
815
+
816
+
MusicBrainzCleaner::normalize_for_comparison(&cleaned)
817
+
}
818
+
819
+
/// Calculate string similarity with MusicBrainz-style cleaning
820
+
fn calculate_similarity(s1: &str, s2: &str, is_artist: bool) -> f64 {
821
+
let s1_norm = Self::normalize_text(s1, is_artist);
822
+
let s2_norm = Self::normalize_text(s2, is_artist);
823
+
824
+
if s1_norm == s2_norm {
825
+
return 1.0;
826
+
}
827
+
828
+
if s1_norm.is_empty() || s2_norm.is_empty() {
829
+
return 0.0;
830
+
}
831
+
832
+
// Calculate basic similarity
833
+
let max_len = s1_norm.len().max(s2_norm.len()) as f64;
834
+
let min_len = s1_norm.len().min(s2_norm.len()) as f64;
835
+
836
+
// Character-based similarity
837
+
let common_chars = s1_norm
838
+
.chars()
839
+
.zip(s2_norm.chars())
840
+
.filter(|(a, b)| a == b)
841
+
.count() as f64;
842
+
843
+
// Word-based similarity boost
844
+
let s1_words: std::collections::HashSet<&str> = s1_norm.split_whitespace().collect();
845
+
let s2_words: std::collections::HashSet<&str> = s2_norm.split_whitespace().collect();
846
+
let common_words = s1_words.intersection(&s2_words).count() as f64;
847
+
let total_words = s1_words.union(&s2_words).count() as f64;
848
+
849
+
let word_similarity = if total_words > 0.0 {
850
+
common_words / total_words
851
+
} else {
852
+
0.0
853
+
};
854
+
let char_similarity = common_chars / max_len;
855
+
856
+
// Boost for very similar lengths (helps with minor differences)
857
+
let length_factor = if max_len > 0.0 {
858
+
min_len / max_len
859
+
} else {
860
+
0.0
861
+
};
862
+
863
+
// Weighted combination: 50% word similarity, 30% char similarity, 20% length factor
864
+
(word_similarity * 0.5) + (char_similarity * 0.3) + (length_factor * 0.2)
865
+
}
866
+
867
+
/// Find existing artists that fuzzy match the given name
868
+
async fn find_fuzzy_artist_matches(
869
+
&self,
870
+
artist_name: &str,
871
+
_track_name: &str,
872
+
_album_name: Option<&str>,
873
+
) -> anyhow::Result<Vec<FuzzyMatchCandidate>> {
874
+
let normalized_name = Self::normalize_text(artist_name, true);
875
+
876
+
// Search for artists with similar names using trigram similarity
877
+
let candidates = sqlx::query!(
878
+
r#"
879
+
SELECT
880
+
ae.id,
881
+
ae.name
882
+
FROM artists_extended ae
883
+
WHERE ae.mbid_type = 'musicbrainz'
884
+
AND (
885
+
LOWER(TRIM(ae.name)) = $1
886
+
OR LOWER(TRIM(ae.name)) LIKE '%' || $1 || '%'
887
+
OR $1 LIKE '%' || LOWER(TRIM(ae.name)) || '%'
888
+
OR similarity(LOWER(TRIM(ae.name)), $1) > 0.6
889
+
)
890
+
ORDER BY similarity(LOWER(TRIM(ae.name)), $1) DESC
891
+
LIMIT 10
892
+
"#,
893
+
normalized_name
894
+
)
895
+
.fetch_all(&self.sql)
896
+
.await
897
+
.unwrap_or_default();
898
+
899
+
let mut matches = Vec::new();
900
+
901
+
for candidate in candidates {
902
+
let name_similarity = Self::calculate_similarity(artist_name, &candidate.name, true);
903
+
904
+
// Base confidence from name similarity
905
+
let mut confidence = name_similarity;
906
+
907
+
// Boost confidence for exact matches after normalization
908
+
if Self::normalize_text(artist_name, true)
909
+
== Self::normalize_text(&candidate.name, true)
910
+
{
911
+
confidence = confidence.max(0.95);
912
+
}
913
+
914
+
// Additional boost for cleaned matches
915
+
let cleaned_input = MusicBrainzCleaner::clean_artist_name(artist_name);
916
+
let cleaned_candidate = MusicBrainzCleaner::clean_artist_name(&candidate.name);
917
+
if MusicBrainzCleaner::normalize_for_comparison(&cleaned_input)
918
+
== MusicBrainzCleaner::normalize_for_comparison(&cleaned_candidate)
919
+
{
920
+
confidence = confidence.max(0.9);
921
+
}
922
+
923
+
// Lower threshold since we have better cleaning now
924
+
if confidence >= 0.8 {
925
+
matches.push(FuzzyMatchCandidate {
926
+
artist_id: candidate.id,
927
+
name: candidate.name,
928
+
confidence,
929
+
});
930
+
}
931
+
}
932
+
933
+
// Sort by confidence descending
934
+
matches.sort_by(|a, b| {
935
+
b.confidence
936
+
.partial_cmp(&a.confidence)
937
+
.unwrap_or(std::cmp::Ordering::Equal)
938
+
});
939
+
940
+
Ok(matches)
941
+
}
942
+
943
+
/// Try to match an artist to existing MusicBrainz data using fuzzy matching
944
+
async fn find_or_create_artist_with_fuzzy_matching(
945
+
&self,
946
+
artist_name: &str,
947
+
mbid: Option<&str>,
948
+
track_name: &str,
949
+
album_name: Option<&str>,
950
+
) -> anyhow::Result<i32> {
951
+
// If we already have an MBID, use it directly
952
+
if let Some(mbid) = mbid {
953
+
return self.insert_artist_extended(Some(mbid), artist_name).await;
954
+
}
955
+
956
+
// Try fuzzy matching against existing MusicBrainz artists
957
+
let matches = self
958
+
.find_fuzzy_artist_matches(artist_name, track_name, album_name)
959
+
.await?;
960
+
961
+
if let Some(best_match) = matches.first() {
962
+
// Use high confidence threshold for automatic matching
963
+
if best_match.confidence >= 0.92 {
964
+
tracing::info!(
965
+
"๐ Fuzzy matched '{}' to existing artist '{}' (confidence: {:.2})",
966
+
artist_name,
967
+
best_match.name,
968
+
best_match.confidence
969
+
);
970
+
971
+
// Update the existing artist name if the new one seems more complete
972
+
if artist_name.len() > best_match.name.len() && best_match.confidence >= 0.95 {
973
+
sqlx::query!(
974
+
"UPDATE artists_extended SET name = $1, updated_at = NOW() WHERE id = $2",
975
+
artist_name,
976
+
best_match.artist_id
977
+
)
978
+
.execute(&self.sql)
979
+
.await?;
980
+
}
981
+
982
+
return Ok(best_match.artist_id);
983
+
} else if best_match.confidence >= 0.85 {
984
+
tracing::debug!(
985
+
"๐ค Potential match for '{}' -> '{}' (confidence: {:.2}) but below auto-match threshold",
986
+
artist_name,
987
+
best_match.name,
988
+
best_match.confidence
989
+
);
990
+
}
991
+
}
992
+
993
+
// No good match found, create synthetic artist
994
+
self.insert_artist_extended(None, artist_name).await
995
+
}
996
+
997
+
/// Inserts or updates an artist in the database using the extended table.
998
+
/// Returns the internal ID of the artist.
999
+
async fn insert_artist_extended(&self, mbid: Option<&str>, name: &str) -> anyhow::Result<i32> {
1000
+
if let Some(mbid) = mbid {
1001
+
let artist_uuid = Uuid::parse_str(mbid)?;
1002
+
let res = sqlx::query!(
1003
+
r#"
1004
+
INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'musicbrainz')
1005
+
ON CONFLICT (mbid) DO UPDATE SET
1006
+
name = EXCLUDED.name,
1007
+
updated_at = NOW()
1008
+
RETURNING id;
1009
+
"#,
1010
+
artist_uuid,
1011
+
name
1012
+
)
1013
+
.fetch_one(&self.sql)
1014
+
.await?;
1015
+
Ok(res.id)
1016
+
} else {
1017
+
// Artist without MBID - generate synthetic MBID
1018
+
let synthetic_uuid = self.generate_synthetic_mbid(name).await?;
1019
+
1020
+
let res = sqlx::query!(
1021
+
r#"
1022
+
INSERT INTO artists_extended (mbid, name, mbid_type) VALUES ($1, $2, 'synthetic')
1023
+
ON CONFLICT (mbid) DO UPDATE SET
1024
+
name = EXCLUDED.name,
1025
+
updated_at = NOW()
1026
+
RETURNING id;
1027
+
"#,
1028
+
synthetic_uuid,
1029
+
name
1030
+
)
1031
+
.fetch_one(&self.sql)
1032
+
.await?;
1033
+
Ok(res.id)
1034
+
}
82
1035
}
83
1036
84
1037
/// Inserts or updates a release in the database.
85
1038
/// Returns the Uuid of the release.
86
1039
async fn insert_release(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
87
1040
let release_uuid = Uuid::parse_str(mbid)?;
1041
+
1042
+
// Extract discriminant from release name for new releases
1043
+
// Prioritize edition-specific patterns for better quality
1044
+
let discriminant = self
1045
+
.extract_edition_discriminant_from_db(name)
1046
+
.await
1047
+
.or_else(|| {
1048
+
futures::executor::block_on(async { self.extract_discriminant_from_db(name).await })
1049
+
});
1050
+
88
1051
let res = sqlx::query!(
89
1052
r#"
90
-
INSERT INTO releases (mbid, name) VALUES ($1, $2)
91
-
ON CONFLICT (mbid) DO NOTHING
1053
+
INSERT INTO releases (mbid, name, discriminant) VALUES ($1, $2, $3)
1054
+
ON CONFLICT (mbid) DO UPDATE SET
1055
+
name = EXCLUDED.name,
1056
+
discriminant = COALESCE(EXCLUDED.discriminant, releases.discriminant)
92
1057
RETURNING mbid;
93
1058
"#,
94
1059
release_uuid,
95
-
name
1060
+
name,
1061
+
discriminant
96
1062
)
97
1063
.fetch_all(&self.sql)
98
1064
.await?;
···
108
1074
/// Returns the Uuid of the recording.
109
1075
async fn insert_recording(&self, mbid: &str, name: &str) -> anyhow::Result<Uuid> {
110
1076
let recording_uuid = Uuid::parse_str(mbid)?;
1077
+
1078
+
// Extract discriminant from recording name for new recordings
1079
+
// Prioritize edition-specific patterns for better quality
1080
+
let discriminant = self
1081
+
.extract_edition_discriminant_from_db(name)
1082
+
.await
1083
+
.or_else(|| {
1084
+
futures::executor::block_on(async { self.extract_discriminant_from_db(name).await })
1085
+
});
1086
+
111
1087
let res = sqlx::query!(
112
1088
r#"
113
-
INSERT INTO recordings (mbid, name) VALUES ($1, $2)
114
-
ON CONFLICT (mbid) DO NOTHING
1089
+
INSERT INTO recordings (mbid, name, discriminant) VALUES ($1, $2, $3)
1090
+
ON CONFLICT (mbid) DO UPDATE SET
1091
+
name = EXCLUDED.name,
1092
+
discriminant = COALESCE(EXCLUDED.discriminant, recordings.discriminant)
115
1093
RETURNING mbid;
116
1094
"#,
117
1095
recording_uuid,
118
-
name
1096
+
name,
1097
+
discriminant
119
1098
)
120
1099
.fetch_all(&self.sql)
121
1100
.await?;
···
126
1105
127
1106
Ok(recording_uuid)
128
1107
}
1108
+
1109
+
/// Extract discriminant from name using database function
1110
+
async fn extract_discriminant_from_db(&self, name: &str) -> Option<String> {
1111
+
sqlx::query_scalar!("SELECT extract_discriminant($1)", name)
1112
+
.fetch_one(&self.sql)
1113
+
.await
1114
+
.ok()
1115
+
.flatten()
1116
+
}
1117
+
1118
+
/// Extract edition-specific discriminant from name using database function
1119
+
async fn extract_edition_discriminant_from_db(&self, name: &str) -> Option<String> {
1120
+
sqlx::query_scalar!("SELECT extract_edition_discriminant($1)", name)
1121
+
.fetch_one(&self.sql)
1122
+
.await
1123
+
.ok()
1124
+
.flatten()
1125
+
}
1126
+
1127
+
// /// Get base name without discriminant using database function
1128
+
// async fn get_base_name_from_db(&self, name: &str) -> String {
1129
+
// sqlx::query_scalar!("SELECT get_base_name($1)", name)
1130
+
// .fetch_one(&self.sql)
1131
+
// .await
1132
+
// .ok()
1133
+
// .flatten()
1134
+
// .unwrap_or_else(|| name.to_string())
1135
+
// }
129
1136
130
1137
pub async fn insert_play(
131
1138
&self,
···
137
1144
) -> anyhow::Result<()> {
138
1145
dbg!("ingesting", play_record);
139
1146
let play_record = clean(play_record);
140
-
let mut parsed_artists: Vec<(Uuid, String)> = vec![];
1147
+
let mut parsed_artists: Vec<(i32, String)> = vec![];
1148
+
let mut artist_names_raw: Vec<String> = vec![];
1149
+
141
1150
if let Some(ref artists) = &play_record.artists {
142
1151
for artist in artists {
143
1152
let artist_name = artist.artist_name.clone();
144
-
let artist_mbid = artist.artist_mb_id.clone();
145
-
if let Some(artist_mbid) = artist_mbid {
146
-
let artist_uuid = self.insert_artist(&artist_mbid, &artist_name).await?;
147
-
parsed_artists.push((artist_uuid, artist_name.clone()));
1153
+
artist_names_raw.push(artist_name.clone());
1154
+
let artist_mbid = artist.artist_mb_id.as_deref();
1155
+
1156
+
let artist_id = self
1157
+
.find_or_create_artist_with_fuzzy_matching(
1158
+
&artist_name,
1159
+
artist_mbid,
1160
+
&play_record.track_name,
1161
+
play_record.release_name.as_deref(),
1162
+
)
1163
+
.await?;
1164
+
parsed_artists.push((artist_id, artist_name.clone()));
1165
+
}
1166
+
} else if let Some(artist_names) = &play_record.artist_names {
1167
+
for (index, artist_name) in artist_names.iter().enumerate() {
1168
+
artist_names_raw.push(artist_name.clone());
1169
+
1170
+
let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids {
1171
+
mbid_list.get(index)
148
1172
} else {
149
-
// Handle case where artist MBID is missing, maybe log a warning
150
-
eprintln!("Warning: Artist MBID missing for '{}'", artist_name);
151
-
}
1173
+
None
1174
+
};
1175
+
1176
+
let artist_id = self
1177
+
.find_or_create_artist_with_fuzzy_matching(
1178
+
artist_name,
1179
+
artist_mbid_opt.map(|s| s.as_str()),
1180
+
&play_record.track_name,
1181
+
play_record.release_name.as_deref(),
1182
+
)
1183
+
.await?;
1184
+
parsed_artists.push((artist_id, artist_name.clone()));
152
1185
}
153
1186
} else {
154
-
if let Some(artist_names) = &play_record.artist_names {
155
-
for artist_name in artist_names {
156
-
// Assuming artist_mbid is optional, handle missing mbid gracefully
157
-
let artist_mbid_opt = if let Some(ref mbid_list) = play_record.artist_mb_ids {
158
-
mbid_list.get(
159
-
artist_names
160
-
.iter()
161
-
.position(|name| name == artist_name)
162
-
.unwrap_or(0),
163
-
)
164
-
} else {
165
-
None
166
-
};
1187
+
// No artist information provided - create a fallback artist
1188
+
let fallback_artist_name = Self::generate_fallback_artist(&play_record.track_name);
1189
+
artist_names_raw.push(fallback_artist_name.clone());
167
1190
168
-
if let Some(artist_mbid) = artist_mbid_opt {
169
-
let artist_uuid = self.insert_artist(artist_mbid, artist_name).await?;
170
-
parsed_artists.push((artist_uuid, artist_name.clone()));
171
-
} else {
172
-
// Handle case where artist MBID is missing, maybe log a warning
173
-
eprintln!("Warning: Artist MBID missing for '{}'", artist_name);
174
-
}
175
-
}
176
-
}
1191
+
let artist_id = self
1192
+
.find_or_create_artist_with_fuzzy_matching(
1193
+
&fallback_artist_name,
1194
+
None,
1195
+
&play_record.track_name,
1196
+
play_record.release_name.as_deref(),
1197
+
)
1198
+
.await?;
1199
+
parsed_artists.push((artist_id, fallback_artist_name));
177
1200
}
178
1201
179
1202
// Insert release if missing
···
203
1226
time::OffsetDateTime::from_unix_timestamp(played_time.as_ref().timestamp())
204
1227
.unwrap_or_else(|_| time::OffsetDateTime::now_utc());
205
1228
206
-
// Our main insert into plays
1229
+
// Extract discriminants from lexicon fields or infer from names
1230
+
// First try lexicon fields, then extract from names with preference for edition-specific patterns
1231
+
// TODO: Enable when types are updated with discriminant fields
1232
+
// let track_discriminant = play_record.track_discriminant.clone().or_else(|| {
1233
+
let track_discriminant = {
1234
+
// Try edition-specific patterns first, then general patterns
1235
+
futures::executor::block_on(async {
1236
+
self.extract_edition_discriminant_from_db(&play_record.track_name)
1237
+
.await
1238
+
.or_else(|| {
1239
+
futures::executor::block_on(async {
1240
+
self.extract_discriminant_from_db(&play_record.track_name)
1241
+
.await
1242
+
})
1243
+
})
1244
+
})
1245
+
};
1246
+
1247
+
// let release_discriminant = play_record.release_discriminant.clone().or_else(|| {
1248
+
let release_discriminant = {
1249
+
if let Some(ref release_name) = play_record.release_name {
1250
+
futures::executor::block_on(async {
1251
+
// Try edition-specific patterns first, then general patterns
1252
+
self.extract_edition_discriminant_from_db(release_name)
1253
+
.await
1254
+
.or_else(|| {
1255
+
futures::executor::block_on(async {
1256
+
self.extract_discriminant_from_db(release_name).await
1257
+
})
1258
+
})
1259
+
})
1260
+
} else {
1261
+
None
1262
+
}
1263
+
};
1264
+
1265
+
// Our main insert into plays with raw artist names and discriminants
1266
+
let artist_names_json = if !artist_names_raw.is_empty() {
1267
+
Some(serde_json::to_value(&artist_names_raw)?)
1268
+
} else {
1269
+
None
1270
+
};
1271
+
207
1272
sqlx::query!(
208
1273
r#"
209
1274
INSERT INTO plays (
210
1275
uri, cid, did, rkey, isrc, duration, track_name, played_time,
211
1276
processed_time, release_mbid, release_name, recording_mbid,
212
-
submission_client_agent, music_service_base_domain
1277
+
submission_client_agent, music_service_base_domain, artist_names_raw,
1278
+
track_discriminant, release_discriminant
213
1279
) VALUES (
214
1280
$1, $2, $3, $4, $5, $6, $7, $8,
215
-
NOW(), $9, $10, $11, $12, $13
1281
+
NOW(), $9, $10, $11, $12, $13, $14, $15, $16
216
1282
) ON CONFLICT(uri) DO UPDATE SET
217
1283
isrc = EXCLUDED.isrc,
218
1284
duration = EXCLUDED.duration,
···
223
1289
release_name = EXCLUDED.release_name,
224
1290
recording_mbid = EXCLUDED.recording_mbid,
225
1291
submission_client_agent = EXCLUDED.submission_client_agent,
226
-
music_service_base_domain = EXCLUDED.music_service_base_domain;
1292
+
music_service_base_domain = EXCLUDED.music_service_base_domain,
1293
+
artist_names_raw = EXCLUDED.artist_names_raw,
1294
+
track_discriminant = EXCLUDED.track_discriminant,
1295
+
release_discriminant = EXCLUDED.release_discriminant;
227
1296
"#,
228
1297
uri,
229
1298
cid,
···
238
1307
recording_mbid_opt,
239
1308
play_record.submission_client_agent,
240
1309
play_record.music_service_base_domain,
1310
+
artist_names_json,
1311
+
track_discriminant,
1312
+
release_discriminant
241
1313
)
242
1314
.execute(&self.sql)
243
1315
.await?;
244
1316
245
-
// Insert plays into join table
246
-
for (mbid, artist) in &parsed_artists {
247
-
let artist_name = artist.clone(); // Clone to move into the query
248
-
1317
+
// Insert plays into the extended join table (supports all artists)
1318
+
for (artist_id, artist_name) in &parsed_artists {
249
1319
sqlx::query!(
250
1320
r#"
251
-
INSERT INTO play_to_artists (play_uri, artist_mbid, artist_name) VALUES
252
-
($1, $2, $3)
253
-
ON CONFLICT (play_uri, artist_mbid) DO NOTHING;
254
-
"#,
1321
+
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) VALUES
1322
+
($1, $2, $3)
1323
+
ON CONFLICT (play_uri, artist_id) DO NOTHING;
1324
+
"#,
255
1325
uri,
256
-
mbid,
1326
+
artist_id,
257
1327
artist_name
258
1328
)
259
1329
.execute(&self.sql)
+51
-24
services/cadet/src/main.rs
+51
-24
services/cadet/src/main.rs
···
17
17
mod cursor;
18
18
mod db;
19
19
mod ingestors;
20
-
mod resolve;
21
20
mod redis_client;
21
+
mod resolve;
22
22
23
23
fn setup_tracing() {
24
24
tracing_subscriber::fmt()
···
96
96
97
97
// CAR import job worker
98
98
let car_ingestor = ingestors::car::CarImportIngestor::new(pool.clone());
99
-
let redis_url = std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
100
-
99
+
let redis_url =
100
+
std::env::var("REDIS_URL").unwrap_or_else(|_| "redis://127.0.0.1:6379".to_string());
101
+
101
102
match redis_client::RedisClient::new(&redis_url) {
102
103
Ok(redis_client) => {
103
104
// Spawn CAR import job processing task
104
105
tokio::spawn(async move {
105
-
use types::jobs::{CarImportJob, CarImportJobStatus, JobStatus, JobProgress, queue_keys};
106
-
use tracing::{info, error};
107
106
use chrono::Utc;
108
-
107
+
use ingestors::car::jobs::{
108
+
queue_keys, CarImportJob, CarImportJobStatus, JobProgress, JobStatus,
109
+
};
110
+
use tracing::{error, info};
111
+
109
112
info!("Starting CAR import job worker, polling Redis queue...");
110
-
113
+
111
114
loop {
112
115
// Block for up to 10 seconds waiting for jobs
113
116
match redis_client.pop_job(queue_keys::CAR_IMPORT_JOBS, 10).await {
114
117
Ok(Some(job_data)) => {
115
118
info!("Received CAR import job: {}", job_data);
116
-
119
+
117
120
// Parse job
118
121
match serde_json::from_str::<CarImportJob>(&job_data) {
119
122
Ok(job) => {
···
132
135
blocks_processed: None,
133
136
}),
134
137
};
135
-
138
+
136
139
let status_key = queue_keys::job_status_key(&job.request_id);
137
-
if let Ok(status_data) = serde_json::to_string(&processing_status) {
138
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
140
+
if let Ok(status_data) =
141
+
serde_json::to_string(&processing_status)
142
+
{
143
+
let _ = redis_client
144
+
.update_job_status(&status_key, &status_data)
145
+
.await;
139
146
}
140
-
147
+
141
148
// Process the job
142
-
match car_ingestor.fetch_and_process_identity_car(&job.identity).await {
149
+
match car_ingestor
150
+
.fetch_and_process_identity_car(&job.identity)
151
+
.await
152
+
{
143
153
Ok(import_id) => {
144
-
info!("โ
CAR import job completed successfully: {}", job.request_id);
145
-
154
+
info!(
155
+
"โ
CAR import job completed successfully: {}",
156
+
job.request_id
157
+
);
158
+
146
159
let completed_status = CarImportJobStatus {
147
160
status: JobStatus::Completed,
148
161
created_at: job.created_at,
···
150
163
completed_at: Some(Utc::now()),
151
164
error_message: None,
152
165
progress: Some(JobProgress {
153
-
step: format!("CAR import completed: {}", import_id),
166
+
step: format!(
167
+
"CAR import completed: {}",
168
+
import_id
169
+
),
154
170
user_did: None,
155
171
pds_host: None,
156
172
car_size_bytes: None,
157
173
blocks_processed: None,
158
174
}),
159
175
};
160
-
161
-
if let Ok(status_data) = serde_json::to_string(&completed_status) {
162
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
176
+
177
+
if let Ok(status_data) =
178
+
serde_json::to_string(&completed_status)
179
+
{
180
+
let _ = redis_client
181
+
.update_job_status(&status_key, &status_data)
182
+
.await;
163
183
}
164
184
}
165
185
Err(e) => {
166
-
error!("โ CAR import job failed: {}: {}", job.request_id, e);
167
-
186
+
error!(
187
+
"โ CAR import job failed: {}: {}",
188
+
job.request_id, e
189
+
);
190
+
168
191
let failed_status = CarImportJobStatus {
169
192
status: JobStatus::Failed,
170
193
created_at: job.created_at,
···
173
196
error_message: Some(e.to_string()),
174
197
progress: None,
175
198
};
176
-
177
-
if let Ok(status_data) = serde_json::to_string(&failed_status) {
178
-
let _ = redis_client.update_job_status(&status_key, &status_data).await;
199
+
200
+
if let Ok(status_data) =
201
+
serde_json::to_string(&failed_status)
202
+
{
203
+
let _ = redis_client
204
+
.update_job_status(&status_key, &status_data)
205
+
.await;
179
206
}
180
207
}
181
208
}
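The worker above serializes and writes a `CarImportJobStatus` in three nearly identical places; a small helper along these lines (purely illustrative, not part of the diff) is one way that repetition could be collapsed.

```rust
use crate::ingestors::car::jobs::CarImportJobStatus;
use crate::redis_client::RedisClient;

/// Hypothetical helper: serialize a status and store it, logging instead of
/// failing the worker loop when Redis or serialization misbehaves.
async fn publish_status(redis: &RedisClient, status_key: &str, status: &CarImportJobStatus) {
    match serde_json::to_string(status) {
        Ok(data) => {
            if let Err(e) = redis.update_job_status(status_key, &data).await {
                tracing::warn!("failed to update job status {}: {}", status_key, e);
            }
        }
        Err(e) => tracing::warn!("failed to serialize job status {}: {}", status_key, e),
    }
}
```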
+3
-3
services/cadet/src/redis_client.rs
+3
-3
services/cadet/src/redis_client.rs
···
20
20
pub async fn pop_job(&self, queue_key: &str, timeout_seconds: u64) -> Result<Option<String>> {
21
21
let mut conn = self.get_connection().await?;
22
22
let result: Option<Vec<String>> = conn.brpop(queue_key, timeout_seconds as f64).await?;
23
-
23
+
24
24
match result {
25
25
Some(mut items) if items.len() >= 2 => {
26
26
// brpop returns [queue_name, item], we want the item
27
27
Ok(Some(items.remove(1)))
28
28
}
29
-
_ => Ok(None)
29
+
_ => Ok(None),
30
30
}
31
31
}
32
32
···
36
36
let _: () = conn.set(status_key, status_data).await?;
37
37
Ok(())
38
38
}
39
-
}
39
+
}
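For completeness, a sketch (not part of the diff) of the read side of these status keys: an API handler could fetch the JSON written by `update_job_status` and decode it into the `CarImportJobStatus` added in `jobs.rs`. The `get` call is the standard `redis` crate command; the function itself is hypothetical.

```rust
use redis::AsyncCommands;
use uuid::Uuid;

// Assumed path to the job types introduced in this PR.
use crate::ingestors::car::jobs::{queue_keys, CarImportJobStatus};

/// Hypothetical status lookup: returns None when no status has been written for this id.
async fn read_job_status(
    conn: &mut redis::aio::MultiplexedConnection,
    job_id: &Uuid,
) -> anyhow::Result<Option<CarImportJobStatus>> {
    let key = queue_keys::job_status_key(job_id);
    let raw: Option<String> = conn.get(&key).await?;
    Ok(match raw {
        Some(json) => Some(serde_json::from_str(&json)?),
        None => None,
    })
}
```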
+55
services/cadet/target.sh
+55
services/cadet/target.sh
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Debug: Print all available build variables
5
+
echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM"
6
+
echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM"
7
+
echo "DEBUG: TARGETARCH=$TARGETARCH"
8
+
echo "DEBUG: TARGETOS=$TARGETOS"
9
+
10
+
# Use TARGETARCH directly (more reliable than TARGETPLATFORM)
11
+
TARGET_ARCH_VAR="${TARGETARCH:-}"
12
+
13
+
# If TARGETARCH is not set, try to extract from TARGETPLATFORM
14
+
if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then
15
+
TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2)
16
+
echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM"
17
+
fi
18
+
19
+
# Final fallback: detect from uname
20
+
if [ -z "$TARGET_ARCH_VAR" ]; then
21
+
ARCH=$(uname -m)
22
+
case "$ARCH" in
23
+
"x86_64")
24
+
TARGET_ARCH_VAR="amd64"
25
+
;;
26
+
"aarch64")
27
+
TARGET_ARCH_VAR="arm64"
28
+
;;
29
+
*)
30
+
echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH"
31
+
echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM"
32
+
exit 1
33
+
;;
34
+
esac
35
+
echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname"
36
+
fi
37
+
38
+
# Map architecture to Rust target
39
+
case "$TARGET_ARCH_VAR" in
40
+
"amd64")
41
+
export RUST_TARGET="x86_64-unknown-linux-gnu"
42
+
export TARGET_ARCH="amd64"
43
+
;;
44
+
"arm64")
45
+
export RUST_TARGET="aarch64-unknown-linux-gnu"
46
+
export TARGET_ARCH="arm64"
47
+
;;
48
+
*)
49
+
echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR"
50
+
echo "Supported architectures: amd64, arm64"
51
+
exit 1
52
+
;;
53
+
esac
54
+
55
+
echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
-226
services/migrations/20241220000001_initial_schema.sql
-226
services/migrations/20241220000001_initial_schema.sql
···
1
-
-- Initial comprehensive schema for Teal music platform
2
-
-- Based on services/cadet/sql/base.sql
3
-
4
-
CREATE TABLE artists (
5
-
mbid UUID PRIMARY KEY,
6
-
name TEXT NOT NULL,
7
-
play_count INTEGER DEFAULT 0
8
-
);
9
-
10
-
-- releases are analogous to 'albums'
11
-
CREATE TABLE releases (
12
-
mbid UUID PRIMARY KEY,
13
-
name TEXT NOT NULL,
14
-
play_count INTEGER DEFAULT 0
15
-
);
16
-
17
-
-- recordings are analogous to 'tracks' BUT tracks can be in multiple releases!
18
-
CREATE TABLE recordings (
19
-
mbid UUID PRIMARY KEY,
20
-
name TEXT NOT NULL,
21
-
play_count INTEGER DEFAULT 0
22
-
);
23
-
24
-
CREATE TABLE plays (
25
-
uri TEXT PRIMARY KEY,
26
-
did TEXT NOT NULL,
27
-
rkey TEXT NOT NULL,
28
-
cid TEXT NOT NULL,
29
-
isrc TEXT,
30
-
duration INTEGER,
31
-
track_name TEXT NOT NULL,
32
-
played_time TIMESTAMP WITH TIME ZONE,
33
-
processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
34
-
release_mbid UUID,
35
-
release_name TEXT,
36
-
recording_mbid UUID,
37
-
submission_client_agent TEXT,
38
-
music_service_base_domain TEXT,
39
-
origin_url TEXT,
40
-
FOREIGN KEY (release_mbid) REFERENCES releases (mbid),
41
-
FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid)
42
-
);
43
-
44
-
CREATE INDEX idx_plays_release_mbid ON plays (release_mbid);
45
-
CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid);
46
-
CREATE INDEX idx_plays_played_time ON plays (played_time);
47
-
CREATE INDEX idx_plays_did ON plays (did);
48
-
49
-
CREATE TABLE play_to_artists (
50
-
play_uri TEXT, -- references plays(uri)
51
-
artist_mbid UUID REFERENCES artists (mbid),
52
-
artist_name TEXT, -- storing here for ease of use when joining
53
-
PRIMARY KEY (play_uri, artist_mbid),
54
-
FOREIGN KEY (play_uri) REFERENCES plays (uri)
55
-
);
56
-
57
-
CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid);
58
-
59
-
-- Profiles table
60
-
CREATE TABLE profiles (
61
-
did TEXT PRIMARY KEY,
62
-
handle TEXT,
63
-
display_name TEXT,
64
-
description TEXT,
65
-
description_facets JSONB,
66
-
avatar TEXT, -- IPLD of the image, bafy...
67
-
banner TEXT,
68
-
created_at TIMESTAMP WITH TIME ZONE
69
-
);
70
-
71
-
-- User featured items table
72
-
CREATE TABLE featured_items (
73
-
did TEXT PRIMARY KEY,
74
-
mbid TEXT NOT NULL,
75
-
type TEXT NOT NULL
76
-
);
77
-
78
-
-- Statii table (status records)
79
-
CREATE TABLE statii (
80
-
uri TEXT PRIMARY KEY,
81
-
did TEXT NOT NULL,
82
-
rkey TEXT NOT NULL,
83
-
cid TEXT NOT NULL,
84
-
record JSONB NOT NULL,
85
-
indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
86
-
);
87
-
88
-
CREATE INDEX idx_statii_did_rkey ON statii (did, rkey);
89
-
90
-
-- Materialized view for artists' play counts
91
-
CREATE MATERIALIZED VIEW mv_artist_play_counts AS
92
-
SELECT
93
-
a.mbid AS artist_mbid,
94
-
a.name AS artist_name,
95
-
COUNT(p.uri) AS play_count
96
-
FROM
97
-
artists a
98
-
LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
99
-
LEFT JOIN plays p ON p.uri = pta.play_uri
100
-
GROUP BY
101
-
a.mbid,
102
-
a.name;
103
-
104
-
CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid);
105
-
106
-
-- Materialized view for releases' play counts
107
-
CREATE MATERIALIZED VIEW mv_release_play_counts AS
108
-
SELECT
109
-
r.mbid AS release_mbid,
110
-
r.name AS release_name,
111
-
COUNT(p.uri) AS play_count
112
-
FROM
113
-
releases r
114
-
LEFT JOIN plays p ON p.release_mbid = r.mbid
115
-
GROUP BY
116
-
r.mbid,
117
-
r.name;
118
-
119
-
CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid);
120
-
121
-
-- Materialized view for recordings' play counts
122
-
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
123
-
SELECT
124
-
rec.mbid AS recording_mbid,
125
-
rec.name AS recording_name,
126
-
COUNT(p.uri) AS play_count
127
-
FROM
128
-
recordings rec
129
-
LEFT JOIN plays p ON p.recording_mbid = rec.mbid
130
-
GROUP BY
131
-
rec.mbid,
132
-
rec.name;
133
-
134
-
CREATE UNIQUE INDEX idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid);
135
-
136
-
-- Global play count materialized view
137
-
CREATE MATERIALIZED VIEW mv_global_play_count AS
138
-
SELECT
139
-
COUNT(uri) AS total_plays,
140
-
COUNT(DISTINCT did) AS unique_listeners
141
-
FROM plays;
142
-
143
-
CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays);
144
-
145
-
-- Top artists in the last 30 days
146
-
CREATE MATERIALIZED VIEW mv_top_artists_30days AS
147
-
SELECT
148
-
a.mbid AS artist_mbid,
149
-
a.name AS artist_name,
150
-
COUNT(p.uri) AS play_count
151
-
FROM artists a
152
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
153
-
INNER JOIN plays p ON p.uri = pta.play_uri
154
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
155
-
GROUP BY a.mbid, a.name
156
-
ORDER BY COUNT(p.uri) DESC;
157
-
158
-
-- Top releases in the last 30 days
159
-
CREATE MATERIALIZED VIEW mv_top_releases_30days AS
160
-
SELECT
161
-
r.mbid AS release_mbid,
162
-
r.name AS release_name,
163
-
COUNT(p.uri) AS play_count
164
-
FROM releases r
165
-
INNER JOIN plays p ON p.release_mbid = r.mbid
166
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
167
-
GROUP BY r.mbid, r.name
168
-
ORDER BY COUNT(p.uri) DESC;
169
-
170
-
-- Top artists for user in the last 30 days
171
-
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS
172
-
SELECT
173
-
prof.did,
174
-
a.mbid AS artist_mbid,
175
-
a.name AS artist_name,
176
-
COUNT(p.uri) AS play_count
177
-
FROM artists a
178
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
179
-
INNER JOIN plays p ON p.uri = pta.play_uri
180
-
INNER JOIN profiles prof ON prof.did = p.did
181
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
182
-
GROUP BY prof.did, a.mbid, a.name
183
-
ORDER BY COUNT(p.uri) DESC;
184
-
185
-
-- Top artists for user in the last 7 days
186
-
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS
187
-
SELECT
188
-
prof.did,
189
-
a.mbid AS artist_mbid,
190
-
a.name AS artist_name,
191
-
COUNT(p.uri) AS play_count
192
-
FROM artists a
193
-
INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid
194
-
INNER JOIN plays p ON p.uri = pta.play_uri
195
-
INNER JOIN profiles prof ON prof.did = p.did
196
-
WHERE p.played_time >= NOW() - INTERVAL '7 days'
197
-
GROUP BY prof.did, a.mbid, a.name
198
-
ORDER BY COUNT(p.uri) DESC;
199
-
200
-
-- Top releases for user in the last 30 days
201
-
CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS
202
-
SELECT
203
-
prof.did,
204
-
r.mbid AS release_mbid,
205
-
r.name AS release_name,
206
-
COUNT(p.uri) AS play_count
207
-
FROM releases r
208
-
INNER JOIN plays p ON p.release_mbid = r.mbid
209
-
INNER JOIN profiles prof ON prof.did = p.did
210
-
WHERE p.played_time >= NOW() - INTERVAL '30 days'
211
-
GROUP BY prof.did, r.mbid, r.name
212
-
ORDER BY COUNT(p.uri) DESC;
213
-
214
-
-- Top releases for user in the last 7 days
215
-
CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS
216
-
SELECT
217
-
prof.did,
218
-
r.mbid AS release_mbid,
219
-
r.name AS release_name,
220
-
COUNT(p.uri) AS play_count
221
-
FROM releases r
222
-
INNER JOIN plays p ON p.release_mbid = r.mbid
223
-
INNER JOIN profiles prof ON prof.did = p.did
224
-
WHERE p.played_time >= NOW() - INTERVAL '7 days'
225
-
GROUP BY prof.did, r.mbid, r.name
226
-
ORDER BY COUNT(p.uri) DESC;
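A side note on the definitions above: these are ordinary (non-incremental) materialized views, so whatever kept them current had to refresh them periodically, and the unique indexes are what make `REFRESH MATERIALIZED VIEW CONCURRENTLY` legal in Postgres. A minimal sketch of such a refresh step, assuming a sqlx `PgPool` is on hand; the helper name and the idea of a periodic job are illustrative only and not part of this migration:

```rust
use sqlx::PgPool;

/// Hypothetical helper: refresh the play-count materialized views.
/// Only the views with a UNIQUE index can be refreshed CONCURRENTLY.
async fn refresh_play_count_views(pool: &PgPool) -> Result<(), sqlx::Error> {
    for view in [
        "mv_release_play_counts",
        "mv_recording_play_counts",
        "mv_global_play_count",
    ] {
        // View names come from the fixed list above, so string formatting is safe here.
        sqlx::query(&format!("REFRESH MATERIALIZED VIEW CONCURRENTLY {view}"))
            .execute(pool)
            .await?;
    }
    Ok(())
}
```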
-59
services/migrations/20241220000002_car_import_tables.sql
-59
services/migrations/20241220000002_car_import_tables.sql
···
1
-
-- CAR import functionality tables
2
-
-- For handling AT Protocol CAR file imports and processing
3
-
4
-
-- Tracks uploaded CAR files that are queued for processing
5
-
CREATE TABLE IF NOT EXISTS car_import_requests (
6
-
import_id TEXT PRIMARY KEY,
7
-
car_data_base64 TEXT NOT NULL,
8
-
status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed
9
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
10
-
processed_at TIMESTAMP WITH TIME ZONE,
11
-
error_message TEXT,
12
-
file_size_bytes INTEGER,
13
-
block_count INTEGER,
14
-
extracted_records_count INTEGER DEFAULT 0
15
-
);
16
-
17
-
CREATE INDEX idx_car_import_requests_status ON car_import_requests (status);
18
-
CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at);
19
-
20
-
-- Tracks raw IPLD blocks extracted from CAR files
21
-
CREATE TABLE IF NOT EXISTS car_blocks (
22
-
cid TEXT PRIMARY KEY,
23
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
24
-
block_data BYTEA NOT NULL,
25
-
decoded_successfully BOOLEAN DEFAULT FALSE,
26
-
collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc.
27
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
28
-
);
29
-
30
-
CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id);
31
-
CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type);
32
-
33
-
-- Tracks records extracted from CAR imports that were successfully processed
34
-
CREATE TABLE IF NOT EXISTS car_extracted_records (
35
-
id SERIAL PRIMARY KEY,
36
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
37
-
cid TEXT NOT NULL REFERENCES car_blocks(cid),
38
-
collection_type TEXT NOT NULL,
39
-
record_uri TEXT, -- AT URI if applicable (e.g., for play records)
40
-
synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123')
41
-
rkey TEXT,
42
-
extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
43
-
processing_notes TEXT
44
-
);
45
-
46
-
CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id);
47
-
CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type);
48
-
CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri);
49
-
50
-
-- Tracks import metadata and commit information
51
-
CREATE TABLE IF NOT EXISTS car_import_metadata (
52
-
import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
53
-
metadata_key TEXT NOT NULL,
54
-
metadata_value JSONB NOT NULL,
55
-
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
56
-
PRIMARY KEY (import_id, metadata_key)
57
-
);
58
-
59
-
CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);
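The `status` column above implies a pending → processing → completed/failed lifecycle for uploaded CAR files. Purely as an illustration of that lifecycle (the actual worker code is not shown in this diff, and the function below is hypothetical), a claim step with sqlx could look like the following; `FOR UPDATE SKIP LOCKED` lets several workers poll the table without double-claiming a request:

```rust
use sqlx::PgPool;

/// Hypothetical worker step: atomically claim the oldest pending CAR import.
async fn claim_next_import(pool: &PgPool) -> Result<Option<String>, sqlx::Error> {
    let claimed: Option<(String,)> = sqlx::query_as(
        r#"
        UPDATE car_import_requests
        SET status = 'processing'
        WHERE import_id = (
            SELECT import_id
            FROM car_import_requests
            WHERE status = 'pending'
            ORDER BY created_at
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        )
        RETURNING import_id
        "#,
    )
    .fetch_optional(pool)
    .await?;

    Ok(claimed.map(|(import_id,)| import_id))
}
```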
-34
services/rocketman/Cargo.toml
-34
services/rocketman/Cargo.toml
···
1
-
[package]
2
-
name = "rocketman"
3
-
version = "0.2.3"
4
-
edition = "2021"
5
-
6
-
license = "MIT"
7
-
authors = ["Natalie B. <nat@natalie.sh>"]
8
-
repository = "https://github.com/espeon/cadet"
9
-
10
-
readme = "readme.md"
11
-
12
-
description = "A modular(ish) jetstream consumer."
13
-
14
-
[dependencies]
15
-
tokio.workspace = true
16
-
tokio-tungstenite.workspace = true
17
-
futures-util = "0.3"
18
-
url.workspace = true
19
-
rand.workspace = true
20
-
tracing.workspace = true
21
-
tracing-subscriber.workspace = true
22
-
metrics.workspace = true
23
-
derive_builder = "0.20.2"
24
-
bon = "3.3.2"
25
-
serde = { workspace = true, features = ["derive"] }
26
-
serde_json.workspace = true
27
-
flume.workspace = true
28
-
anyhow.workspace = true
29
-
async-trait.workspace = true
30
-
zstd = { version = "0.13.3", optional = true }
31
-
32
-
[features]
33
-
default = ["zstd"]
34
-
zstd = ["dep:zstd"]
-77
services/rocketman/examples/spew-bsky-posts.rs
-77
services/rocketman/examples/spew-bsky-posts.rs
···
1
-
use rocketman::{
2
-
connection::JetstreamConnection,
3
-
handler,
4
-
ingestion::LexiconIngestor,
5
-
options::JetstreamOptions,
6
-
types::event::{ Event, Commit },
7
-
};
8
-
use serde_json::Value;
9
-
use std::{
10
-
collections::HashMap,
11
-
sync::Arc,
12
-
sync::Mutex,
13
-
};
14
-
use async_trait::async_trait;
15
-
16
-
#[tokio::main]
17
-
async fn main() {
18
-
// init the builder
19
-
let opts = JetstreamOptions::builder()
20
-
// your EXACT nsids
21
-
.wanted_collections(vec!["app.bsky.feed.post".to_string()])
22
-
.build();
23
-
// create the jetstream connector
24
-
let jetstream = JetstreamConnection::new(opts);
25
-
26
-
// create your ingestors
27
-
let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new();
28
-
ingestors.insert(
29
-
// your EXACT nsid
30
-
"app.bsky.feed.post".to_string(),
31
-
Box::new(MyCoolIngestor),
32
-
);
33
-
34
-
35
-
// tracks the last message we've processed
36
-
let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
37
-
38
-
// get channels
39
-
let msg_rx = jetstream.get_msg_rx();
40
-
let reconnect_tx = jetstream.get_reconnect_tx();
41
-
42
-
// spawn a task to process messages from the queue.
43
-
// this is a simple implementation, you can use a more complex one based on needs.
44
-
let c_cursor = cursor.clone();
45
-
tokio::spawn(async move {
46
-
while let Ok(message) = msg_rx.recv_async().await {
47
-
if let Err(e) =
48
-
handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone())
49
-
.await
50
-
{
51
-
eprintln!("Error processing message: {}", e);
52
-
};
53
-
}
54
-
});
55
-
56
-
// connect to jetstream
57
-
// retries internally, but may fail if there is an extreme error.
58
-
if let Err(e) = jetstream.connect(cursor.clone()).await {
59
-
eprintln!("Failed to connect to Jetstream: {}", e);
60
-
std::process::exit(1);
61
-
}
62
-
}
63
-
64
-
pub struct MyCoolIngestor;
65
-
66
-
/// A cool ingestor implementation. Will just print the message. Does not do verification.
67
-
#[async_trait]
68
-
impl LexiconIngestor for MyCoolIngestor {
69
-
async fn ingest(&self, message: Event<Value>) -> anyhow::Result<()> {
70
-
if let Some(Commit { record: Some(record), .. }) = message.commit {
71
-
if let Some(Value::String(text)) = record.get("text") {
72
-
println!("{text:?}");
73
-
}
74
-
}
75
-
Ok(())
76
-
}
77
-
}
-11
services/rocketman/package.json
-11
services/rocketman/package.json
-74
services/rocketman/readme.md
-74
services/rocketman/readme.md
···
1
-
## Rocketman
2
-
3
-
A modular(ish) jetstream consumer. Backed by Tungstenite.
4
-
5
-
6
-
### Installation
7
-
```toml
8
-
[dependencies]
9
-
rocketman = "latest" # pyt the latest version here
10
-
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
11
-
```
12
-
### Usage
13
-
```rs
14
-
#[tokio::main]
15
-
async fn main() {
16
-
// init the builder
17
-
let opts = JetstreamOptions::builder()
18
-
// your EXACT nsids
19
-
.wanted_collections(vec!["com.example.cool.nsid".to_string()])
20
-
.build();
21
-
// create the jetstream connector
22
-
let jetstream = JetstreamConnection::new(opts);
23
-
24
-
// create your ingestors
25
-
let mut ingestors: HashMap<String, Box<dyn LexiconIngestor + Send + Sync>> = HashMap::new();
26
-
ingestors.insert(
27
-
// your EXACT nsid
28
-
"com.example.cool.nsid".to_string(),
29
-
Box::new(MyCoolIngestor),
30
-
);
31
-
32
-
33
-
// tracks the last message we've processed
34
-
let cursor: Arc<Mutex<Option<u64>>> = Arc::new(Mutex::new(None));
35
-
36
-
// get channels
37
-
let msg_rx = jetstream.get_msg_rx();
38
-
let reconnect_tx = jetstream.get_reconnect_tx();
39
-
40
-
// spawn a task to process messages from the queue.
41
-
// this is a simple implementation, you can use a more complex one based on needs.
42
-
let c_cursor = cursor.clone();
43
-
tokio::spawn(async move {
44
-
while let Ok(message) = msg_rx.recv_async().await {
45
-
if let Err(e) =
46
-
handler::handle_message(message, &ingestors, reconnect_tx.clone(), c_cursor.clone())
47
-
.await
48
-
{
49
-
error!("Error processing message: {}", e);
50
-
};
51
-
}
52
-
});
53
-
54
-
// connect to jetstream
55
-
// retries internally, but may fail if there is an extreme error.
56
-
if let Err(e) = jetstream.connect(cursor.clone()).await {
57
-
error!("Failed to connect to Jetstream: {}", e);
58
-
std::process::exit(1);
59
-
}
60
-
}
61
-
62
-
pub struct MyCoolIngestor;
63
-
64
-
/// A cool ingestor implementation. Will just print the message. Does not do verification.
65
-
impl LexiconIngestor for MyCoolIngestor {
66
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
67
-
info!("{:?}", message);
68
-
// Process message for default lexicon.
69
-
Ok(())
70
-
}
71
-
}
72
-
```
73
-
### gratz
74
-
Based heavily on [phil's jetstream consumer on atcosm constellation](https://github.com/atcosm/links/blob/main/constellation/src/consumer/jetstream.rs).
-335
services/rocketman/src/connection.rs
-335
services/rocketman/src/connection.rs
···
1
-
use flume::{Receiver, Sender};
2
-
use futures_util::StreamExt;
3
-
use metrics::{counter, describe_counter, describe_histogram, histogram, Unit};
4
-
use std::cmp::{max, min};
5
-
use std::sync::{Arc, Mutex};
6
-
use std::time::Instant;
7
-
use tokio::time::{sleep, Duration};
8
-
use tokio_tungstenite::{connect_async, tungstenite::Message};
9
-
use tracing::{error, info};
10
-
use url::Url;
11
-
12
-
use crate::options::JetstreamOptions;
13
-
use crate::time::system_time::SystemTimeProvider;
14
-
use crate::time::TimeProvider;
15
-
16
-
pub struct JetstreamConnection {
17
-
pub opts: JetstreamOptions,
18
-
reconnect_tx: flume::Sender<()>,
19
-
reconnect_rx: flume::Receiver<()>,
20
-
msg_tx: flume::Sender<Message>,
21
-
msg_rx: flume::Receiver<Message>,
22
-
}
23
-
24
-
impl JetstreamConnection {
25
-
pub fn new(opts: JetstreamOptions) -> Self {
26
-
let (reconnect_tx, reconnect_rx) = flume::bounded(opts.bound);
27
-
let (msg_tx, msg_rx) = flume::bounded(opts.bound);
28
-
Self {
29
-
opts,
30
-
reconnect_tx,
31
-
reconnect_rx,
32
-
msg_tx,
33
-
msg_rx,
34
-
}
35
-
}
36
-
37
-
pub fn get_reconnect_tx(&self) -> Sender<()> {
38
-
self.reconnect_tx.clone()
39
-
}
40
-
41
-
pub fn get_msg_rx(&self) -> Receiver<Message> {
42
-
self.msg_rx.clone()
43
-
}
44
-
45
-
fn build_ws_url(&self, cursor: Arc<Mutex<Option<u64>>>) -> String {
46
-
let mut url = Url::parse(&self.opts.ws_url.to_string()).unwrap();
47
-
48
-
// Append query params
49
-
if let Some(ref cols) = self.opts.wanted_collections {
50
-
for col in cols {
51
-
url.query_pairs_mut().append_pair("wantedCollections", col);
52
-
}
53
-
}
54
-
if let Some(ref dids) = self.opts.wanted_dids {
55
-
for did in dids {
56
-
url.query_pairs_mut().append_pair("wantedDids", did);
57
-
}
58
-
}
59
-
if let Some(cursor) = cursor.lock().unwrap().as_ref() {
60
-
url.query_pairs_mut()
61
-
.append_pair("cursor", &cursor.to_string());
62
-
}
63
-
#[cfg(feature = "zstd")]
64
-
if self.opts.compress {
65
-
url.query_pairs_mut().append_pair("compress", "true");
66
-
}
67
-
68
-
url.to_string()
69
-
}
70
-
71
-
pub async fn connect(
72
-
&self,
73
-
cursor: Arc<Mutex<Option<u64>>>,
74
-
) -> Result<(), Box<dyn std::error::Error>> {
75
-
describe_counter!(
76
-
"jetstream.connection.attempt",
77
-
Unit::Count,
78
-
"attempts to connect to jetstream service"
79
-
);
80
-
describe_counter!(
81
-
"jetstream.connection.error",
82
-
Unit::Count,
83
-
"errors connecting to jetstream service"
84
-
);
85
-
describe_histogram!(
86
-
"jetstream.connection.duration",
87
-
Unit::Seconds,
88
-
"Time connected to jetstream service"
89
-
);
90
-
describe_counter!(
91
-
"jetstream.connection.reconnect",
92
-
Unit::Count,
93
-
"reconnects to jetstream service"
94
-
);
95
-
let mut retry_interval = 1;
96
-
97
-
let time_provider = SystemTimeProvider::new();
98
-
99
-
let mut start_time = time_provider.now();
100
-
101
-
loop {
102
-
counter!("jetstream.connection.attempt").increment(1);
103
-
info!("Connecting to {}", self.opts.ws_url);
104
-
let start = Instant::now();
105
-
106
-
let ws_url = self.build_ws_url(cursor.clone());
107
-
108
-
match connect_async(ws_url).await {
109
-
Ok((ws_stream, response)) => {
110
-
let elapsed = start.elapsed();
111
-
info!("Connected. HTTP status: {}", response.status());
112
-
113
-
let (_, mut read) = ws_stream.split();
114
-
115
-
loop {
116
-
// Inner loop to handle messages, reconnect signals, and receive timeout
117
-
let receive_timeout =
118
-
sleep(Duration::from_secs(self.opts.timeout_time_sec as u64));
119
-
tokio::pin!(receive_timeout);
120
-
121
-
loop {
122
-
tokio::select! {
123
-
message_result = read.next() => {
124
-
match message_result {
125
-
Some(message) => {
126
-
// Reset timeout on message received
127
-
receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64));
128
-
129
-
histogram!("jetstream.connection.duration").record(elapsed.as_secs_f64());
130
-
match message {
131
-
Ok(message) => {
132
-
if let Err(err) = self.msg_tx.send_async(message).await {
133
-
counter!("jetstream.error").increment(1);
134
-
error!("Failed to queue message: {}", err);
135
-
}
136
-
}
137
-
Err(e) => {
138
-
counter!("jetstream.error").increment(1);
139
-
error!("Error: {}", e);
140
-
}
141
-
}
142
-
}
143
-
None => {
144
-
info!("Stream closed by server.");
145
-
counter!("jetstream.connection.reconnect").increment(1);
146
-
break; // Stream ended, break inner loop to reconnect
147
-
}
148
-
}
149
-
}
150
-
_ = self.reconnect_rx.recv_async() => {
151
-
info!("Reconnect signal received.");
152
-
counter!("jetstream.connection.reconnect").increment(1);
153
-
break;
154
-
}
155
-
_ = &mut receive_timeout => {
156
-
// last final poll, just in case
157
-
match read.next().await {
158
-
Some(Ok(message)) => {
159
-
if let Err(err) = self.msg_tx.send_async(message).await {
160
-
counter!("jetstream.error").increment(1);
161
-
error!("Failed to queue message: {}", err);
162
-
}
163
-
// Reset timeout to continue
164
-
receive_timeout.as_mut().reset(tokio::time::Instant::now() + Duration::from_secs(self.opts.timeout_time_sec as u64));
165
-
}
166
-
Some(Err(e)) => {
167
-
counter!("jetstream.error").increment(1);
168
-
error!("Error receiving message during final poll: {}", e);
169
-
counter!("jetstream.connection.reconnect").increment(1);
170
-
break;
171
-
}
172
-
None => {
173
-
info!("No commits received in {} seconds, reconnecting.", self.opts.timeout_time_sec);
174
-
counter!("jetstream.connection.reconnect").increment(1);
175
-
break;
176
-
}
177
-
}
178
-
}
179
-
}
180
-
}
181
-
}
182
-
}
183
-
Err(e) => {
184
-
let elapsed_time = time_provider.elapsed(start_time);
185
-
// reset if time connected > the time we set
186
-
if elapsed_time.as_secs() > self.opts.max_retry_interval_seconds {
187
-
retry_interval = 0;
188
-
start_time = time_provider.now();
189
-
}
190
-
counter!("jetstream.connection.error").increment(1);
191
-
error!("Connection error: {}", e);
192
-
}
193
-
}
194
-
195
-
let sleep_time = max(1, min(self.opts.max_retry_interval_seconds, retry_interval));
196
-
info!("Reconnecting in {} seconds...", sleep_time);
197
-
sleep(Duration::from_secs(sleep_time)).await;
198
-
199
-
if retry_interval > self.opts.max_retry_interval_seconds {
200
-
retry_interval = self.opts.max_retry_interval_seconds;
201
-
} else {
202
-
retry_interval *= 2;
203
-
}
204
-
}
205
-
}
206
-
207
-
pub fn force_reconnect(&self) -> Result<(), flume::SendError<()>> {
208
-
info!("Force reconnect requested.");
209
-
self.reconnect_tx.send(()) // Send a reconnect signal
210
-
}
211
-
}
212
-
213
-
#[cfg(test)]
214
-
mod tests {
215
-
use super::*;
216
-
use std::sync::{Arc, Mutex};
217
-
use tokio::task;
218
-
use tokio::time::{timeout, Duration};
219
-
use tokio_tungstenite::tungstenite::Message;
220
-
221
-
#[test]
222
-
fn test_build_ws_url() {
223
-
let opts = JetstreamOptions {
224
-
wanted_collections: Some(vec!["col1".to_string(), "col2".to_string()]),
225
-
wanted_dids: Some(vec!["did1".to_string()]),
226
-
..Default::default()
227
-
};
228
-
let connection = JetstreamConnection::new(opts);
229
-
230
-
let test = Arc::new(Mutex::new(Some(8373)));
231
-
232
-
let url = connection.build_ws_url(test);
233
-
234
-
assert!(url.starts_with("wss://"));
235
-
assert!(url.contains("cursor=8373"));
236
-
assert!(url.contains("wantedCollections=col1"));
237
-
assert!(url.contains("wantedCollections=col2"));
238
-
assert!(url.contains("wantedDids=did1"));
239
-
}
240
-
241
-
#[tokio::test]
242
-
async fn test_force_reconnect() {
243
-
let opts = JetstreamOptions::default();
244
-
let connection = JetstreamConnection::new(opts);
245
-
246
-
// Spawn a task to listen for the reconnect signal
247
-
let reconnect_rx = connection.reconnect_rx.clone();
248
-
let recv_task = task::spawn(async move {
249
-
reconnect_rx
250
-
.recv_async()
251
-
.await
252
-
.expect("Failed to receive reconnect signal");
253
-
});
254
-
255
-
connection
256
-
.force_reconnect()
257
-
.expect("Failed to send reconnect signal");
258
-
259
-
// Ensure reconnect signal was received
260
-
assert!(recv_task.await.is_ok());
261
-
}
262
-
263
-
#[tokio::test]
264
-
async fn test_message_queue() {
265
-
let opts = JetstreamOptions::default();
266
-
let connection = JetstreamConnection::new(opts);
267
-
268
-
let msg_rx = connection.get_msg_rx();
269
-
let msg = Message::Text("test message".into());
270
-
271
-
// Send a message to the queue
272
-
connection
273
-
.msg_tx
274
-
.send_async(msg.clone())
275
-
.await
276
-
.expect("Failed to send message");
277
-
278
-
// Receive and verify the message
279
-
let received = msg_rx
280
-
.recv_async()
281
-
.await
282
-
.expect("Failed to receive message");
283
-
assert_eq!(received, msg);
284
-
}
285
-
286
-
#[tokio::test]
287
-
async fn test_connection_retries_on_failure() {
288
-
let opts = JetstreamOptions::default();
289
-
let connection = Arc::new(JetstreamConnection::new(opts));
290
-
291
-
let cursor = Arc::new(Mutex::new(None));
292
-
293
-
// Timeout to prevent infinite loop
294
-
let result = timeout(Duration::from_secs(3), connection.connect(cursor)).await;
295
-
296
-
assert!(result.is_err(), "Expected timeout due to retry logic");
297
-
}
298
-
299
-
#[tokio::test]
300
-
async fn test_reconnect_after_receive_timeout() {
301
-
use tokio::net::TcpListener;
302
-
use tokio_tungstenite::accept_async;
303
-
304
-
let opts = JetstreamOptions {
305
-
ws_url: crate::endpoints::JetstreamEndpoints::Custom("ws://127.0.0.1:9001".to_string()),
306
-
bound: 5,
307
-
max_retry_interval_seconds: 1,
308
-
..Default::default()
309
-
};
310
-
let connection = JetstreamConnection::new(opts);
311
-
let cursor = Arc::new(Mutex::new(None));
312
-
313
-
// set up dummy "websocket"
314
-
let listener = TcpListener::bind("127.0.0.1:9001")
315
-
.await
316
-
.expect("Failed to bind");
317
-
let server_handle = tokio::spawn(async move {
318
-
if let Ok((stream, _)) = listener.accept().await {
319
-
let ws_stream = accept_async(stream).await.expect("Failed to accept");
320
-
// send nothing
321
-
tokio::time::sleep(Duration::from_secs(6)).await;
322
-
drop(ws_stream);
323
-
}
324
-
});
325
-
326
-
// spawn, then run for >30 seconds to trigger reconnect
327
-
let connect_handle = tokio::spawn(async move {
328
-
tokio::time::timeout(Duration::from_secs(5), connection.connect(cursor))
329
-
.await
330
-
.ok();
331
-
});
332
-
333
-
let _ = tokio::join!(server_handle, connect_handle);
334
-
}
335
-
}
-65
services/rocketman/src/endpoints.rs
-65
services/rocketman/src/endpoints.rs
···
1
-
use std::fmt::{Display, Formatter, Result};
2
-
3
-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
4
-
pub enum JetstreamEndpointLocations {
5
-
UsEast,
6
-
UsWest,
7
-
}
8
-
9
-
impl Display for JetstreamEndpointLocations {
10
-
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
11
-
write!(
12
-
f,
13
-
"{}",
14
-
match self {
15
-
Self::UsEast => "us-east",
16
-
Self::UsWest => "us-west",
17
-
}
18
-
)
19
-
}
20
-
}
21
-
22
-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
23
-
pub enum JetstreamEndpoints {
24
-
Public(JetstreamEndpointLocations, i8),
25
-
Custom(String),
26
-
}
27
-
28
-
impl Display for JetstreamEndpoints {
29
-
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
30
-
match self {
31
-
Self::Public(location, id) => write!(
32
-
f,
33
-
"wss://jetstream{}.{}.bsky.network/subscribe",
34
-
id, location
35
-
),
36
-
Self::Custom(url) => write!(f, "{}", url),
37
-
}
38
-
}
39
-
}
40
-
41
-
impl Default for JetstreamEndpoints {
42
-
fn default() -> Self {
43
-
Self::Public(JetstreamEndpointLocations::UsEast, 2)
44
-
}
45
-
}
46
-
47
-
#[cfg(test)]
48
-
mod tests {
49
-
use super::*;
50
-
51
-
#[test]
52
-
fn test_display_public() {
53
-
let endpoint = JetstreamEndpoints::Public(JetstreamEndpointLocations::UsEast, 2);
54
-
assert_eq!(
55
-
endpoint.to_string(),
56
-
"wss://jetstream2.us-east.bsky.network/subscribe"
57
-
);
58
-
}
59
-
60
-
#[test]
61
-
fn test_display_custom() {
62
-
let endpoint = JetstreamEndpoints::Custom("wss://custom.bsky.network/subscribe".into());
63
-
assert_eq!(endpoint.to_string(), "wss://custom.bsky.network/subscribe");
64
-
}
65
-
}
-1
services/rocketman/src/err.rs
-1
services/rocketman/src/err.rs
···
1
-
// TODO: error types instead of using anyhow
-452
services/rocketman/src/handler.rs
-452
services/rocketman/src/handler.rs
···
1
-
use anyhow::Result;
2
-
use flume::Sender;
3
-
use metrics::{counter, describe_counter, Unit};
4
-
use serde_json::Value;
5
-
use std::{
6
-
collections::HashMap,
7
-
sync::{Arc, Mutex},
8
-
};
9
-
use tokio_tungstenite::tungstenite::{Error, Message};
10
-
use tracing::{debug, error};
11
-
12
-
#[cfg(feature = "zstd")]
13
-
use std::io::Cursor as IoCursor;
14
-
#[cfg(feature = "zstd")]
15
-
use std::sync::LazyLock;
16
-
#[cfg(feature = "zstd")]
17
-
use zstd::dict::DecoderDictionary;
18
-
19
-
use crate::{
20
-
ingestion::LexiconIngestor,
21
-
types::event::{Event, Kind},
22
-
};
23
-
24
-
/// The custom `zstd` dictionary used for decoding compressed Jetstream messages.
25
-
///
26
-
/// Sourced from the [official Bluesky Jetstream repo.](https://github.com/bluesky-social/jetstream/tree/main/pkg/models)
27
-
#[cfg(feature = "zstd")]
28
-
static ZSTD_DICTIONARY: LazyLock<DecoderDictionary> =
29
-
LazyLock::new(|| DecoderDictionary::copy(include_bytes!("../zstd/dictionary")));
30
-
31
-
pub async fn handle_message(
32
-
message: Message,
33
-
ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>,
34
-
reconnect_tx: Sender<()>,
35
-
cursor: Arc<Mutex<Option<u64>>>,
36
-
) -> Result<()> {
37
-
describe_counter!(
38
-
"jetstream.event",
39
-
Unit::Count,
40
-
"number of event ingest attempts"
41
-
);
42
-
describe_counter!(
43
-
"jetstream.event.parse",
44
-
Unit::Count,
45
-
"events that were successfully processed"
46
-
);
47
-
describe_counter!(
48
-
"jetstream.event.fail",
49
-
Unit::Count,
50
-
"events that could not be read"
51
-
);
52
-
describe_counter!("jetstream.error", Unit::Count, "errors encountered");
53
-
match message {
54
-
Message::Text(text) => {
55
-
debug!("Text message received");
56
-
counter!("jetstream.event").increment(1);
57
-
let envelope: Event<Value> = serde_json::from_str(&text).map_err(|e| {
58
-
anyhow::anyhow!("Failed to parse message: {} with json string {}", e, text)
59
-
})?;
60
-
debug!("envelope: {:?}", envelope);
61
-
handle_envelope(envelope, cursor, ingestors).await?;
62
-
Ok(())
63
-
}
64
-
#[cfg(feature = "zstd")]
65
-
Message::Binary(bytes) => {
66
-
debug!("Binary message received");
67
-
counter!("jetstream.event").increment(1);
68
-
let decoder = zstd::stream::Decoder::with_prepared_dictionary(
69
-
IoCursor::new(bytes),
70
-
&*ZSTD_DICTIONARY,
71
-
)?;
72
-
let envelope: Event<Value> = serde_json::from_reader(decoder)
73
-
.map_err(|e| anyhow::anyhow!("Failed to parse binary message: {}", e))?;
74
-
debug!("envelope: {:?}", envelope);
75
-
handle_envelope(envelope, cursor, ingestors).await?;
76
-
Ok(())
77
-
}
78
-
#[cfg(not(feature = "zstd"))]
79
-
Message::Binary(_) => {
80
-
debug!("Binary message received");
81
-
Err(anyhow::anyhow!(
82
-
"binary message received but zstd feature is not enabled"
83
-
))
84
-
}
85
-
Message::Close(_) => {
86
-
debug!("Server closed connection");
87
-
if let Err(e) = reconnect_tx.send(()) {
88
-
counter!("jetstream.event.parse.error", "error" => "failed_to_send_reconnect_signal").increment(1);
89
-
error!("Failed to send reconnect signal: {}", e);
90
-
}
91
-
Err(Error::ConnectionClosed.into())
92
-
}
93
-
_ => Ok(()),
94
-
}
95
-
}
96
-
97
-
async fn handle_envelope(
98
-
envelope: Event<Value>,
99
-
cursor: Arc<Mutex<Option<u64>>>,
100
-
ingestors: &HashMap<String, Box<dyn LexiconIngestor + Send + Sync>>,
101
-
) -> Result<()> {
102
-
if let Some(ref time_us) = envelope.time_us {
103
-
debug!("Time: {}", time_us);
104
-
if let Some(cursor) = cursor.lock().unwrap().as_mut() {
105
-
debug!("Cursor: {}", cursor);
106
-
if time_us > cursor {
107
-
debug!("Cursor is behind, resetting");
108
-
*cursor = *time_us;
109
-
}
110
-
}
111
-
}
112
-
113
-
match envelope.kind {
114
-
Kind::Commit => match extract_commit_nsid(&envelope) {
115
-
Ok(nsid) => {
116
-
if let Some(fun) = ingestors.get(&nsid) {
117
-
match fun.ingest(envelope).await {
118
-
Ok(_) => {
119
-
counter!("jetstream.event.parse.commit", "nsid" => nsid).increment(1)
120
-
}
121
-
Err(e) => {
122
-
error!("Error ingesting commit with nsid {}: {}", nsid, e);
123
-
counter!("jetstream.error").increment(1);
124
-
counter!("jetstream.event.fail").increment(1);
125
-
}
126
-
}
127
-
}
128
-
}
129
-
Err(e) => error!("Error parsing commit: {}", e),
130
-
},
131
-
Kind::Identity => {
132
-
counter!("jetstream.event.parse.identity").increment(1);
133
-
}
134
-
Kind::Account => {
135
-
counter!("jetstream.event.parse.account").increment(1);
136
-
}
137
-
Kind::Unknown(kind) => {
138
-
counter!("jetstream.event.parse.unknown", "kind" => kind).increment(1);
139
-
}
140
-
}
141
-
Ok(())
142
-
}
143
-
144
-
fn extract_commit_nsid(envelope: &Event<Value>) -> anyhow::Result<String> {
145
-
// if the type is not a commit
146
-
if envelope.commit.is_none() {
147
-
return Err(anyhow::anyhow!(
148
-
"Message has no commit, so there is no nsid attached."
149
-
));
150
-
} else if let Some(ref commit) = envelope.commit {
151
-
return Ok(commit.collection.clone());
152
-
}
153
-
154
-
Err(anyhow::anyhow!("Failed to extract nsid: unknown error"))
155
-
}
156
-
157
-
#[cfg(test)]
158
-
mod tests {
159
-
use super::*;
160
-
use crate::types::event::Event;
161
-
use anyhow::Result;
162
-
use async_trait::async_trait;
163
-
use flume::{Receiver, Sender};
164
-
use serde_json::json;
165
-
use std::{
166
-
collections::HashMap,
167
-
sync::{Arc, Mutex},
168
-
};
169
-
use tokio_tungstenite::tungstenite::Message;
170
-
171
-
// Dummy ingestor that records if it was called.
172
-
struct DummyIngestor {
173
-
pub called: Arc<Mutex<bool>>,
174
-
}
175
-
176
-
#[async_trait]
177
-
impl crate::ingestion::LexiconIngestor for DummyIngestor {
178
-
async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> {
179
-
let mut called = self.called.lock().unwrap();
180
-
*called = true;
181
-
Ok(())
182
-
}
183
-
}
184
-
185
-
// Dummy ingestor that always returns an error.
186
-
struct ErrorIngestor;
187
-
188
-
#[async_trait]
189
-
impl crate::ingestion::LexiconIngestor for ErrorIngestor {
190
-
async fn ingest(&self, _event: Event<serde_json::Value>) -> Result<(), anyhow::Error> {
191
-
Err(anyhow::anyhow!("Ingest error"))
192
-
}
193
-
}
194
-
195
-
// Helper to create a reconnect channel.
196
-
fn setup_reconnect_channel() -> (Sender<()>, Receiver<()>) {
197
-
flume::unbounded()
198
-
}
199
-
200
-
#[tokio::test]
201
-
async fn test_valid_commit_success() {
202
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
203
-
let cursor = Arc::new(Mutex::new(Some(100)));
204
-
let called_flag = Arc::new(Mutex::new(false));
205
-
206
-
// Create a valid commit event JSON.
207
-
let event_json = json!({
208
-
"did": "did:example:123",
209
-
"time_us": 200,
210
-
"kind": "commit",
211
-
"commit": {
212
-
"rev": "1",
213
-
"operation": "create",
214
-
"collection": "ns1",
215
-
"rkey": "rkey1",
216
-
"record": { "foo": "bar" },
217
-
"cid": "cid123"
218
-
},
219
-
})
220
-
.to_string();
221
-
222
-
let mut ingestors: HashMap<
223
-
String,
224
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
225
-
> = HashMap::new();
226
-
ingestors.insert(
227
-
"ns1".to_string(),
228
-
Box::new(DummyIngestor {
229
-
called: called_flag.clone(),
230
-
}),
231
-
);
232
-
233
-
let result = handle_message(
234
-
Message::Text(event_json),
235
-
&ingestors,
236
-
reconnect_tx,
237
-
cursor.clone(),
238
-
)
239
-
.await;
240
-
assert!(result.is_ok());
241
-
// Check that the ingestor was called.
242
-
assert!(*called_flag.lock().unwrap());
243
-
// Verify that the cursor got updated.
244
-
assert_eq!(*cursor.lock().unwrap(), Some(200));
245
-
}
246
-
247
-
#[cfg(feature = "zstd")]
248
-
#[tokio::test]
249
-
async fn test_binary_valid_commit() {
250
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
251
-
let cursor = Arc::new(Mutex::new(Some(100)));
252
-
let called_flag = Arc::new(Mutex::new(false));
253
-
254
-
let uncompressed_json = json!({
255
-
"did": "did:example:123",
256
-
"time_us": 200,
257
-
"kind": "commit",
258
-
"commit": {
259
-
"rev": "1",
260
-
"operation": "create",
261
-
"collection": "ns1",
262
-
"rkey": "rkey1",
263
-
"record": { "foo": "bar" },
264
-
"cid": "cid123"
265
-
},
266
-
})
267
-
.to_string();
268
-
269
-
let compressed_dest: IoCursor<Vec<u8>> = IoCursor::new(vec![]);
270
-
let mut encoder = zstd::Encoder::with_prepared_dictionary(
271
-
compressed_dest,
272
-
&zstd::dict::EncoderDictionary::copy(include_bytes!("../zstd/dictionary"), 0),
273
-
)
274
-
.unwrap();
275
-
std::io::copy(
276
-
&mut IoCursor::new(uncompressed_json.as_bytes()),
277
-
&mut encoder,
278
-
)
279
-
.unwrap();
280
-
let compressed_dest = encoder.finish().unwrap();
281
-
282
-
let mut ingestors: HashMap<
283
-
String,
284
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
285
-
> = HashMap::new();
286
-
ingestors.insert(
287
-
"ns1".to_string(),
288
-
Box::new(DummyIngestor {
289
-
called: called_flag.clone(),
290
-
}),
291
-
);
292
-
293
-
let result = handle_message(
294
-
Message::Binary(compressed_dest.into_inner()),
295
-
&ingestors,
296
-
reconnect_tx,
297
-
cursor.clone(),
298
-
)
299
-
.await;
300
-
301
-
assert!(result.is_ok());
302
-
// Check that the ingestor was called.
303
-
assert!(*called_flag.lock().unwrap());
304
-
// Verify that the cursor got updated.
305
-
assert_eq!(*cursor.lock().unwrap(), Some(200));
306
-
}
307
-
308
-
#[tokio::test]
309
-
async fn test_commit_ingest_failure() {
310
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
311
-
let cursor = Arc::new(Mutex::new(Some(100)));
312
-
313
-
// Valid commit event with an ingestor that fails.
314
-
let event_json = json!({
315
-
"did": "did:example:123",
316
-
"time_us": 300,
317
-
"kind": "commit",
318
-
"commit": {
319
-
"rev": "1",
320
-
"operation": "create",
321
-
"collection": "ns_error",
322
-
"rkey": "rkey1",
323
-
"record": { "foo": "bar" },
324
-
"cid": "cid123"
325
-
},
326
-
"identity": null
327
-
})
328
-
.to_string();
329
-
330
-
let mut ingestors: HashMap<
331
-
String,
332
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
333
-
> = HashMap::new();
334
-
ingestors.insert("ns_error".to_string(), Box::new(ErrorIngestor));
335
-
336
-
// Even though ingestion fails, handle_message returns Ok(()).
337
-
let result = handle_message(
338
-
Message::Text(event_json),
339
-
&ingestors,
340
-
reconnect_tx,
341
-
cursor.clone(),
342
-
)
343
-
.await;
344
-
assert!(result.is_ok());
345
-
// Cursor should still update because it comes before the ingest call.
346
-
assert_eq!(*cursor.lock().unwrap(), Some(300));
347
-
}
348
-
349
-
#[tokio::test]
350
-
async fn test_identity_message() {
351
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
352
-
let cursor = Arc::new(Mutex::new(None));
353
-
// Valid identity event.
354
-
let event_json = json!({
355
-
"did": "did:example:123",
356
-
"time_us": 150,
357
-
"kind": "identity",
358
-
"commit": null,
359
-
"identity": {
360
-
"did": "did:example:123",
361
-
"handle": "user",
362
-
"seq": 1,
363
-
"time": "2025-01-01T00:00:00Z"
364
-
}
365
-
})
366
-
.to_string();
367
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
368
-
HashMap::new();
369
-
370
-
let result =
371
-
handle_message(Message::Text(event_json), &ingestors, reconnect_tx, cursor).await;
372
-
assert!(result.is_ok());
373
-
}
374
-
375
-
#[tokio::test]
376
-
async fn test_close_message() {
377
-
let (reconnect_tx, reconnect_rx) = setup_reconnect_channel();
378
-
let cursor = Arc::new(Mutex::new(None));
379
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
380
-
HashMap::new();
381
-
382
-
let result = handle_message(Message::Close(None), &ingestors, reconnect_tx, cursor).await;
383
-
// Should return an error due to connection close.
384
-
assert!(result.is_err());
385
-
// Verify that a reconnect signal was sent.
386
-
let signal = reconnect_rx.recv_async().await;
387
-
assert!(signal.is_ok());
388
-
}
389
-
390
-
#[tokio::test]
391
-
async fn test_invalid_json() {
392
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
393
-
let cursor = Arc::new(Mutex::new(None));
394
-
let ingestors: HashMap<String, Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>> =
395
-
HashMap::new();
396
-
397
-
let invalid_json = "this is not json".to_string();
398
-
let result = handle_message(
399
-
Message::Text(invalid_json),
400
-
&ingestors,
401
-
reconnect_tx,
402
-
cursor,
403
-
)
404
-
.await;
405
-
assert!(result.is_err());
406
-
}
407
-
408
-
#[tokio::test]
409
-
async fn test_cursor_not_updated_if_lower() {
410
-
let (reconnect_tx, _reconnect_rx) = setup_reconnect_channel();
411
-
// Set an initial cursor value.
412
-
let cursor = Arc::new(Mutex::new(Some(300)));
413
-
let event_json = json!({
414
-
"did": "did:example:123",
415
-
"time_us": 200,
416
-
"kind": "commit",
417
-
"commit": {
418
-
"rev": "1",
419
-
"operation": "create",
420
-
"collection": "ns1",
421
-
"rkey": "rkey1",
422
-
"record": { "foo": "bar" },
423
-
"cid": "cid123"
424
-
},
425
-
"identity": null
426
-
})
427
-
.to_string();
428
-
429
-
// Use a dummy ingestor that does nothing.
430
-
let mut ingestors: HashMap<
431
-
String,
432
-
Box<dyn crate::ingestion::LexiconIngestor + Send + Sync>,
433
-
> = HashMap::new();
434
-
ingestors.insert(
435
-
"ns1".to_string(),
436
-
Box::new(DummyIngestor {
437
-
called: Arc::new(Mutex::new(false)),
438
-
}),
439
-
);
440
-
441
-
let result = handle_message(
442
-
Message::Text(event_json),
443
-
&ingestors,
444
-
reconnect_tx,
445
-
cursor.clone(),
446
-
)
447
-
.await;
448
-
assert!(result.is_ok());
449
-
// Cursor should remain unchanged.
450
-
assert_eq!(*cursor.lock().unwrap(), Some(300));
451
-
}
452
-
}
-22
services/rocketman/src/ingestion.rs
-22
services/rocketman/src/ingestion.rs
···
1
-
use anyhow::Result;
2
-
use async_trait::async_trait;
3
-
use serde_json::Value;
4
-
use tracing::info;
5
-
6
-
use crate::types::event::Event;
7
-
8
-
#[async_trait]
9
-
pub trait LexiconIngestor {
10
-
async fn ingest(&self, message: Event<Value>) -> Result<()>;
11
-
}
12
-
13
-
pub struct DefaultLexiconIngestor;
14
-
15
-
#[async_trait]
16
-
impl LexiconIngestor for DefaultLexiconIngestor {
17
-
async fn ingest(&self, message: Event<Value>) -> Result<()> {
18
-
info!("Default lexicon processing: {:?}", message);
19
-
// Process message for default lexicon.
20
-
Ok(())
21
-
}
22
-
}
-8
services/rocketman/src/lib.rs
-8
services/rocketman/src/lib.rs
-40
services/rocketman/src/options.rs
-40
services/rocketman/src/options.rs
···
1
-
use bon::Builder;
2
-
3
-
use crate::endpoints::JetstreamEndpoints;
4
-
5
-
#[derive(Builder, Debug)]
6
-
pub struct JetstreamOptions {
7
-
#[builder(default)]
8
-
pub ws_url: JetstreamEndpoints,
9
-
#[builder(default)]
10
-
pub max_retry_interval_seconds: u64,
11
-
#[builder(default)]
12
-
pub connection_success_time_seconds: u64,
13
-
#[builder(default)]
14
-
pub bound: usize,
15
-
#[builder(default)]
16
-
pub timeout_time_sec: usize,
17
-
#[cfg(feature = "zstd")]
18
-
#[builder(default = true)]
19
-
pub compress: bool,
20
-
pub wanted_collections: Option<Vec<String>>,
21
-
pub wanted_dids: Option<Vec<String>>,
22
-
pub cursor: Option<String>,
23
-
}
24
-
25
-
impl Default for JetstreamOptions {
26
-
fn default() -> Self {
27
-
Self {
28
-
ws_url: JetstreamEndpoints::default(),
29
-
max_retry_interval_seconds: 120,
30
-
connection_success_time_seconds: 60,
31
-
bound: 65536,
32
-
timeout_time_sec: 40,
33
-
#[cfg(feature = "zstd")]
34
-
compress: true,
35
-
wanted_collections: None,
36
-
wanted_dids: None,
37
-
cursor: None,
38
-
}
39
-
}
40
-
}
-11
services/rocketman/src/time/mod.rs
-11
services/rocketman/src/time/mod.rs
···
1
-
use std::time::{Duration, Instant, SystemTime};
2
-
3
-
pub mod system_time;
4
-
5
-
pub trait TimeProvider {
6
-
fn new() -> Self;
7
-
fn now(&self) -> SystemTime; // Get the current time
8
-
fn elapsed(&self, earlier: SystemTime) -> Duration; // Calculate the elapsed time.
9
-
fn instant_now(&self) -> Instant; // For compatibility with your existing code (if needed)
10
-
fn instant_elapsed(&self, earlier: Instant) -> Duration;
11
-
}
-28
services/rocketman/src/time/system_time.rs
-28
services/rocketman/src/time/system_time.rs
···
1
-
use std::time::{Duration, Instant, SystemTime};
2
-
3
-
use super::TimeProvider;
4
-
5
-
#[derive(Default, Clone, Copy)] // Add these derives for ease of use
6
-
pub struct SystemTimeProvider; // No fields needed, just a marker type
7
-
8
-
impl TimeProvider for SystemTimeProvider {
9
-
fn new() -> Self {
10
-
Self
11
-
}
12
-
13
-
fn now(&self) -> SystemTime {
14
-
SystemTime::now()
15
-
}
16
-
17
-
fn elapsed(&self, earlier: SystemTime) -> Duration {
18
-
earlier.elapsed().unwrap_or_else(|_| Duration::from_secs(0))
19
-
}
20
-
21
-
fn instant_now(&self) -> Instant {
22
-
Instant::now()
23
-
}
24
-
25
-
fn instant_elapsed(&self, earlier: Instant) -> Duration {
26
-
earlier.elapsed()
27
-
}
28
-
}
-116
services/rocketman/src/types/event.rs
-116
services/rocketman/src/types/event.rs
···
1
-
use serde::{Deserialize, Deserializer, Serialize};
2
-
3
-
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
4
-
#[serde(rename_all = "lowercase")]
5
-
pub enum Kind {
6
-
Account,
7
-
Identity,
8
-
Commit,
9
-
Unknown(String),
10
-
}
11
-
12
-
#[derive(Debug, Serialize, Deserialize)]
13
-
#[serde(rename_all = "snake_case")]
14
-
pub struct Event<T> {
15
-
pub did: String,
16
-
pub time_us: Option<u64>,
17
-
pub kind: Kind,
18
-
pub commit: Option<Commit<T>>,
19
-
pub identity: Option<Identity>,
20
-
}
21
-
22
-
#[derive(Debug, Serialize, Deserialize)]
23
-
pub struct Identity {
24
-
did: String,
25
-
handle: Option<String>,
26
-
seq: u64,
27
-
time: String,
28
-
}
29
-
30
-
#[derive(Debug, Serialize, Deserialize)]
31
-
#[serde(rename_all = "lowercase")]
32
-
enum AccountStatus {
33
-
TakenDown,
34
-
Suspended,
35
-
Deleted,
36
-
Activated,
37
-
}
38
-
39
-
#[derive(Debug, Serialize, Deserialize)]
40
-
pub struct Account {
41
-
did: String,
42
-
handle: String,
43
-
seq: u64,
44
-
time: String,
45
-
status: AccountStatus,
46
-
}
47
-
48
-
#[derive(Debug, Serialize)]
49
-
#[serde(rename_all = "camelCase")]
50
-
pub struct Commit<T> {
51
-
pub rev: String,
52
-
pub operation: Operation,
53
-
pub collection: String,
54
-
pub rkey: String,
55
-
pub record: Option<T>,
56
-
pub cid: Option<String>,
57
-
}
58
-
59
-
#[derive(Debug, Serialize, Deserialize)]
60
-
#[serde(rename_all = "lowercase")]
61
-
pub enum Operation {
62
-
Create,
63
-
Update,
64
-
Delete,
65
-
}
66
-
67
-
/// Enforce that record is None only when operation is 'delete'
68
-
impl<'de, T> Deserialize<'de> for Commit<T>
69
-
where
70
-
T: Deserialize<'de>,
71
-
{
72
-
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
73
-
where
74
-
D: Deserializer<'de>,
75
-
{
76
-
// Helper struct to perform the deserialization.
77
-
#[derive(Deserialize)]
78
-
#[serde(rename_all = "camelCase")]
79
-
struct Helper<T> {
80
-
rev: String,
81
-
operation: Operation,
82
-
collection: String,
83
-
rkey: String,
84
-
record: Option<T>,
85
-
cid: Option<String>,
86
-
}
87
-
88
-
let helper = Helper::deserialize(deserializer)?;
89
-
90
-
match helper.operation {
91
-
Operation::Delete => {
92
-
if helper.record.is_some() || helper.cid.is_some() {
93
-
return Err(<D::Error as serde::de::Error>::custom(
94
-
"record and cid must be null when operation is delete",
95
-
));
96
-
}
97
-
}
98
-
_ => {
99
-
if helper.record.is_none() || helper.cid.is_none() {
100
-
return Err(<D::Error as serde::de::Error>::custom(
101
-
"record and cid must be present unless operation is delete",
102
-
));
103
-
}
104
-
}
105
-
}
106
-
107
-
Ok(Commit {
108
-
rev: helper.rev,
109
-
operation: helper.operation,
110
-
collection: helper.collection,
111
-
rkey: helper.rkey,
112
-
record: helper.record,
113
-
cid: helper.cid,
114
-
})
115
-
}
116
-
}
-1
services/rocketman/src/types/mod.rs
-1
services/rocketman/src/types/mod.rs
···
1
-
pub mod event;
services/rocketman/zstd/dictionary
services/rocketman/zstd/dictionary
This is a binary file and will not be displayed.
+1
-6
services/satellite/src/counts.rs
+1
-6
services/satellite/src/counts.rs
···
3
3
http::StatusCode,
4
4
Json,
5
5
};
6
-
use chrono::{DateTime, Utc};
7
6
use serde::{Deserialize, Serialize};
8
7
use sqlx::FromRow;
9
8
use uuid::Uuid;
···
43
42
pub limit: i64,
44
43
}
45
44
46
-
#[derive(FromRow, Debug, Deserialize, Serialize)]
45
+
#[derive(FromRow, Debug)]
47
46
pub struct Play {
48
47
pub did: String,
49
48
pub track_name: String,
···
51
50
pub release_name: Option<String>,
52
51
pub release_mbid: Option<Uuid>,
53
52
pub duration: Option<i32>,
54
-
pub played_time: Option<DateTime<Utc>>,
55
53
pub uri: Option<String>,
56
54
// MASSIVE HUGE HACK
57
55
pub artists: Option<String>,
···
65
63
pub release_name: Option<String>,
66
64
pub release_mbid: Option<Uuid>,
67
65
pub duration: Option<i32>,
68
-
pub played_time: Option<DateTime<Utc>>,
69
66
pub uri: Option<String>,
70
67
pub artists: Vec<Artist>,
71
68
}
···
92
89
-- TODO: replace with actual
93
90
STRING_AGG(pa.artist_name || '|' || TEXT(pa.artist_mbid), ',') AS artists,
94
91
p.release_name,
95
-
p.played_time,
96
92
p.duration,
97
93
p.uri,
98
94
p.recording_mbid,
···
138
134
release_name: play.release_name,
139
135
release_mbid: play.release_mbid,
140
136
duration: play.duration,
141
-
played_time: play.played_time,
142
137
uri: play.uri,
143
138
artists,
144
139
}
+31
services/types/Cargo.toml
+31
services/types/Cargo.toml
···
1
+
[package]
2
+
name = "types"
3
+
version = "0.1.0"
4
+
edition = "2021"
5
+
6
+
[dependencies]
7
+
atrium-api.workspace = true
8
+
atrium-xrpc = "0.12.1"
9
+
chrono = "0.4.39"
10
+
http = "1.2.0"
11
+
ipld-core = { version = "0.4.2", features = ["serde"] }
12
+
langtag = { version = "0.3", features = ["serde"] }
13
+
regex = "1.11.1"
14
+
serde = { workspace = true, features = ["derive"] }
15
+
serde_bytes = "0.11.15"
16
+
serde_ipld_dagcbor = "0.6.2"
17
+
serde_json.workspace = true
18
+
thiserror = "2.0.11"
19
+
20
+
# features
21
+
[features]
22
+
default = [
23
+
"namespace-fmteal",
24
+
"namespace-appbsky",
25
+
"namespace-toolsozone",
26
+
"namespace-chatbsky",
27
+
]
28
+
namespace-fmteal = []
29
+
namespace-appbsky = []
30
+
namespace-toolsozone = []
31
+
namespace-chatbsky = []
+10
services/types/readme.md
+10
services/types/readme.md
···
1
+
## Types
2
+
Rust lexicons for teal.fm and others.
3
+
4
+
### Generate lexicons
5
+
You will need to install [esquema-cli](https://github.com/fatfingers23/esquema), a fork of the [atrium codegen tool](https://github.com/sugyan/atrium).
6
+
7
+
Currently it can be installed directly from the repo:
8
+
`cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git`
9
+
10
+
Then regenerate the types with `esquema-cli generate local --lexdir ./lexicons --outdir ./src` from this directory.
+55
target.sh
+55
target.sh
···
1
+
#!/bin/bash
2
+
set -e
3
+
4
+
# Debug: Print all available build variables
5
+
echo "DEBUG: TARGETPLATFORM=$TARGETPLATFORM"
6
+
echo "DEBUG: BUILDPLATFORM=$BUILDPLATFORM"
7
+
echo "DEBUG: TARGETARCH=$TARGETARCH"
8
+
echo "DEBUG: TARGETOS=$TARGETOS"
9
+
10
+
# Use TARGETARCH directly (more reliable than TARGETPLATFORM)
11
+
TARGET_ARCH_VAR="${TARGETARCH:-}"
12
+
13
+
# If TARGETARCH is not set, try to extract from TARGETPLATFORM
14
+
if [ -z "$TARGET_ARCH_VAR" ] && [ -n "$TARGETPLATFORM" ]; then
15
+
TARGET_ARCH_VAR=$(echo "$TARGETPLATFORM" | cut -d'/' -f2)
16
+
echo "DEBUG: Extracted TARGET_ARCH_VAR=$TARGET_ARCH_VAR from TARGETPLATFORM"
17
+
fi
18
+
19
+
# Final fallback: detect from uname
20
+
if [ -z "$TARGET_ARCH_VAR" ]; then
21
+
ARCH=$(uname -m)
22
+
case "$ARCH" in
23
+
"x86_64")
24
+
TARGET_ARCH_VAR="amd64"
25
+
;;
26
+
"aarch64")
27
+
TARGET_ARCH_VAR="arm64"
28
+
;;
29
+
*)
30
+
echo "ERROR: Could not detect target architecture. uname -m returned: $ARCH"
31
+
echo "Available variables: TARGETARCH=$TARGETARCH, TARGETPLATFORM=$TARGETPLATFORM"
32
+
exit 1
33
+
;;
34
+
esac
35
+
echo "DEBUG: Detected TARGET_ARCH_VAR=$TARGET_ARCH_VAR from uname"
36
+
fi
37
+
38
+
# Map architecture to Rust target
39
+
case "$TARGET_ARCH_VAR" in
40
+
"amd64")
41
+
export RUST_TARGET="x86_64-unknown-linux-gnu"
42
+
export TARGET_ARCH="amd64"
43
+
;;
44
+
"arm64")
45
+
export RUST_TARGET="aarch64-unknown-linux-gnu"
46
+
export TARGET_ARCH="arm64"
47
+
;;
48
+
*)
49
+
echo "ERROR: Unsupported target architecture: $TARGET_ARCH_VAR"
50
+
echo "Supported architectures: amd64, arm64"
51
+
exit 1
52
+
;;
53
+
esac
54
+
55
+
echo "SUCCESS: Using RUST_TARGET=$RUST_TARGET, TARGET_ARCH=$TARGET_ARCH"
+92
-62
tools/lexicon-cli/src/commands/generate.ts
+92
-62
tools/lexicon-cli/src/commands/generate.ts
···
1
-
import { execa } from 'execa';
2
-
import { existsSync } from 'fs';
3
-
import { join } from 'path';
4
-
import pc from 'picocolors';
5
-
import { findWorkspaceRoot } from '../utils/workspace.js';
1
+
import { existsSync } from "fs";
2
+
import { join } from "path";
3
+
import { execa } from "execa";
4
+
import pc from "picocolors";
5
+
6
+
import { findWorkspaceRoot } from "../utils/workspace.js";
6
7
7
8
interface GenerateOptions {
8
9
tsOnly?: boolean;
···
12
13
13
14
export async function generate(options: GenerateOptions = {}) {
14
15
const workspaceRoot = findWorkspaceRoot();
15
-
16
-
console.log(pc.blue('๐ง Generating lexicon types...'));
17
-
16
+
17
+
console.log(pc.blue("๐ง Generating lexicon types..."));
18
+
18
19
try {
19
20
if (!options.rustOnly) {
20
21
await generateTypeScript(workspaceRoot, options.force);
21
22
}
22
-
23
+
23
24
if (!options.tsOnly) {
24
25
await generateRust(workspaceRoot, options.force);
25
26
}
26
-
27
-
console.log(pc.green('โ
Lexicon generation complete!'));
27
+
28
+
console.log(pc.green("โ
Lexicon generation complete!"));
28
29
} catch (error) {
29
-
console.error(pc.red('โ Generation failed:'), error instanceof Error ? error.message : String(error));
30
+
console.error(
31
+
pc.red("โ Generation failed:"),
32
+
error instanceof Error ? error.message : String(error),
33
+
);
30
34
process.exit(1);
31
35
}
32
36
}
33
37
34
38
async function generateTypeScript(workspaceRoot: string, force?: boolean) {
35
-
const lexiconsPath = join(workspaceRoot, 'lexicons');
36
-
39
+
const lexiconsPath = join(workspaceRoot, "lexicons");
40
+
37
41
if (!existsSync(lexiconsPath)) {
38
-
throw new Error('Lexicons directory not found at workspace root');
42
+
throw new Error("Lexicons directory not found at workspace root");
39
43
}
40
-
44
+
41
45
// Check if packages/lexicons exists for TypeScript generation
42
-
const packagesLexiconsPath = join(workspaceRoot, 'packages/lexicons');
46
+
const packagesLexiconsPath = join(workspaceRoot, "packages/lexicons");
43
47
if (!existsSync(packagesLexiconsPath)) {
44
-
console.log(pc.yellow(' โ ๏ธ TypeScript lexicons package not found, skipping TypeScript generation'));
48
+
console.log(
49
+
pc.yellow(
50
+
" โ ๏ธ TypeScript lexicons package not found, skipping TypeScript generation",
51
+
),
52
+
);
45
53
return;
46
54
}
47
-
48
-
console.log(pc.cyan(' ๐ฆ Generating TypeScript types...'));
49
-
55
+
56
+
console.log(pc.cyan(" ๐ฆ Generating TypeScript types..."));
57
+
50
58
try {
51
-
await execa('pnpm', ['lex:gen-server'], {
59
+
await execa("pnpm", ["lex:gen-server"], {
52
60
cwd: packagesLexiconsPath,
53
-
stdio: 'inherit'
61
+
stdio: "inherit",
54
62
});
55
-
console.log(pc.green(' โ TypeScript types generated'));
63
+
console.log(pc.green(" โ TypeScript types generated"));
56
64
} catch (error) {
57
-
throw new Error(`TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`);
65
+
throw new Error(
66
+
`TypeScript generation failed: ${error instanceof Error ? error.message : String(error)}`,
67
+
);
58
68
}
59
69
}
60
70
61
71
async function generateRust(workspaceRoot: string, force?: boolean) {
62
-
const typesPath = join(workspaceRoot, 'services/types');
63
-
const lexiconsPath = join(workspaceRoot, 'lexicons');
64
-
72
+
const typesPath = join(workspaceRoot, "services/types");
73
+
const lexiconsPath = join(workspaceRoot, "lexicons");
74
+
65
75
if (!existsSync(typesPath)) {
66
-
throw new Error('Rust types service not found');
76
+
throw new Error("Rust types service not found");
67
77
}
68
-
78
+
69
79
if (!existsSync(lexiconsPath)) {
70
-
throw new Error('Lexicons directory not found at workspace root');
80
+
throw new Error("Lexicons directory not found at workspace root");
71
81
}
72
-
73
-
console.log(pc.cyan(' ๐ฆ Generating Rust types...'));
74
-
82
+
83
+
console.log(pc.cyan(" ๐ฆ Generating Rust types..."));
84
+
75
85
try {
76
86
// Check if esquema-cli is available
77
87
try {
78
-
await execa('esquema-cli', ['--version'], { stdio: 'pipe' });
88
+
await execa("esquema-cli", ["--version"], { stdio: "pipe" });
79
89
} catch {
80
-
console.log(pc.yellow(' โ ๏ธ esquema-cli not found. Installing...'));
90
+
console.log(pc.yellow(" โ ๏ธ esquema-cli not found. Installing..."));
81
91
try {
82
-
await execa('cargo', [
83
-
'install',
84
-
'esquema-cli',
85
-
'--git',
86
-
'https://github.com/fatfingers23/esquema.git'
87
-
], {
88
-
stdio: 'inherit'
89
-
});
90
-
console.log(pc.green(' โ esquema-cli installed successfully'));
92
+
await execa(
93
+
"cargo",
94
+
[
95
+
"install",
96
+
"esquema-cli",
97
+
"--git",
98
+
"https://github.com/fatfingers23/esquema.git",
99
+
],
100
+
{
101
+
stdio: "inherit",
102
+
},
103
+
);
104
+
console.log(pc.green(" โ esquema-cli installed successfully"));
91
105
} catch (installError) {
92
-
throw new Error('Failed to install esquema-cli. Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git');
106
+
throw new Error(
107
+
"Failed to install esquema-cli. Please install manually: cargo install esquema-cli --git https://github.com/fatfingers23/esquema.git",
108
+
);
93
109
}
94
110
}
95
-
96
-
await execa('esquema-cli', [
97
-
'generate',
98
-
'local',
99
-
'--lexdir',
100
-
lexiconsPath,
101
-
'--outdir',
102
-
join(typesPath, 'src')
103
-
], {
104
-
cwd: typesPath,
105
-
stdio: 'inherit'
106
-
});
107
-
108
-
console.log(pc.green(' โ Rust types generated'));
111
+
112
+
// create typespath/src if it doesn't exist
113
+
if (!existsSync(join(typesPath, "src"))) {
114
+
console.log(pc.yellow(" Creating src directory for Rust types..."));
115
+
await execa("mkdir", ["-p", join(typesPath, "src")], {
116
+
stdio: "inherit",
117
+
});
118
+
}
119
+
120
+
await execa(
121
+
"esquema-cli",
122
+
[
123
+
"generate",
124
+
"local",
125
+
"--lexdir",
126
+
lexiconsPath,
127
+
"--outdir",
128
+
join(typesPath, "src"),
129
+
],
130
+
{
131
+
cwd: typesPath,
132
+
stdio: "inherit",
133
+
},
134
+
);
135
+
136
+
console.log(pc.green(" โ Rust types generated"));
109
137
} catch (error) {
110
-
throw new Error(`Rust generation failed: ${error instanceof Error ? error.message : String(error)}`);
138
+
throw new Error(
139
+
`Rust generation failed: ${error instanceof Error ? error.message : String(error)}`,
140
+
);
111
141
}
112
-
}
142
+
}
+44
tools/teal-cli/Cargo.toml
+44
tools/teal-cli/Cargo.toml
···
1
+
[package]
2
+
name = "teal-cli"
3
+
version = "0.1.0"
4
+
edition = "2021"
5
+
description = "A simple management tool for teal.fm AT Protocol services"
6
+
7
+
[[bin]]
8
+
name = "teal"
9
+
path = "src/main.rs"
10
+
11
+
[dependencies]
12
+
# CLI framework
13
+
clap = { version = "4.0", features = ["derive"] }
14
+
anyhow = "1.0"
15
+
serde = { version = "1.0", features = ["derive"] }
16
+
serde_json = "1.0"
17
+
18
+
# Async runtime
19
+
tokio = { version = "1.0", features = [
20
+
"rt",
21
+
"macros",
22
+
"fs",
23
+
"rt-multi-thread",
24
+
] }
25
+
26
+
# Cryptography
27
+
k256 = { version = "0.13", features = ["ecdsa"] }
28
+
multibase = "0.9"
29
+
hex = "0.4"
30
+
rand = "0.8"
31
+
32
+
# File system and paths
33
+
dirs = "5.0"
34
+
35
+
# Utilities
36
+
chrono = { version = "0.4", features = ["serde"] }
37
+
colored = "2.0"
38
+
39
+
40
+
[features]
41
+
default = []
42
+
43
+
[dev-dependencies]
44
+
tempfile = "3.0"
+257
tools/teal-cli/README.md
+257
tools/teal-cli/README.md
···
1
+
# Teal CLI

A comprehensive management tool for Teal AT Protocol services, featuring cryptographic key management and CAR (Content Addressable aRchive) file exploration.

## Installation

From the project root:

```bash
cargo build --release --bin teal
```

The binary will be available at `target/release/teal`.

## Usage

### CAR File Explorer

Explore and analyze CAR files containing AT Protocol and Teal records.

#### Fetch CAR file from the internet

```bash
# Fetch from AT Protocol handle
teal car fetch --identity alice.bsky.social

# Fetch from DID
teal car fetch --identity did:plc:vdjlpwlhbnug4fnjodwr3vzh

# Fetch and save to specific file
teal car fetch --identity mmatt.net --output mmatt.car

# Fetch and immediately explore
teal car fetch --identity mmatt.net --explore
```

#### Explore a CAR file

```bash
# Basic exploration
teal car explore --file path/to/archive.car

# Verbose output with detailed information
teal car explore --file path/to/archive.car --verbose
```

#### Search for specific content

```bash
# Search for records containing "play"
teal car search --file path/to/archive.car --query "play"

# Search with verbose JSON output
teal car search --file path/to/archive.car --query "queen" --verbose
```

#### Export Teal records to JSON

```bash
# Export to default directory (./teal_exports)
teal car export --file path/to/archive.car

# Export to custom directory
teal car export --file path/to/archive.car --output ./my_exports
```

### Generate a new K256 key pair

```bash
# Generate with default settings (saves to ~/.teal/keys/)
teal gen-key

# Generate with custom name
teal gen-key --name production

# Generate with custom output directory
teal gen-key --output ./keys

# Overwrite existing keys
teal gen-key --force

# Output only the multibase (useful for scripts)
teal gen-key --format multibase

# Output as JSON
teal gen-key --format json
```

### Extract public key from existing private key

```bash
# Extract as multibase (default)
teal extract-pubkey --private-key ./keys/repo.key

# Extract as hex
teal extract-pubkey --private-key ./keys/repo.key --format hex

# Extract as JSON with both formats
teal extract-pubkey --private-key ./keys/repo.key --format json
```

### List available keys

```bash
# List keys in default directory
teal list

# List keys in custom directory
teal list --directory ./keys
```

### Rotate keys (backup old, generate new)

```bash
# Rotate the default 'repo' key
teal rotate --name repo

# Rotate with custom backup directory
teal rotate --name repo --backup-dir ./backups
```

## CAR File Analysis

The CAR explorer can analyze AT Protocol archives and identify:

- **Teal Records**: Music plays (`fm.teal.alpha.feed.play`), profiles (`fm.teal.alpha.actor.profile`), and status updates
- **AT Protocol Records**: Bluesky posts, likes, follows, and other social data
- **Commit Operations**: Repository changes and metadata
- **IPLD Structure**: Content addressing and linking

### Example Output

```
🔍 CAR Analysis Results
==================================================

📁 File Overview:
  File size: 10267026 bytes
  Total blocks: 30195
  Root CIDs: 1

📊 Record Types:
  app.bsky.feed.like: 11034
  app.bsky.feed.post: 7510
  fm.teal.alpha.feed.play: 2605
  fm.teal.alpha.actor.profile: 1

🎵 Teal Records Found:
  fm.teal.alpha.feed.play: 2605
  fm.teal.alpha.actor.profile: 1

📝 Sample Teal Records:
  1. fm.teal.alpha.feed.play (bafyreigmu...)
     🎵 Track: Bohemian Rhapsody
     🎤 Artists: Queen
     ⏱️ Duration: 355000ms
```

### Exported JSON Structure

```json
[
  {
    "cid": "bafyreigmuwliezhxczoxgxq5hjtsdzaj3jl54kg...",
    "data": {
      "$type": "fm.teal.alpha.feed.play",
      "track_name": "Bohemian Rhapsody",
      "artist_names": ["Queen"],
      "duration": 355000,
      "played_time": "2024-01-15T14:30:00Z"
    }
  }
]
```
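
For reference, here is a minimal serde sketch of this export shape. The struct and field names below are illustrative only (they are not types shipped by the CLI), and the optional fields are an assumption for records that omit them:

```rust
use serde::Deserialize;

/// Illustrative shape of one entry in the exported JSON array.
#[derive(Debug, Deserialize)]
struct ExportedRecord {
    cid: String,
    data: PlayRecord,
}

/// Mirrors the `fm.teal.alpha.feed.play` fields shown in the example above.
#[derive(Debug, Deserialize)]
struct PlayRecord {
    #[serde(rename = "$type")]
    record_type: String,
    track_name: String,
    artist_names: Vec<String>,
    duration: Option<u64>,
    played_time: Option<String>,
}

/// Parse a whole export file into typed records.
fn parse_exports(json: &str) -> serde_json::Result<Vec<ExportedRecord>> {
    serde_json::from_str(json)
}
```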

## Key Management

The tool generates K256 (secp256k1) keys compatible with AT Protocol:

- **Private Key**: 32-byte secp256k1 private key stored as binary
- **Public Key**: Base58-encoded multibase of the compressed public key
- **Default Location**: `~/.teal/keys/`
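
The multibase stored in `*.pub` is simply the base58btc encoding of the 33-byte compressed public key, mirroring what `src/crypto.rs` does. A minimal sketch using the same crates as the CLI:

```rust
use k256::ecdsa::SigningKey;
use multibase::Base;
use rand::rngs::OsRng;

// Generate a key and derive the AT Protocol style multibase from it.
fn demo_multibase() -> String {
    let signing_key = SigningKey::random(&mut OsRng);
    let compressed = signing_key
        .verifying_key()
        .to_encoded_point(true) // 33-byte compressed SEC1 point
        .as_bytes()
        .to_vec();
    // Base58btc multibase strings carry the leading "z"
    multibase::encode(Base::Base58Btc, compressed)
}
```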

### File Structure

```
~/.teal/keys/
├── repo.key           # Private key (32 bytes, binary)
├── repo.pub           # Public key multibase (text)
├── production.key     # Another private key
└── production.pub     # Another public key multibase
```

## Integration

Replace the hardcoded multibase in your DID document:

```rust
// Before (hardcoded)
"publicKeyMultibase": "z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK"

// After (using generated key)
let pubkey = std::fs::read_to_string("~/.teal/keys/repo.pub")?;
// Use pubkey in your DID document
```
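
Note that `std::fs` does not expand `~`. If you load the key from a home-relative path, resolve it first; a sketch using the `dirs` crate (already a dependency of the CLI), with the helper names here being illustrative:

```rust
use std::path::PathBuf;

// Resolve "~/.teal/keys/repo.pub" to an absolute path before reading it.
fn repo_pubkey_path() -> PathBuf {
    dirs::home_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join(".teal")
        .join("keys")
        .join("repo.pub")
}

fn read_repo_pubkey() -> std::io::Result<String> {
    // Trim the trailing newline the CLI may write with the multibase.
    Ok(std::fs::read_to_string(repo_pubkey_path())?.trim().to_string())
}
```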

## Examples

### CAR File Analysis

```bash
# Fetch CAR file from a user's handle
teal car fetch --identity mmatt.net --output mmatt.car

# Fetch and immediately explore
teal car fetch --identity alice.bsky.social --explore

# Analyze a local CAR export
teal car explore --file nat.car

# Search for specific tracks
teal car search --file nat.car --query "bohemian rhapsody"

# Export all Teal records for data analysis
teal car export --file nat.car --output ./music_data

# View exported play records
cat ./music_data/fm_teal_alpha_feed_play.json | jq '.[0]'
```

### Quick setup

```bash
# Generate a key for development
teal gen-key --name dev

# Get the multibase for your DID document
teal extract-pubkey --private-key ~/.teal/keys/dev.key
```

### Production deployment

```bash
# Generate production keys in a secure location
teal gen-key --name production --output /secure/keys

# Extract multibase for configuration
PUBKEY=$(teal extract-pubkey --private-key /secure/keys/production.key)
echo "Public key: $PUBKEY"
```

## Security Notes

- Private keys are stored as raw 32-byte files with restrictive permissions (600 on Unix)
- Keys are generated using cryptographically secure random number generation
- Never commit private keys to version control
- Consider using secure key management systems in production
+104
tools/teal-cli/rkey_example.md
···
# How to Extract rkey from AT Protocol CAR Files

The **rkey** (record key) is not stored inside the IPLD record data itself. Instead, it's found in **commit operations** that map collection paths to record CIDs.

## AT Protocol Structure

```
Repository Structure:
├── Records (IPLD blocks)
│   ├── bafyrei123... (actual play record data)
│   ├── bafyrei456... (actual profile record data)
│   └── bafyrei789... (actual post record data)
└── Commits (IPLD blocks)
    ├── bafycommit1... (operations mapping paths to CIDs)
    └── bafycommit2... (more operations)
```

## Example: Record IPLD (without rkey)

```json
{
  "$type": "fm.teal.alpha.feed.play",
  "track_name": "Bohemian Rhapsody",
  "artist_names": ["Queen"],
  "duration": 355000,
  "played_time": "2024-01-15T14:30:00Z"
}
```

**❌ No rkey here!** The record contains the data but not its key.

## Example: Commit IPLD (with rkey mappings)

```json
{
  "ops": [
    {
      "action": "create",
      "path": "fm.teal.alpha.feed.play/3k2akjdlkjsf", // ← collection/rkey
      "cid": "bafyrei123..." // ← points to the record above
    },
    {
      "action": "create",
      "path": "fm.teal.alpha.actor.profile/self",
      "cid": "bafyrei456..."
    }
  ],
  "prev": "bafyrei...",
  "rev": "3k2bkl...",
  "time": "2024-01-15T14:35:00Z"
}
```

**✅ rkey is here!** Extract it from the `path` field: `"3k2akjdlkjsf"`

## Extraction Algorithm

```rust
use std::collections::HashMap;

fn extract_rkeys_from_commits(commits: &[CommitInfo]) -> HashMap<String, String> {
    let mut cid_to_rkey = HashMap::new();

    for commit in commits {
        for operation in &commit.operations {
            // Path format: "collection/rkey"
            if let Some(rkey) = operation.path.split('/').last() {
                if let Some(ref record_cid) = operation.record_cid {
                    cid_to_rkey.insert(record_cid.clone(), rkey.to_string());
                }
            }
        }
    }

    cid_to_rkey
}
```

## Complete Example

1. **Find commit blocks** in CAR file
2. **Extract operations** from commit IPLD
3. **Parse paths** like `"fm.teal.alpha.feed.play/3k2akjdlkjsf"`
4. **Map CID → rkey**: `bafyrei123... → 3k2akjdlkjsf`
5. **Use rkey** when processing records

## Why This Matters

The rkey is essential for:

- **AT URI construction**: `at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf` (see the sketch after this list)
- **Record identity**: Uniquely identifies the record within the collection
- **Data integrity**: Maintains proper AT Protocol addressing
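
A minimal sketch of that AT URI construction, using the commit `path` (`collection/rkey`) and the placeholder DID from the example above:

```rust
/// Build an AT URI from a repo DID and a commit operation path ("collection/rkey").
fn at_uri(did: &str, op_path: &str) -> String {
    format!("at://{}/{}", did, op_path)
}

fn main() {
    let uri = at_uri("did:plc:user123", "fm.teal.alpha.feed.play/3k2akjdlkjsf");
    assert_eq!(uri, "at://did:plc:user123/fm.teal.alpha.feed.play/3k2akjdlkjsf");
}
```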

## CLI Usage

```bash
# Explore CAR file and show rkey extraction
teal car explore --file archive.car --verbose

# The verbose output will show:
# 🔑 rkey Extraction Examples:
#   1. bafyrei123... → rkey: 3k2akjdlkjsf
#   2. bafyrei456... → rkey: self
```

**Note**: Some CAR files may not contain commit operations with rkey mappings, especially if they're partial exports or contain only raw records without repository structure.
+116
tools/teal-cli/src/commands/dev.rs
···
use anyhow::Result;
use colored::*;

use crate::config::TealConfig;
use crate::DevCommands;

pub async fn run(cmd: DevCommands, config: &TealConfig) -> Result<()> {
    match cmd {
        DevCommands::Setup {
            skip_docker,
            skip_db,
        } => setup_dev_environment(skip_docker, skip_db, config).await,
        DevCommands::Clean { all } => clean_dev_artifacts(all).await,
        DevCommands::Dev { port, watch } => run_dev_server(port, watch, config).await,
        DevCommands::Seed { count, data_type } => generate_seed_data(count, data_type, config).await,
    }
}

async fn setup_dev_environment(
    skip_docker: bool,
    skip_db: bool,
    config: &TealConfig,
) -> Result<()> {
    println!("{} Setting up development environment...", "🛠️".blue());
    println!();

    if !skip_docker {
        println!("{} Docker Setup:", "🐳".blue());
        println!(" {} Checking Docker...", "•".bold());

        // TODO: Check if Docker is installed and running
        println!(" {} Docker check not implemented", "⚠️".yellow());
        println!(" {} Manually ensure Docker is running", "💡".blue());
        println!();
    }

    if !skip_db {
        println!("{} Database Setup:", "🗄️".blue());
        println!(" {} Database URL: {}", "•".bold(), mask_db_url(&config.database.url));

        // TODO: Run database initialization and migrations
        println!(" {} Database setup not implemented", "⚠️".yellow());
        println!(" {} Run: teal database init", "💡".blue());
        println!(" {} Run: teal database migrate", "💡".blue());
        println!();
    }

    println!("{} Keys Setup:", "🔑".blue());
    let key_path = config.get_key_path(&config.crypto.default_key_name);
    if key_path.exists() {
        println!(" {} Default key already exists", "✅".green());
    } else {
        println!(" {} Generating default key...", "•".bold());
        // TODO: Auto-generate key
        println!(" {} Run: teal crypto gen-key", "💡".blue());
    }
    println!();

    println!("{} Development environment setup complete!", "✅".green());
    println!();
    println!("{} Next steps:", "💡".yellow());
    println!(" 1. teal crypto gen-key --name dev");
    println!(" 2. teal database init");
    println!(" 3. teal dev dev --watch");

    Ok(())
}

async fn clean_dev_artifacts(all: bool) -> Result<()> {
    println!("{} Cleaning development artifacts...", "🧹".blue());
    println!();

    let mut cleaned_items = Vec::new();

    // Clean logs
    if let Ok(entries) = std::fs::read_dir("logs") {
        let mut log_count = 0;
        for entry in entries.flatten() {
            if entry.path().extension().map_or(false, |ext| ext == "log") {
                // TODO: Actually delete log files
                log_count += 1;
            }
        }
        if log_count > 0 {
            cleaned_items.push(format!("{} log files", log_count));
        }
    }

    // Clean temporary files
    if let Ok(entries) = std::fs::read_dir(".") {
        let mut temp_count = 0;
        for entry in entries.flatten() {
            let path = entry.path();
            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                if name.starts_with("tmp_") || name.ends_with(".tmp") {
                    temp_count += 1;
                }
            }
        }
        if temp_count > 0 {
            cleaned_items.push(format!("{} temporary files", temp_count));
        }
    }

    if all {
        // Clean build artifacts
        cleaned_items.push("build artifacts".to_string());
        println!(" {} Would clean: target/ directory", "•".bold());

        // Clean Docker artifacts
        cleaned_items.push("Docker artifacts".to_string());
        println!(" {} Would clean: Docker images and containers", "•".bold());
    }

    if cleaned_items.is_empty() {
        println!("{} No artifacts to clean", "ℹ️".blue());
+349
tools/teal-cli/src/crypto.rs
···
use anyhow::{Context, Result};
use colored::*;
use k256::ecdsa::{SigningKey, VerifyingKey};
use k256::SecretKey;
use multibase::Base;
use rand::rngs::OsRng;
use serde_json::json;
use std::path::PathBuf;
use tokio::fs;

/// Generate a new K256 private key
pub fn generate_private_key() -> SigningKey {
    SigningKey::random(&mut OsRng)
}

/// Load a private key from a file
pub async fn load_private_key(path: &PathBuf) -> Result<SigningKey> {
    let key_bytes = fs::read(path)
        .await
        .with_context(|| format!("Failed to read private key from {:?}", path))?;

    if key_bytes.len() != 32 {
        anyhow::bail!(
            "Invalid private key length. Expected 32 bytes, got {}",
            key_bytes.len()
        );
    }

    let secret_key = SecretKey::from_slice(&key_bytes).context("Failed to parse private key")?;

    Ok(SigningKey::from(secret_key))
}

/// Save a private key to a file
pub async fn save_private_key(key: &SigningKey, path: &PathBuf) -> Result<()> {
    let key_bytes = key.as_nonzero_scalar().to_bytes();

    // Create parent directory if it doesn't exist
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)
            .await
            .with_context(|| format!("Failed to create key directory: {:?}", parent))?;
    }

    fs::write(path, key_bytes)
        .await
        .with_context(|| format!("Failed to write private key to {:?}", path))?;

    // Set restrictive permissions on Unix systems
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(path).await?.permissions();
        perms.set_mode(0o600); // rw-------
        fs::set_permissions(path, perms).await?;
    }

    Ok(())
}

/// Convert a public key to AT Protocol compatible multibase format
pub fn public_key_to_multibase(public_key: &VerifyingKey) -> Result<String> {
    // Get the compressed public key bytes (33 bytes)
    let public_key_bytes = public_key.to_encoded_point(true).as_bytes().to_vec();

    // Encode as multibase with base58btc (z prefix)
    let multibase_string = multibase::encode(Base::Base58Btc, &public_key_bytes);

    Ok(multibase_string)
}

/// Generate a new key pair and save to files
pub async fn generate_key(
    name: String,
    keys_dir: PathBuf,
    force: bool,
    format: String,
) -> Result<()> {
    let private_key_path = keys_dir.join(format!("{}.key", name));
    let public_key_path = keys_dir.join(format!("{}.pub", name));

    // Check if files already exist
    if !force && (private_key_path.exists() || public_key_path.exists()) {
        anyhow::bail!(
            "Key files already exist for '{}'. Use --force to overwrite.\n Private: {:?}\n Public: {:?}",
            name,
            private_key_path,
            public_key_path
        );
    }

    println!(
        "{} Generating K256 key pair for '{}'...",
        "🔑".blue(),
        name.bold()
    );

    // Generate new private key
    let private_key = generate_private_key();
    let public_key = private_key.verifying_key();

    // Save private key
    save_private_key(&private_key, &private_key_path)
        .await
        .with_context(|| format!("Failed to save private key to {:?}", private_key_path))?;

    // Generate public key multibase
    let public_key_multibase =
        public_key_to_multibase(public_key).context("Failed to generate public key multibase")?;

    // Output based on format
    match format.as_str() {
        "json" => {
            let output = json!({
                "keyName": name,
                "privateKeyPath": private_key_path,
                "publicKeyPath": public_key_path,
                "publicKeyMultibase": public_key_multibase,
                "publicKeyHex": hex::encode(public_key.to_encoded_point(false).as_bytes()),
            });
            println!("{}", serde_json::to_string_pretty(&output)?);
        }
        "multibase" => {
            println!("{}", public_key_multibase);
        }
        _ => {
            // includes "files"
            // Save public key multibase to file
            fs::write(&public_key_path, &public_key_multibase)
                .await
                .with_context(|| format!("Failed to write public key to {:?}", public_key_path))?;

            println!("{} Key pair generated successfully!", "✅".green());
            println!(" {} {}", "Name:".bold(), name);
            println!(" {} {:?}", "Private key:".bold(), private_key_path);
            println!(" {} {:?}", "Public key:".bold(), public_key_path);
            println!(
                " {} {}",
                "Multibase:".bold(),
                public_key_multibase.bright_blue()
            );
            println!();
            println!("{} Add this to your DID document:", "💡".yellow());
            println!(" \"publicKeyMultibase\": \"{}\"", public_key_multibase);
        }
    }

    Ok(())
}

/// Extract public key from private key file
pub async fn extract_pubkey(private_key_path: PathBuf, format: String) -> Result<()> {
    println!(
        "{} Extracting public key from {:?}...",
        "🔑".blue(),
        private_key_path
    );

    let private_key = load_private_key(&private_key_path)
        .await
        .with_context(|| format!("Failed to load private key from {:?}", private_key_path))?;

    let public_key = private_key.verifying_key();

    match format.as_str() {
        "multibase" => {
            let multibase = public_key_to_multibase(public_key)?;
            println!("{}", multibase);
        }
        "hex" => {
            let hex = hex::encode(public_key.to_encoded_point(false).as_bytes());
            println!("{}", hex);
        }
        "compressed-hex" => {
            let hex = hex::encode(public_key.to_encoded_point(true).as_bytes());
            println!("{}", hex);
        }
        "json" => {
            let multibase = public_key_to_multibase(public_key)?;
            let hex_uncompressed = hex::encode(public_key.to_encoded_point(false).as_bytes());
            let hex_compressed = hex::encode(public_key.to_encoded_point(true).as_bytes());

            let output = json!({
                "publicKeyMultibase": multibase,
                "publicKeyHex": hex_uncompressed,
                "publicKeyHexCompressed": hex_compressed,
            });
            println!("{}", serde_json::to_string_pretty(&output)?);
        }
        _ => {
            anyhow::bail!(
                "Invalid format '{}'. Use: multibase, hex, compressed-hex, or json",
                format
            );
        }
    }

    Ok(())
}

/// List available keys in directory
pub async fn list_keys(keys_dir: PathBuf) -> Result<()> {
    if !keys_dir.exists() {
        println!("{} No keys directory found at {:?}", "ℹ️".blue(), keys_dir);
        println!("Run 'teal gen-key' to create your first key.");
        return Ok(());
    }

    let mut keys = Vec::new();
    let mut entries = fs::read_dir(&keys_dir).await?;

    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if let Some(extension) = path.extension() {
            if extension == "key" {
                if let Some(stem) = path.file_stem() {
                    if let Some(name) = stem.to_str() {
                        keys.push(name.to_string());
                    }
                }
            }
        }
    }

    if keys.is_empty() {
        println!("{} No keys found in {:?}", "ℹ️".blue(), keys_dir);
        println!("Run 'teal gen-key' to create your first key.");
        return Ok(());
    }

    keys.sort();

    println!("{} Available keys in {:?}:", "🔑".blue(), keys_dir);
    println!();

    let keys_count = keys.len();

    for key_name in keys {
        let private_path = keys_dir.join(format!("{}.key", key_name));
        let public_path = keys_dir.join(format!("{}.pub", key_name));

        let mut status_parts = Vec::new();

        if private_path.exists() {
            status_parts.push("private".green().to_string());
        }

        if public_path.exists() {
            status_parts.push("public".cyan().to_string());

            // Try to read and display the multibase
            if let Ok(multibase) = fs::read_to_string(&public_path).await {
                let multibase = multibase.trim();
                println!(
                    " {} {} ({})",
                    "•".bold(),
                    key_name.bold(),
                    status_parts.join(", ")
                );
                println!(" {}: {}", "Multibase".dimmed(), multibase.bright_blue());
            } else {
                println!(
                    " {} {} ({})",
                    "•".bold(),
                    key_name.bold(),
                    status_parts.join(", ")
                );
            }
        } else {
            println!(
                " {} {} ({})",
                "•".bold(),
                key_name.bold(),
                status_parts.join(", ")
            );
        }

        // Show file modification times
        if let Ok(metadata) = fs::metadata(&private_path).await {
            if let Ok(modified) = metadata.modified() {
                let datetime = chrono::DateTime::<chrono::Local>::from(modified);
                println!(
                    " {}: {}",
                    "Created".dimmed(),
                    datetime.format("%Y-%m-%d %H:%M:%S").to_string().dimmed()
                );
            }
        }
        println!();
    }

    println!(
        "{} Total: {} key(s)",
        "📊".blue(),
        keys_count.to_string().bold()
    );

    Ok(())
}

/// Rotate a key (backup old, generate new)
pub async fn rotate_key(
    keys_dir: PathBuf,
    name: String,
    backup_dir: Option<PathBuf>,
) -> Result<()> {
    let private_key_path = keys_dir.join(format!("{}.key", name));

    if !private_key_path.exists() {
        anyhow::bail!("Key '{}' does not exist in {:?}", name, keys_dir);
    }

    println!("{} Rotating key '{}'...", "🔄".blue(), name.bold());

    // Backup existing key
    let backup_location = backup_dir.unwrap_or_else(|| keys_dir.join("backups"));

    fs::create_dir_all(&backup_location).await?;

    let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
    let backup_private = backup_location.join(format!("{}_{}.key", name, timestamp));
    let backup_public = backup_location.join(format!("{}_{}.pub", name, timestamp));

    fs::copy(&private_key_path, &backup_private).await?;

    let public_key_path = keys_dir.join(format!("{}.pub", name));
    if public_key_path.exists() {
        fs::copy(&public_key_path, &backup_public).await?;
    }

    println!("Backed up existing key to: {:?}", backup_private);

    // Generate new key
    let new_key = generate_private_key();
    save_private_key(&new_key, &private_key_path).await?;

    // Save new public key multibase
    let public_key = new_key.verifying_key();
    let multibase = public_key_to_multibase(public_key)?;
    fs::write(&public_key_path, &multibase).await?;

    println!("{} Key rotation completed!", "✅".green());
    println!(" {} {}", "New multibase:".bold(), multibase.bright_blue());
    println!();
    println!("{} Update your DID document with:", "💡".yellow());
    println!(" \"publicKeyMultibase\": \"{}\"", multibase);

    Ok(())
}
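
A minimal sketch of how these helpers compose, written as a test against a temporary directory. This test is illustrative commentary and not part of the file above; it assumes only the `tempfile` dev-dependency already declared in Cargo.toml:

```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn roundtrips_key_and_multibase() -> Result<()> {
        let dir = tempfile::tempdir()?;
        let key_path = dir.path().join("repo.key");

        // Generate, persist, then reload the private key.
        let key = generate_private_key();
        save_private_key(&key, &key_path).await?;
        let reloaded = load_private_key(&key_path).await?;

        // The multibase derived from the reloaded key matches the original.
        let original = public_key_to_multibase(key.verifying_key())?;
        let reloaded_mb = public_key_to_multibase(reloaded.verifying_key())?;
        assert_eq!(original, reloaded_mb);
        assert!(original.starts_with('z')); // base58btc multibase prefix

        Ok(())
    }
}
```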
+102
tools/teal-cli/src/main.rs
···
use anyhow::Result;
use clap::{Parser, Subcommand};

use std::path::PathBuf;

mod crypto;

#[derive(Parser)]
#[command(name = "teal")]
#[command(about = "Teal management utilities")]
#[command(version = "0.1.0")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    /// Generate a new K256 key pair
    GenKey {
        /// Key name/identifier
        #[arg(short, long, default_value = "repo")]
        name: String,

        /// Output directory (defaults to ~/.teal/keys)
        #[arg(short, long)]
        output: Option<PathBuf>,

        /// Overwrite existing keys
        #[arg(short, long)]
        force: bool,

        /// Output format: json, multibase, or files
        #[arg(long, default_value = "files")]
        format: String,
    },

    /// Extract public key multibase from private key
    ExtractPubkey {
        /// Path to private key file
        #[arg(short, long)]
        private_key: PathBuf,

        /// Output format
        #[arg(short, long, default_value = "multibase")]
        format: String,
    },

    /// List available keys
    List {
        /// Keys directory (defaults to ~/.teal/keys)
        #[arg(short, long)]
        directory: Option<PathBuf>,
    },

    /// Rotate keys (generate new, backup old)
    Rotate {
        /// Key name to rotate
        #[arg(short, long)]
        name: String,

        /// Backup directory
        #[arg(short, long)]
        backup_dir: Option<PathBuf>,
    },
}

fn get_default_keys_dir() -> PathBuf {
    dirs::home_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join(".teal")
        .join("keys")
}

#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();

    match cli.command {
        Commands::GenKey {
            name,
            output,
            force,
            format,
        } => {
            let keys_dir = output.unwrap_or_else(get_default_keys_dir);
            crypto::generate_key(name, keys_dir, force, format).await
        }
        Commands::ExtractPubkey {
            private_key,
            format,
        } => crypto::extract_pubkey(private_key, format).await,
        Commands::List { directory } => {
            let keys_dir = directory.unwrap_or_else(get_default_keys_dir);
            crypto::list_keys(keys_dir).await
        }
        Commands::Rotate { name, backup_dir } => {
            let keys_dir = get_default_keys_dir();
            crypto::rotate_key(keys_dir, name, backup_dir).await
        }
    }
}
+13
-1
turbo.json
···
    },
    "lex:gen-server": {
      "dependsOn": [],
-     "outputs": ["./src/types/**"]
+     "outputs": ["./src/**"]
    },
    "lex:gen": {
      "dependsOn": [],
···
    },
    "db:migrate": {
      "cache": false
+   },
+   "@teal/amethyst#build": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
+   },
+   "@teal/amethyst#build:web": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
+   },
+   "@teal/amethyst#build:ios": {
+     "dependsOn": ["@teal/lexicons#lex:gen-server"],
+     "outputs": ["./build/**"]
    }
  }
}