+501
Diff
round #1
+19
Cargo.lock
+19
Cargo.lock
···
6212
6212
"tranquil-config",
6213
6213
]
6214
6214
6215
+
[[package]]
6216
+
name = "tranquil-lexicon"
6217
+
version = "0.3.1"
6218
+
dependencies = [
6219
+
"chrono",
6220
+
"hickory-resolver",
6221
+
"parking_lot",
6222
+
"reqwest",
6223
+
"serde",
6224
+
"serde_json",
6225
+
"thiserror 2.0.17",
6226
+
"tokio",
6227
+
"tracing",
6228
+
"unicode-segmentation",
6229
+
"urlencoding",
6230
+
"wiremock",
6231
+
]
6232
+
6215
6233
[[package]]
6216
6234
name = "tranquil-oauth"
6217
6235
version = "0.3.1"
···
6309
6327
"tranquil-crypto",
6310
6328
"tranquil-db",
6311
6329
"tranquil-db-traits",
6330
+
"tranquil-lexicon",
6312
6331
"tranquil-oauth",
6313
6332
"tranquil-repo",
6314
6333
"tranquil-ripple",
+4
Cargo.toml
+4
Cargo.toml
···
16
16
"crates/tranquil-db-traits",
17
17
"crates/tranquil-db",
18
18
"crates/tranquil-pds",
19
+
"crates/tranquil-lexicon",
19
20
]
20
21
21
22
[workspace.package]
···
38
39
tranquil-db-traits = { path = "crates/tranquil-db-traits" }
39
40
tranquil-db = { path = "crates/tranquil-db" }
40
41
tranquil-ripple = { path = "crates/tranquil-ripple" }
42
+
tranquil-lexicon = { path = "crates/tranquil-lexicon" }
43
+
44
+
unicode-segmentation = "1"
41
45
42
46
aes-gcm = "0.10"
43
47
backon = "1"
+26
crates/tranquil-lexicon/Cargo.toml
+26
crates/tranquil-lexicon/Cargo.toml
···
1
+
[package]
2
+
name = "tranquil-lexicon"
3
+
version.workspace = true
4
+
edition.workspace = true
5
+
license.workspace = true
6
+
7
+
[features]
8
+
default = []
9
+
resolve = ["dep:reqwest", "dep:hickory-resolver", "dep:tokio", "dep:parking_lot", "dep:tracing", "dep:urlencoding"]
10
+
11
+
[dependencies]
12
+
serde = { workspace = true }
13
+
serde_json = { workspace = true }
14
+
thiserror = { workspace = true }
15
+
unicode-segmentation = { workspace = true }
16
+
chrono = { workspace = true }
17
+
reqwest = { workspace = true, optional = true }
18
+
hickory-resolver = { workspace = true, optional = true }
19
+
tokio = { workspace = true, optional = true }
20
+
parking_lot = { workspace = true, optional = true }
21
+
tracing = { workspace = true, optional = true }
22
+
urlencoding = { workspace = true, optional = true }
23
+
24
+
[dev-dependencies]
25
+
wiremock = { workspace = true }
26
+
tokio = { workspace = true }
+217
crates/tranquil-lexicon/src/formats.rs
+217
crates/tranquil-lexicon/src/formats.rs
···
1
+
/// Returns `true` when `s` is a syntactically valid DID of the form
/// `did:<method>:<identifier>`.
///
/// Rules enforced:
/// - the whole string is at most 2048 bytes;
/// - `<method>` is non-empty and consists of ASCII lowercase letters or digits;
/// - `<identifier>` is non-empty, consists only of ASCII letters, digits and
///   `.`, `_`, `:`, `%`, `-`, and does not end with `:` or `%`.
///
/// No network resolution is performed; this is a purely syntactic check.
pub fn is_valid_did(s: &str) -> bool {
    // Upper bound on the total DID length (atproto DID syntax: 2 KB).
    const MAX_DID_LEN: usize = 2048;

    if s.len() > MAX_DID_LEN {
        return false;
    }

    let Some((method, id)) = s.strip_prefix("did:").and_then(|rest| rest.split_once(':')) else {
        return false;
    };

    let method_ok = !method.is_empty()
        && method
            .bytes()
            .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit());

    // Without this character check, whitespace, '/', '#', control characters
    // and non-ASCII text would all be accepted as part of the identifier.
    let id_chars_ok = id
        .bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b':' | b'%' | b'-'));

    // `last()` is `None` for an empty identifier, which is rejected too.
    let id_tail_ok = id
        .as_bytes()
        .last()
        .is_some_and(|b| !matches!(b, b':' | b'%'));

    method_ok && id_chars_ok && id_tail_ok
}
12
+
13
+
/// Returns `true` when `s` is a syntactically valid handle (a DNS-style
/// hostname).
///
/// Rules enforced:
/// - total length between 1 and 253 bytes;
/// - at least two `.`-separated labels;
/// - every label is 1 to 63 bytes of ASCII letters, digits or `-`, and does
///   not start or end with `-`;
/// - the final label (the top-level domain) does not start with a digit, so
///   dotted-quad IP addresses such as `1.2.3.4` are rejected.
pub fn is_valid_handle(s: &str) -> bool {
    const MAX_HANDLE_LEN: usize = 253;
    const MAX_LABEL_LEN: usize = 63;

    if s.is_empty() || s.len() > MAX_HANDLE_LEN {
        return false;
    }

    // `rsplit_once` doubles as the "contains at least one dot" check.
    let Some((_, tld)) = s.rsplit_once('.') else {
        return false;
    };
    if tld.starts_with(|c: char| c.is_ascii_digit()) {
        return false;
    }

    s.split('.').all(|label| {
        (1..=MAX_LABEL_LEN).contains(&label.len())
            && label
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-')
            && !label.starts_with('-')
            && !label.ends_with('-')
    })
}
25
+
26
+
pub fn is_valid_at_uri(s: &str) -> bool {
27
+
s.strip_prefix("at://").is_some_and(|rest| {
28
+
let authority = rest.split('/').next().unwrap_or("");
29
+
is_valid_did(authority) || is_valid_handle(authority)
30
+
})
31
+
}
32
+
33
+
pub fn is_valid_datetime(s: &str) -> bool {
34
+
chrono::DateTime::parse_from_rfc3339(s).is_ok()
35
+
}
36
+
37
+
/// Returns `true` when `s` looks like a generic URI: `<scheme>:<rest>`.
///
/// Rules enforced:
/// - total length at most 8 KiB;
/// - no whitespace anywhere in the string;
/// - the scheme starts with an ASCII letter and continues with ASCII letters,
///   digits, `+`, `.` or `-`;
/// - something non-empty follows the scheme; a leading `//` is optional and
///   does not count, so `https://` alone is rejected.
///
/// The `//` is optional so that authority-less URIs such as `did:plc:abc`,
/// `mailto:user@example.com` or `dns:example.com` are accepted.
pub fn is_valid_uri(s: &str) -> bool {
    const MAX_URI_LEN: usize = 8 * 1024;

    if s.len() > MAX_URI_LEN || s.chars().any(char::is_whitespace) {
        return false;
    }

    let Some((scheme, rest)) = s.split_once(':') else {
        return false;
    };

    // `starts_with` is false for an empty scheme, so "://x" is rejected here.
    let scheme_ok = scheme.starts_with(|c: char| c.is_ascii_alphabetic())
        && scheme
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '.' | '-'));

    let body = rest.strip_prefix("//").unwrap_or(rest);

    scheme_ok && !body.is_empty()
}
47
+
48
+
/// Returns `true` when `s` has the rough shape of a CID string: it begins
/// with `b`, `z` or `Q`, is at least 8 bytes long, and is entirely ASCII
/// alphanumeric.
///
/// This is a shape check only; the string is not decoded.
pub fn is_valid_cid(s: &str) -> bool {
    let has_known_prefix = matches!(s.chars().next(), Some('b' | 'z' | 'Q'));
    let long_enough = s.len() >= 8;

    long_enough && has_known_prefix && s.bytes().all(|b| b.is_ascii_alphanumeric())
}
53
+
54
+
/// Returns `true` when `s` has the shape of a BCP 47 language tag.
///
/// Rules enforced (deliberately looser than the full BCP 47 grammar):
/// - total length between 1 and 64 bytes;
/// - `-`-separated subtags, each 1 to 8 bytes long, so inputs with empty
///   subtags such as `-`, `en-` or `en--US` are rejected;
/// - the first subtag is ASCII letters only;
/// - the remaining subtags are ASCII letters or digits.
pub fn is_valid_language(s: &str) -> bool {
    const MAX_TAG_LEN: usize = 64;
    const MAX_SUBTAG_LEN: usize = 8;

    if s.len() > MAX_TAG_LEN {
        return false;
    }

    let mut subtags = s.split('-');

    // `split` always yields at least one item; for "" that item is empty and
    // fails the length check below.
    let primary_ok = subtags.next().is_some_and(|primary| {
        (1..=MAX_SUBTAG_LEN).contains(&primary.len())
            && primary.bytes().all(|b| b.is_ascii_alphabetic())
    });

    primary_ok
        && subtags.all(|subtag| {
            (1..=MAX_SUBTAG_LEN).contains(&subtag.len())
                && subtag.bytes().all(|b| b.is_ascii_alphanumeric())
        })
}
57
+
58
+
/// Returns `true` when `s` is a syntactically valid TID (timestamp
/// identifier).
///
/// Rules enforced:
/// - exactly 13 bytes long;
/// - every character is in the base32-sortable alphabet
///   `234567abcdefghijklmnopqrstuvwxyz` (so `0`, `1`, `8`, `9` and uppercase
///   letters are rejected);
/// - the first character is one of `234567abcdefghij`, i.e. a symbol whose
///   top bit is zero.
pub fn is_valid_tid(s: &str) -> bool {
    const TID_LEN: usize = 13;

    let bytes = s.as_bytes();

    bytes.len() == TID_LEN
        && bytes
            .first()
            .is_some_and(|b| matches!(b, b'2'..=b'7' | b'a'..=b'j'))
        && bytes
            .iter()
            .all(|b| matches!(b, b'2'..=b'7' | b'a'..=b'z'))
}
63
+
64
+
/// Returns `true` when `s` is usable as a record key.
///
/// A key is 1 to 512 bytes of ASCII letters, digits, `.`, `-`, `_`, `~` or
/// `:`, and is neither `.` nor `..`.
pub fn is_valid_record_key(s: &str) -> bool {
    if matches!(s, "" | "." | "..") || s.len() > 512 {
        return false;
    }

    s.bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'-' | b'_' | b'~' | b':'))
}
73
+
74
+
pub fn is_valid_at_identifier(s: &str) -> bool {
75
+
is_valid_did(s) || is_valid_handle(s)
76
+
}
77
+
78
+
/// Returns `true` when `s` is a syntactically valid NSID (namespaced
/// identifier) such as `app.bsky.feed.post`.
///
/// An NSID is a reversed-domain authority followed by a final name segment.
/// Rules enforced:
/// - total length at most 317 bytes, authority at most 253 bytes;
/// - at least three `.`-separated segments (two or more authority labels
///   plus the name);
/// - each authority label is 1 to 63 bytes of ASCII letters, digits or `-`,
///   and does not start or end with `-`;
/// - the first authority label starts with an ASCII letter;
/// - the name is 1 to 63 bytes of ASCII letters or digits and starts with a
///   letter (no hyphens).
pub fn is_valid_nsid(s: &str) -> bool {
    const MAX_NSID_LEN: usize = 317;
    const MAX_AUTHORITY_LEN: usize = 253;
    const MAX_SEGMENT_LEN: usize = 63;

    if s.len() > MAX_NSID_LEN {
        return false;
    }

    // Everything before the last dot is the domain authority; the rest is
    // the name segment.
    let Some((authority, name)) = s.rsplit_once('.') else {
        return false;
    };

    let name_ok = (1..=MAX_SEGMENT_LEN).contains(&name.len())
        && name.starts_with(|c: char| c.is_ascii_alphabetic())
        && name.bytes().all(|b| b.is_ascii_alphanumeric());

    let label_ok = |label: &str| {
        (1..=MAX_SEGMENT_LEN).contains(&label.len())
            && label
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-')
            && !label.starts_with('-')
            && !label.ends_with('-')
    };

    name_ok
        && authority.len() <= MAX_AUTHORITY_LEN
        && authority.starts_with(|c: char| c.is_ascii_alphabetic())
        && authority.split('.').count() >= 2
        && authority.split('.').all(label_ok)
}
85
+
86
+
use crate::schema::StringFormat;
87
+
88
+
pub fn validate_format(format: &StringFormat, value: &str) -> bool {
89
+
match format {
90
+
StringFormat::Did => is_valid_did(value),
91
+
StringFormat::Handle => is_valid_handle(value),
92
+
StringFormat::AtUri => is_valid_at_uri(value),
93
+
StringFormat::Datetime => is_valid_datetime(value),
94
+
StringFormat::Uri => is_valid_uri(value),
95
+
StringFormat::Cid => is_valid_cid(value),
96
+
StringFormat::Language => is_valid_language(value),
97
+
StringFormat::Tid => is_valid_tid(value),
98
+
StringFormat::RecordKey => is_valid_record_key(value),
99
+
StringFormat::AtIdentifier => is_valid_at_identifier(value),
100
+
StringFormat::Nsid => is_valid_nsid(value),
101
+
}
102
+
}
103
+
104
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `check` accepts every string in `accepted` and rejects
    /// every string in `rejected`.
    fn assert_cases(check: fn(&str) -> bool, accepted: &[&str], rejected: &[&str]) {
        for input in accepted {
            assert!(check(input), "expected {input:?} to be accepted");
        }
        for input in rejected {
            assert!(!check(input), "expected {input:?} to be rejected");
        }
    }

    #[test]
    fn test_valid_dids() {
        assert_cases(
            is_valid_did,
            &["did:plc:1234567890abcdefghijk", "did:web:example.com"],
            &["", "plc:123", "did:", "did:plc:"],
        );
    }

    #[test]
    fn test_valid_handles() {
        assert_cases(
            is_valid_handle,
            &["user.bsky.social", "example.com"],
            &["noperiod", ""],
        );
    }

    #[test]
    fn test_valid_at_uris() {
        assert_cases(
            is_valid_at_uri,
            &[
                "at://did:plc:abc/app.bsky.feed.post/123",
                "at://user.bsky.social/app.bsky.feed.post/123",
            ],
            &["https://example.com", "at://", "at://not valid"],
        );
    }

    #[test]
    fn test_valid_datetimes() {
        assert_cases(
            is_valid_datetime,
            &["2024-01-01T00:00:00.000Z", "2024-01-01T00:00:00Z"],
            &["not-a-date", "2024-13-01T00:00:00Z"],
        );
    }

    #[test]
    fn test_valid_uris() {
        assert_cases(
            is_valid_uri,
            &[
                "https://example.com",
                "http://localhost",
                "ftp://files.example.com/path",
            ],
            &["://x", "not a uri", "123://bad", "https://"],
        );
    }

    #[test]
    fn test_valid_cids() {
        assert_cases(
            is_valid_cid,
            &[
                "bafyreiabcdef123456",
                "QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG",
                "zQmSomeMultibase",
            ],
            &["abc", "", "xyzinvalidprefix1234"],
        );
    }

    #[test]
    fn test_valid_tids() {
        assert_cases(
            is_valid_tid,
            &["3k2n5j2abcdef"],
            &["short", "3K2N5J2ABCDEF"],
        );
    }

    #[test]
    fn test_valid_record_keys() {
        assert_cases(
            is_valid_record_key,
            &["valid-key_123", "self"],
            &["", ".", ".."],
        );
    }

    #[test]
    fn test_valid_nsids() {
        assert_cases(
            is_valid_nsid,
            &["app.bsky.feed.post", "com.atproto.repo.strongRef"],
            &["too.short", ""],
        );
    }

    #[test]
    fn test_did_method_with_digits() {
        assert_cases(
            is_valid_did,
            &[
                "did:key:z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK",
                "did:3:abc123",
                "did:a1b2:test",
            ],
            &["did:UPPER:test", "did::test"],
        );
    }

    #[test]
    fn test_record_key_with_colon() {
        assert_cases(
            is_valid_record_key,
            &["self", "key:with:colons", "at:something"],
            &[],
        );
    }

    #[test]
    fn test_valid_languages() {
        assert_cases(is_valid_language, &["en", "en-US", "pt-BR"], &[""]);
    }
}
+27
crates/tranquil-lexicon/src/lib.rs
+27
crates/tranquil-lexicon/src/lib.rs
···
1
+
mod formats;
2
+
mod registry;
3
+
mod schema;
4
+
mod validate;
5
+
6
+
#[cfg(feature = "resolve")]
7
+
mod dynamic;
8
+
#[cfg(feature = "resolve")]
9
+
mod resolve;
10
+
11
+
#[cfg(test)]
12
+
mod test_schemas;
13
+
14
+
pub use formats::{
15
+
is_valid_at_identifier, is_valid_at_uri, is_valid_cid, is_valid_datetime, is_valid_did,
16
+
is_valid_handle, is_valid_language, is_valid_nsid, is_valid_record_key, is_valid_tid,
17
+
is_valid_uri,
18
+
};
19
+
pub use registry::LexiconRegistry;
20
+
pub use schema::{LexiconDoc, ParsedRef, parse_ref};
21
+
pub use validate::{LexValidationError, validate_record};
22
+
23
+
#[cfg(feature = "resolve")]
24
+
pub use resolve::{
25
+
ResolveError, fetch_schema_from_pds, resolve_did_from_dns, resolve_lexicon,
26
+
resolve_lexicon_from_did, resolve_lexicon_with_config, resolve_pds_endpoint,
27
+
};
+208
crates/tranquil-lexicon/src/schema.rs
+208
crates/tranquil-lexicon/src/schema.rs
···
1
+
use serde::Deserialize;
2
+
use std::collections::HashMap;
3
+
4
+
#[derive(Debug, Deserialize)]
5
+
pub struct LexiconDoc {
6
+
pub lexicon: u32,
7
+
pub id: String,
8
+
#[serde(default)]
9
+
pub defs: HashMap<String, LexDef>,
10
+
}
11
+
12
+
#[derive(Debug, Deserialize)]
13
+
#[serde(tag = "type")]
14
+
pub enum LexDef {
15
+
#[serde(rename = "record")]
16
+
Record(LexRecord),
17
+
#[serde(rename = "object")]
18
+
Object(LexObject),
19
+
#[serde(rename = "token")]
20
+
Token {},
21
+
#[serde(rename = "string")]
22
+
StringDef(LexStringDef),
23
+
#[serde(rename = "query")]
24
+
Query {},
25
+
#[serde(rename = "procedure")]
26
+
Procedure {},
27
+
#[serde(rename = "subscription")]
28
+
Subscription {},
29
+
#[serde(rename = "params")]
30
+
Params {},
31
+
#[serde(rename = "permission")]
32
+
Permission {},
33
+
#[serde(rename = "permission-set")]
34
+
PermissionSet {},
35
+
}
36
+
37
+
#[derive(Debug, Deserialize)]
38
+
pub struct LexRecord {
39
+
#[serde(default)]
40
+
pub key: Option<String>,
41
+
pub record: LexObject,
42
+
}
43
+
44
+
#[derive(Debug, Deserialize)]
45
+
pub struct LexObject {
46
+
#[serde(default)]
47
+
pub required: Vec<String>,
48
+
#[serde(default)]
49
+
pub nullable: Vec<String>,
50
+
#[serde(default)]
51
+
pub properties: HashMap<String, LexProperty>,
52
+
}
53
+
54
+
#[derive(Debug, Deserialize)]
55
+
#[serde(tag = "type")]
56
+
pub enum LexProperty {
57
+
#[serde(rename = "string")]
58
+
String(LexString),
59
+
#[serde(rename = "integer")]
60
+
Integer(LexInteger),
61
+
#[serde(rename = "boolean")]
62
+
Boolean {},
63
+
#[serde(rename = "bytes")]
64
+
Bytes(LexBytes),
65
+
#[serde(rename = "cid-link")]
66
+
CidLink {},
67
+
#[serde(rename = "blob")]
68
+
Blob(LexBlob),
69
+
#[serde(rename = "unknown")]
70
+
Unknown {},
71
+
#[serde(rename = "ref")]
72
+
Ref(LexRef),
73
+
#[serde(rename = "union")]
74
+
Union(LexUnion),
75
+
#[serde(rename = "array")]
76
+
Array(LexArray),
77
+
#[serde(rename = "object")]
78
+
Object(LexObject),
79
+
}
80
+
81
+
#[derive(Debug, Deserialize)]
82
+
#[serde(rename_all = "camelCase")]
83
+
pub struct LexString {
84
+
#[serde(default)]
85
+
pub max_length: Option<u64>,
86
+
#[serde(default)]
87
+
pub min_length: Option<u64>,
88
+
#[serde(default)]
89
+
pub max_graphemes: Option<u64>,
90
+
#[serde(default)]
91
+
pub min_graphemes: Option<u64>,
92
+
#[serde(default)]
93
+
pub format: Option<StringFormat>,
94
+
#[serde(default)]
95
+
pub known_values: Option<Vec<String>>,
96
+
#[serde(rename = "enum", default)]
97
+
pub enum_values: Option<Vec<String>>,
98
+
#[serde(rename = "const", default)]
99
+
pub const_value: Option<String>,
100
+
#[serde(default)]
101
+
pub default: Option<String>,
102
+
}
103
+
104
+
#[derive(Debug, Deserialize)]
105
+
pub struct LexInteger {
106
+
#[serde(default)]
107
+
pub minimum: Option<i64>,
108
+
#[serde(default)]
109
+
pub maximum: Option<i64>,
110
+
#[serde(default)]
111
+
pub default: Option<i64>,
112
+
#[serde(rename = "enum", default)]
113
+
pub enum_values: Option<Vec<i64>>,
114
+
#[serde(rename = "const", default)]
115
+
pub const_value: Option<i64>,
116
+
}
117
+
118
+
#[derive(Debug, Deserialize)]
119
+
#[serde(rename_all = "camelCase")]
120
+
pub struct LexBytes {
121
+
#[serde(default)]
122
+
pub max_length: Option<u64>,
123
+
#[serde(default)]
124
+
pub min_length: Option<u64>,
125
+
}
126
+
127
+
#[derive(Debug, Deserialize)]
128
+
#[serde(rename_all = "camelCase")]
129
+
pub struct LexBlob {
130
+
#[serde(default)]
131
+
pub accept: Option<Vec<String>>,
132
+
#[serde(default)]
133
+
pub max_size: Option<u64>,
134
+
}
135
+
136
+
#[derive(Debug, Deserialize)]
137
+
#[serde(rename_all = "camelCase")]
138
+
pub struct LexArray {
139
+
pub items: Box<LexProperty>,
140
+
#[serde(default)]
141
+
pub min_length: Option<u64>,
142
+
#[serde(default)]
143
+
pub max_length: Option<u64>,
144
+
}
145
+
146
+
#[derive(Debug, Deserialize)]
147
+
pub struct LexUnion {
148
+
#[serde(default)]
149
+
pub refs: Vec<String>,
150
+
#[serde(default)]
151
+
pub closed: bool,
152
+
}
153
+
154
+
#[derive(Debug, Deserialize)]
155
+
#[serde(rename_all = "camelCase")]
156
+
pub struct LexRef {
157
+
#[serde(rename = "ref")]
158
+
pub reference: String,
159
+
}
160
+
161
+
#[derive(Debug, Clone, Deserialize)]
162
+
pub enum StringFormat {
163
+
#[serde(rename = "did")]
164
+
Did,
165
+
#[serde(rename = "handle")]
166
+
Handle,
167
+
#[serde(rename = "at-uri")]
168
+
AtUri,
169
+
#[serde(rename = "datetime")]
170
+
Datetime,
171
+
#[serde(rename = "uri")]
172
+
Uri,
173
+
#[serde(rename = "cid")]
174
+
Cid,
175
+
#[serde(rename = "language")]
176
+
Language,
177
+
#[serde(rename = "tid")]
178
+
Tid,
179
+
#[serde(rename = "record-key")]
180
+
RecordKey,
181
+
#[serde(rename = "at-identifier")]
182
+
AtIdentifier,
183
+
#[serde(rename = "nsid")]
184
+
Nsid,
185
+
}
186
+
187
+
/// A schema reference string split into its parts. All slices borrow from
/// the string passed to [`parse_ref`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedRef<'a> {
    /// `#fragment`: the text after the leading `#`.
    Local(&'a str),
    /// `nsid#fragment`: the text before and after the first `#`.
    Qualified { nsid: &'a str, fragment: &'a str },
    /// A reference with no `#` at all.
    Bare(&'a str),
}

/// Splits `reference` into a [`ParsedRef`].
///
/// - A leading `#` yields [`ParsedRef::Local`] with the remainder.
/// - Otherwise an optional `lex:` prefix is dropped, and the rest is split
///   at the first `#` into [`ParsedRef::Qualified`], or returned whole as
///   [`ParsedRef::Bare`] when it contains no `#`.
///
/// The parts are not validated; they may be empty.
pub fn parse_ref(reference: &str) -> ParsedRef<'_> {
    if let Some(local) = reference.strip_prefix('#') {
        return ParsedRef::Local(local);
    }

    let stripped = reference.strip_prefix("lex:").unwrap_or(reference);

    match stripped.split_once('#') {
        Some((nsid, fragment)) => ParsedRef::Qualified { nsid, fragment },
        None => ParsedRef::Bare(stripped),
    }
}
205
+
206
+
#[derive(Debug, Deserialize)]
207
+
#[serde(rename_all = "camelCase")]
208
+
pub struct LexStringDef {}
History
2 rounds
0 comments
oyster.cafe
submitted
#1
1 commit
expand
collapse
feat(lexicon): add crate with schema types and format validators
expand 0 comments
pull request successfully merged
oyster.cafe
submitted
#0
1 commit
expand
collapse
feat(lexicon): add crate with schema types and format validators