Our Personal Data Server from scratch! tranquil.farm
atproto pds rust postgresql fun oauth

feat(lexicon): add crate with schema types and format validators #44

merged opened by oyster.cafe targeting main from feat/real-lex-schema-validation
Labels

None yet.

assignee

None yet.

Participants 1
AT URI
at://did:plc:3fwecdnvtcscjnrx2p4n7alz/sh.tangled.repo.pull/3mgvbqsbi2v22
+501
Diff #1
+19
Cargo.lock
··· 6212 6212 "tranquil-config", 6213 6213 ] 6214 6214 6215 + [[package]] 6216 + name = "tranquil-lexicon" 6217 + version = "0.3.1" 6218 + dependencies = [ 6219 + "chrono", 6220 + "hickory-resolver", 6221 + "parking_lot", 6222 + "reqwest", 6223 + "serde", 6224 + "serde_json", 6225 + "thiserror 2.0.17", 6226 + "tokio", 6227 + "tracing", 6228 + "unicode-segmentation", 6229 + "urlencoding", 6230 + "wiremock", 6231 + ] 6232 + 6215 6233 [[package]] 6216 6234 name = "tranquil-oauth" 6217 6235 version = "0.3.1" ··· 6309 6327 "tranquil-crypto", 6310 6328 "tranquil-db", 6311 6329 "tranquil-db-traits", 6330 + "tranquil-lexicon", 6312 6331 "tranquil-oauth", 6313 6332 "tranquil-repo", 6314 6333 "tranquil-ripple",
+4
Cargo.toml
··· 16 16 "crates/tranquil-db-traits", 17 17 "crates/tranquil-db", 18 18 "crates/tranquil-pds", 19 + "crates/tranquil-lexicon", 19 20 ] 20 21 21 22 [workspace.package] ··· 38 39 tranquil-db-traits = { path = "crates/tranquil-db-traits" } 39 40 tranquil-db = { path = "crates/tranquil-db" } 40 41 tranquil-ripple = { path = "crates/tranquil-ripple" } 42 + tranquil-lexicon = { path = "crates/tranquil-lexicon" } 43 + 44 + unicode-segmentation = "1" 41 45 42 46 aes-gcm = "0.10" 43 47 backon = "1"
+26
crates/tranquil-lexicon/Cargo.toml
··· 1 + [package] 2 + name = "tranquil-lexicon" 3 + version.workspace = true 4 + edition.workspace = true 5 + license.workspace = true 6 + 7 + [features] 8 + default = [] 9 + resolve = ["dep:reqwest", "dep:hickory-resolver", "dep:tokio", "dep:parking_lot", "dep:tracing", "dep:urlencoding"] 10 + 11 + [dependencies] 12 + serde = { workspace = true } 13 + serde_json = { workspace = true } 14 + thiserror = { workspace = true } 15 + unicode-segmentation = { workspace = true } 16 + chrono = { workspace = true } 17 + reqwest = { workspace = true, optional = true } 18 + hickory-resolver = { workspace = true, optional = true } 19 + tokio = { workspace = true, optional = true } 20 + parking_lot = { workspace = true, optional = true } 21 + tracing = { workspace = true, optional = true } 22 + urlencoding = { workspace = true, optional = true } 23 + 24 + [dev-dependencies] 25 + wiremock = { workspace = true } 26 + tokio = { workspace = true }
+217
crates/tranquil-lexicon/src/formats.rs
··· 1 + pub fn is_valid_did(s: &str) -> bool { 2 + s.strip_prefix("did:") 3 + .and_then(|rest| rest.split_once(':')) 4 + .is_some_and(|(method, id)| { 5 + !method.is_empty() 6 + && method 7 + .chars() 8 + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()) 9 + && !id.is_empty() 10 + }) 11 + } 12 + 13 + pub fn is_valid_handle(s: &str) -> bool { 14 + !s.is_empty() 15 + && s.len() <= 253 16 + && s.contains('.') 17 + && s.split('.').all(|seg| { 18 + !seg.is_empty() 19 + && seg.len() <= 63 20 + && seg.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') 21 + && !seg.starts_with('-') 22 + && !seg.ends_with('-') 23 + }) 24 + } 25 + 26 + pub fn is_valid_at_uri(s: &str) -> bool { 27 + s.strip_prefix("at://").is_some_and(|rest| { 28 + let authority = rest.split('/').next().unwrap_or(""); 29 + is_valid_did(authority) || is_valid_handle(authority) 30 + }) 31 + } 32 + 33 + pub fn is_valid_datetime(s: &str) -> bool { 34 + chrono::DateTime::parse_from_rfc3339(s).is_ok() 35 + } 36 + 37 + pub fn is_valid_uri(s: &str) -> bool { 38 + s.split_once("://").is_some_and(|(scheme, rest)| { 39 + !scheme.is_empty() 40 + && scheme 41 + .chars() 42 + .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '.' || c == '-') 43 + && scheme.starts_with(|c: char| c.is_ascii_alphabetic()) 44 + && !rest.is_empty() 45 + }) 46 + } 47 + 48 + pub fn is_valid_cid(s: &str) -> bool { 49 + s.len() >= 8 50 + && s.chars().all(|c| c.is_ascii_alphanumeric()) 51 + && s.starts_with(|c: char| c == 'b' || c == 'z' || c == 'Q') 52 + } 53 + 54 + pub fn is_valid_language(s: &str) -> bool { 55 + !s.is_empty() && s.len() <= 64 && s.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') 56 + } 57 + 58 + pub fn is_valid_tid(s: &str) -> bool { 59 + s.len() == 13 60 + && s.chars() 61 + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()) 62 + } 63 + 64 + pub fn is_valid_record_key(s: &str) -> bool { 65 + !s.is_empty() 66 + && s.len() <= 512 67 + && s != "." 68 + && s != ".." 69 + && s.chars().all(|c| { 70 + c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '~' || c == ':' 71 + }) 72 + } 73 + 74 + pub fn is_valid_at_identifier(s: &str) -> bool { 75 + is_valid_did(s) || is_valid_handle(s) 76 + } 77 + 78 + pub fn is_valid_nsid(s: &str) -> bool { 79 + !s.is_empty() 80 + && s.split('.').count() >= 3 81 + && s.split('.').all(|seg| { 82 + !seg.is_empty() && seg.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') 83 + }) 84 + } 85 + 86 + use crate::schema::StringFormat; 87 + 88 + pub fn validate_format(format: &StringFormat, value: &str) -> bool { 89 + match format { 90 + StringFormat::Did => is_valid_did(value), 91 + StringFormat::Handle => is_valid_handle(value), 92 + StringFormat::AtUri => is_valid_at_uri(value), 93 + StringFormat::Datetime => is_valid_datetime(value), 94 + StringFormat::Uri => is_valid_uri(value), 95 + StringFormat::Cid => is_valid_cid(value), 96 + StringFormat::Language => is_valid_language(value), 97 + StringFormat::Tid => is_valid_tid(value), 98 + StringFormat::RecordKey => is_valid_record_key(value), 99 + StringFormat::AtIdentifier => is_valid_at_identifier(value), 100 + StringFormat::Nsid => is_valid_nsid(value), 101 + } 102 + } 103 + 104 + #[cfg(test)] 105 + mod tests { 106 + use super::*; 107 + 108 + #[test] 109 + fn test_valid_dids() { 110 + assert!(is_valid_did("did:plc:1234567890abcdefghijk")); 111 + assert!(is_valid_did("did:web:example.com")); 112 + assert!(!is_valid_did("")); 113 + assert!(!is_valid_did("plc:123")); 114 + assert!(!is_valid_did("did:")); 115 + assert!(!is_valid_did("did:plc:")); 116 + } 117 + 118 + #[test] 119 + fn test_valid_handles() { 120 + assert!(is_valid_handle("user.bsky.social")); 121 + assert!(is_valid_handle("example.com")); 122 + assert!(!is_valid_handle("noperiod")); 123 + assert!(!is_valid_handle("")); 124 + } 125 + 126 + #[test] 127 + fn test_valid_at_uris() { 128 + assert!(is_valid_at_uri("at://did:plc:abc/app.bsky.feed.post/123")); 129 + assert!(is_valid_at_uri( 130 + "at://user.bsky.social/app.bsky.feed.post/123" 131 + )); 132 + assert!(!is_valid_at_uri("https://example.com")); 133 + assert!(!is_valid_at_uri("at://")); 134 + assert!(!is_valid_at_uri("at://not valid")); 135 + } 136 + 137 + #[test] 138 + fn test_valid_datetimes() { 139 + assert!(is_valid_datetime("2024-01-01T00:00:00.000Z")); 140 + assert!(is_valid_datetime("2024-01-01T00:00:00Z")); 141 + assert!(!is_valid_datetime("not-a-date")); 142 + assert!(!is_valid_datetime("2024-13-01T00:00:00Z")); 143 + } 144 + 145 + #[test] 146 + fn test_valid_uris() { 147 + assert!(is_valid_uri("https://example.com")); 148 + assert!(is_valid_uri("http://localhost")); 149 + assert!(is_valid_uri("ftp://files.example.com/path")); 150 + assert!(!is_valid_uri("://x")); 151 + assert!(!is_valid_uri("not a uri")); 152 + assert!(!is_valid_uri("123://bad")); 153 + assert!(!is_valid_uri("https://")); 154 + } 155 + 156 + #[test] 157 + fn test_valid_cids() { 158 + assert!(is_valid_cid("bafyreiabcdef123456")); 159 + assert!(is_valid_cid( 160 + "QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG" 161 + )); 162 + assert!(is_valid_cid("zQmSomeMultibase")); 163 + assert!(!is_valid_cid("abc")); 164 + assert!(!is_valid_cid("")); 165 + assert!(!is_valid_cid("xyzinvalidprefix1234")); 166 + } 167 + 168 + #[test] 169 + fn test_valid_tids() { 170 + assert!(is_valid_tid("3k2n5j2abcdef")); 171 + assert!(!is_valid_tid("short")); 172 + assert!(!is_valid_tid("3K2N5J2ABCDEF")); 173 + } 174 + 175 + #[test] 176 + fn test_valid_record_keys() { 177 + assert!(is_valid_record_key("valid-key_123")); 178 + assert!(is_valid_record_key("self")); 179 + assert!(!is_valid_record_key("")); 180 + assert!(!is_valid_record_key(".")); 181 + assert!(!is_valid_record_key("..")); 182 + } 183 + 184 + #[test] 185 + fn test_valid_nsids() { 186 + assert!(is_valid_nsid("app.bsky.feed.post")); 187 + assert!(is_valid_nsid("com.atproto.repo.strongRef")); 188 + assert!(!is_valid_nsid("too.short")); 189 + assert!(!is_valid_nsid("")); 190 + } 191 + 192 + #[test] 193 + fn test_did_method_with_digits() { 194 + assert!(is_valid_did( 195 + "did:key:z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK" 196 + )); 197 + assert!(is_valid_did("did:3:abc123")); 198 + assert!(is_valid_did("did:a1b2:test")); 199 + assert!(!is_valid_did("did:UPPER:test")); 200 + assert!(!is_valid_did("did::test")); 201 + } 202 + 203 + #[test] 204 + fn test_record_key_with_colon() { 205 + assert!(is_valid_record_key("self")); 206 + assert!(is_valid_record_key("key:with:colons")); 207 + assert!(is_valid_record_key("at:something")); 208 + } 209 + 210 + #[test] 211 + fn test_valid_languages() { 212 + assert!(is_valid_language("en")); 213 + assert!(is_valid_language("en-US")); 214 + assert!(is_valid_language("pt-BR")); 215 + assert!(!is_valid_language("")); 216 + } 217 + }
+27
crates/tranquil-lexicon/src/lib.rs
··· 1 + mod formats; 2 + mod registry; 3 + mod schema; 4 + mod validate; 5 + 6 + #[cfg(feature = "resolve")] 7 + mod dynamic; 8 + #[cfg(feature = "resolve")] 9 + mod resolve; 10 + 11 + #[cfg(test)] 12 + mod test_schemas; 13 + 14 + pub use formats::{ 15 + is_valid_at_identifier, is_valid_at_uri, is_valid_cid, is_valid_datetime, is_valid_did, 16 + is_valid_handle, is_valid_language, is_valid_nsid, is_valid_record_key, is_valid_tid, 17 + is_valid_uri, 18 + }; 19 + pub use registry::LexiconRegistry; 20 + pub use schema::{LexiconDoc, ParsedRef, parse_ref}; 21 + pub use validate::{LexValidationError, validate_record}; 22 + 23 + #[cfg(feature = "resolve")] 24 + pub use resolve::{ 25 + ResolveError, fetch_schema_from_pds, resolve_did_from_dns, resolve_lexicon, 26 + resolve_lexicon_from_did, resolve_lexicon_with_config, resolve_pds_endpoint, 27 + };
+208
crates/tranquil-lexicon/src/schema.rs
··· 1 + use serde::Deserialize; 2 + use std::collections::HashMap; 3 + 4 + #[derive(Debug, Deserialize)] 5 + pub struct LexiconDoc { 6 + pub lexicon: u32, 7 + pub id: String, 8 + #[serde(default)] 9 + pub defs: HashMap<String, LexDef>, 10 + } 11 + 12 + #[derive(Debug, Deserialize)] 13 + #[serde(tag = "type")] 14 + pub enum LexDef { 15 + #[serde(rename = "record")] 16 + Record(LexRecord), 17 + #[serde(rename = "object")] 18 + Object(LexObject), 19 + #[serde(rename = "token")] 20 + Token {}, 21 + #[serde(rename = "string")] 22 + StringDef(LexStringDef), 23 + #[serde(rename = "query")] 24 + Query {}, 25 + #[serde(rename = "procedure")] 26 + Procedure {}, 27 + #[serde(rename = "subscription")] 28 + Subscription {}, 29 + #[serde(rename = "params")] 30 + Params {}, 31 + #[serde(rename = "permission")] 32 + Permission {}, 33 + #[serde(rename = "permission-set")] 34 + PermissionSet {}, 35 + } 36 + 37 + #[derive(Debug, Deserialize)] 38 + pub struct LexRecord { 39 + #[serde(default)] 40 + pub key: Option<String>, 41 + pub record: LexObject, 42 + } 43 + 44 + #[derive(Debug, Deserialize)] 45 + pub struct LexObject { 46 + #[serde(default)] 47 + pub required: Vec<String>, 48 + #[serde(default)] 49 + pub nullable: Vec<String>, 50 + #[serde(default)] 51 + pub properties: HashMap<String, LexProperty>, 52 + } 53 + 54 + #[derive(Debug, Deserialize)] 55 + #[serde(tag = "type")] 56 + pub enum LexProperty { 57 + #[serde(rename = "string")] 58 + String(LexString), 59 + #[serde(rename = "integer")] 60 + Integer(LexInteger), 61 + #[serde(rename = "boolean")] 62 + Boolean {}, 63 + #[serde(rename = "bytes")] 64 + Bytes(LexBytes), 65 + #[serde(rename = "cid-link")] 66 + CidLink {}, 67 + #[serde(rename = "blob")] 68 + Blob(LexBlob), 69 + #[serde(rename = "unknown")] 70 + Unknown {}, 71 + #[serde(rename = "ref")] 72 + Ref(LexRef), 73 + #[serde(rename = "union")] 74 + Union(LexUnion), 75 + #[serde(rename = "array")] 76 + Array(LexArray), 77 + #[serde(rename = "object")] 78 + Object(LexObject), 79 + } 80 + 81 + #[derive(Debug, Deserialize)] 82 + #[serde(rename_all = "camelCase")] 83 + pub struct LexString { 84 + #[serde(default)] 85 + pub max_length: Option<u64>, 86 + #[serde(default)] 87 + pub min_length: Option<u64>, 88 + #[serde(default)] 89 + pub max_graphemes: Option<u64>, 90 + #[serde(default)] 91 + pub min_graphemes: Option<u64>, 92 + #[serde(default)] 93 + pub format: Option<StringFormat>, 94 + #[serde(default)] 95 + pub known_values: Option<Vec<String>>, 96 + #[serde(rename = "enum", default)] 97 + pub enum_values: Option<Vec<String>>, 98 + #[serde(rename = "const", default)] 99 + pub const_value: Option<String>, 100 + #[serde(default)] 101 + pub default: Option<String>, 102 + } 103 + 104 + #[derive(Debug, Deserialize)] 105 + pub struct LexInteger { 106 + #[serde(default)] 107 + pub minimum: Option<i64>, 108 + #[serde(default)] 109 + pub maximum: Option<i64>, 110 + #[serde(default)] 111 + pub default: Option<i64>, 112 + #[serde(rename = "enum", default)] 113 + pub enum_values: Option<Vec<i64>>, 114 + #[serde(rename = "const", default)] 115 + pub const_value: Option<i64>, 116 + } 117 + 118 + #[derive(Debug, Deserialize)] 119 + #[serde(rename_all = "camelCase")] 120 + pub struct LexBytes { 121 + #[serde(default)] 122 + pub max_length: Option<u64>, 123 + #[serde(default)] 124 + pub min_length: Option<u64>, 125 + } 126 + 127 + #[derive(Debug, Deserialize)] 128 + #[serde(rename_all = "camelCase")] 129 + pub struct LexBlob { 130 + #[serde(default)] 131 + pub accept: Option<Vec<String>>, 132 + #[serde(default)] 133 + pub max_size: Option<u64>, 134 + } 135 + 136 + #[derive(Debug, Deserialize)] 137 + #[serde(rename_all = "camelCase")] 138 + pub struct LexArray { 139 + pub items: Box<LexProperty>, 140 + #[serde(default)] 141 + pub min_length: Option<u64>, 142 + #[serde(default)] 143 + pub max_length: Option<u64>, 144 + } 145 + 146 + #[derive(Debug, Deserialize)] 147 + pub struct LexUnion { 148 + #[serde(default)] 149 + pub refs: Vec<String>, 150 + #[serde(default)] 151 + pub closed: bool, 152 + } 153 + 154 + #[derive(Debug, Deserialize)] 155 + #[serde(rename_all = "camelCase")] 156 + pub struct LexRef { 157 + #[serde(rename = "ref")] 158 + pub reference: String, 159 + } 160 + 161 + #[derive(Debug, Clone, Deserialize)] 162 + pub enum StringFormat { 163 + #[serde(rename = "did")] 164 + Did, 165 + #[serde(rename = "handle")] 166 + Handle, 167 + #[serde(rename = "at-uri")] 168 + AtUri, 169 + #[serde(rename = "datetime")] 170 + Datetime, 171 + #[serde(rename = "uri")] 172 + Uri, 173 + #[serde(rename = "cid")] 174 + Cid, 175 + #[serde(rename = "language")] 176 + Language, 177 + #[serde(rename = "tid")] 178 + Tid, 179 + #[serde(rename = "record-key")] 180 + RecordKey, 181 + #[serde(rename = "at-identifier")] 182 + AtIdentifier, 183 + #[serde(rename = "nsid")] 184 + Nsid, 185 + } 186 + 187 + pub enum ParsedRef<'a> { 188 + Local(&'a str), 189 + Qualified { nsid: &'a str, fragment: &'a str }, 190 + Bare(&'a str), 191 + } 192 + 193 + pub fn parse_ref(reference: &str) -> ParsedRef<'_> { 194 + match reference.strip_prefix('#') { 195 + Some(local) => ParsedRef::Local(local), 196 + None => { 197 + let stripped = reference.strip_prefix("lex:").unwrap_or(reference); 198 + match stripped.split_once('#') { 199 + Some((nsid, fragment)) => ParsedRef::Qualified { nsid, fragment }, 200 + None => ParsedRef::Bare(stripped), 201 + } 202 + } 203 + } 204 + } 205 + 206 + #[derive(Debug, Deserialize)] 207 + #[serde(rename_all = "camelCase")] 208 + pub struct LexStringDef {}

History

2 rounds 0 comments
sign up or login to add to the discussion
1 commit
expand
feat(lexicon): add crate with schema types and format validators
expand 0 comments
pull request successfully merged
1 commit
expand
feat(lexicon): add crate with schema types and format validators
expand 0 comments