A better Rust ATProto crate

runtime validation

Orual 1bac3ef3 d485c065

+2535
+5
Cargo.lock
··· 2435 2435 name = "jacquard-lexicon" 2436 2436 version = "0.8.0" 2437 2437 dependencies = [ 2438 + "cid", 2439 + "dashmap", 2438 2440 "glob", 2439 2441 "heck 0.5.0", 2440 2442 "inventory", 2441 2443 "jacquard-common", 2442 2444 "miette", 2445 + "multihash", 2443 2446 "prettyplease", 2444 2447 "proc-macro2", 2445 2448 "quote", 2446 2449 "serde", 2450 + "serde_ipld_dagcbor", 2447 2451 "serde_json", 2448 2452 "serde_repr", 2449 2453 "serde_with", 2454 + "sha2", 2450 2455 "syn 2.0.108", 2451 2456 "tempfile", 2452 2457 "thiserror 2.0.17",
+3
Cargo.toml
··· 50 50 url = "2.5" 51 51 cid = { version = "0.11.1", features = ["serde", "std"] } 52 52 ipld-core = { version = "0.4.2", features = ["serde"] } 53 + multihash = "0.19" 54 + dashmap = "6.1" 53 55 54 56 # Proc macros 55 57 proc-macro2 = "1.0" ··· 75 77 percent-encoding = "2.3" 76 78 urlencoding = "2.1.3" 77 79 rand_core = "0.6" 80 + sha2 = "0.10" 78 81 79 82 # Time 80 83 chrono = "0.4"
+49
crates/jacquard-common/src/types.rs
··· 94 94 String(LexiconStringType), 95 95 } 96 96 97 + impl std::fmt::Display for DataModelType { 98 + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 99 + match self { 100 + DataModelType::Null => write!(f, "null"), 101 + DataModelType::Boolean => write!(f, "boolean"), 102 + DataModelType::Integer => write!(f, "integer"), 103 + DataModelType::Bytes => write!(f, "bytes"), 104 + DataModelType::CidLink => write!(f, "cid-link"), 105 + DataModelType::Blob => write!(f, "blob"), 106 + DataModelType::Array => write!(f, "array"), 107 + DataModelType::Object => write!(f, "object"), 108 + DataModelType::String(s) => write!(f, "{}", s), 109 + } 110 + } 111 + } 112 + 97 113 /// Lexicon string format types for typed strings in the AT Protocol data model 98 114 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)] 99 115 #[serde(rename_all = "kebab-case")] ··· 125 141 String, 126 142 } 127 143 144 + impl std::fmt::Display for LexiconStringType { 145 + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 146 + match self { 147 + LexiconStringType::Datetime => write!(f, "datetime"), 148 + LexiconStringType::AtUri => write!(f, "at-uri"), 149 + LexiconStringType::Did => write!(f, "did"), 150 + LexiconStringType::Handle => write!(f, "handle"), 151 + LexiconStringType::AtIdentifier => write!(f, "at-identifier"), 152 + LexiconStringType::Nsid => write!(f, "nsid"), 153 + LexiconStringType::Cid => write!(f, "cid"), 154 + LexiconStringType::Language => write!(f, "language"), 155 + LexiconStringType::Tid => write!(f, "tid"), 156 + LexiconStringType::RecordKey => write!(f, "record-key"), 157 + LexiconStringType::Uri(u) => write!(f, "uri({})", u), 158 + LexiconStringType::String => write!(f, "string"), 159 + } 160 + } 161 + } 162 + 128 163 /// URI scheme types for lexicon URI format constraints 129 164 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] 130 165 #[serde(tag = "type")] ··· 144 179 /// Any valid URI 145 
180 Any, 146 181 } 182 + 183 + impl std::fmt::Display for UriType { 184 + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 185 + match self { 186 + UriType::Did => write!(f, "did"), 187 + UriType::At => write!(f, "at"), 188 + UriType::Https => write!(f, "https"), 189 + UriType::Wss => write!(f, "wss"), 190 + UriType::Cid => write!(f, "cid"), 191 + UriType::Dns => write!(f, "dns"), 192 + UriType::Any => write!(f, "any"), 193 + } 194 + } 195 + }
+25
crates/jacquard-common/src/types/value.rs
··· 167 167 matches!(self, Data::Null) 168 168 } 169 169 170 + /// Get the "$type" discriminator field if this is an object with a string "$type" field 171 + /// 172 + /// This is a shortcut for union type discrimination in AT Protocol. 173 + /// Returns `None` if this is not an object or if the "$type" field is missing/not a string. 174 + pub fn type_discriminator(&self) -> Option<&str> { 175 + self.as_object()?.type_discriminator() 176 + } 177 + 170 178 /// Serialize to canonical DAG-CBOR bytes for CID computation 171 179 /// 172 180 /// This produces the deterministic CBOR encoding used for content-addressing. ··· 350 358 /// Get an iterator over the keys 351 359 pub fn keys(&self) -> std::collections::btree_map::Keys<'_, SmolStr, Data<'s>> { 352 360 self.0.keys() 361 + } 362 + 363 + /// Get the "$type" discriminator field if present and it's a string 364 + /// 365 + /// This is a shortcut for union type discrimination in AT Protocol. 366 + pub fn type_discriminator(&self) -> Option<&str> { 367 + self.get("$type")?.as_str() 353 368 } 354 369 355 370 /// Get an iterator over the values ··· 568 583 /// Check if this is a null value 569 584 pub fn is_null(&self) -> bool { 570 585 matches!(self, RawData::Null) 586 + } 587 + 588 + /// Get the "$type" discriminator field if this is an object with a string "$type" field 589 + /// 590 + /// This is a shortcut for union type discrimination in AT Protocol. 591 + /// Returns `None` if this is not an object or if the "$type" field is missing/not a string. 592 + pub fn type_discriminator(&self) -> Option<&str> { 593 + let obj = self.as_object()?; 594 + let type_val = obj.get("$type")?; 595 + type_val.as_str() 571 596 } 572 597 573 598 /// Serialize to canonical DAG-CBOR bytes for CID computation
+41
crates/jacquard-common/src/types/value/tests.rs
··· 1282 1282 let values: Vec<_> = result.values().collect(); 1283 1283 assert_eq!(values.len(), 1); 1284 1284 } 1285 + 1286 + #[test] 1287 + fn test_type_discriminator() { 1288 + // Object with $type field 1289 + let mut map = BTreeMap::new(); 1290 + map.insert( 1291 + SmolStr::new_static("$type"), 1292 + Data::String(AtprotoStr::String(CowStr::new_static("app.bsky.feed.post"))), 1293 + ); 1294 + map.insert(SmolStr::new_static("text"), Data::String(AtprotoStr::String(CowStr::new_static("hello")))); 1295 + let obj = Object(map); 1296 + 1297 + assert_eq!(obj.type_discriminator(), Some("app.bsky.feed.post")); 1298 + 1299 + let data = Data::Object(obj.clone()); 1300 + assert_eq!(data.type_discriminator(), Some("app.bsky.feed.post")); 1301 + 1302 + // Object without $type field 1303 + let mut map2 = BTreeMap::new(); 1304 + map2.insert(SmolStr::new_static("foo"), Data::Integer(42)); 1305 + let obj2 = Object(map2); 1306 + 1307 + assert_eq!(obj2.type_discriminator(), None); 1308 + 1309 + let data2 = Data::Object(obj2); 1310 + assert_eq!(data2.type_discriminator(), None); 1311 + 1312 + // Non-object data 1313 + let data3 = Data::Integer(42); 1314 + assert_eq!(data3.type_discriminator(), None); 1315 + 1316 + // RawData with $type 1317 + let mut raw_map = BTreeMap::new(); 1318 + raw_map.insert( 1319 + SmolStr::new_static("$type"), 1320 + RawData::String(CowStr::new_static("test.type")), 1321 + ); 1322 + let raw_obj = RawData::Object(raw_map); 1323 + 1324 + assert_eq!(raw_obj.type_discriminator(), Some("test.type")); 1325 + }
+5
crates/jacquard-lexicon/Cargo.toml
··· 12 12 license.workspace = true 13 13 14 14 [dependencies] 15 + cid.workspace = true 16 + dashmap.workspace = true 15 17 glob = "0.3" 16 18 heck.workspace = true 17 19 inventory = "0.3" 18 20 jacquard-common = { version = "0.8", path = "../jacquard-common" } 19 21 miette = { workspace = true } 22 + multihash.workspace = true 20 23 prettyplease.workspace = true 21 24 proc-macro2.workspace = true 22 25 quote.workspace = true 23 26 serde.workspace = true 27 + serde_ipld_dagcbor.workspace = true 24 28 serde_json.workspace = true 25 29 serde_repr.workspace = true 26 30 serde_with.workspace = true 31 + sha2.workspace = true 27 32 syn.workspace = true 28 33 thiserror.workspace = true 29 34 unicode-segmentation = "1.12"
+4
crates/jacquard-lexicon/src/codegen.rs
··· 90 90 #nsid 91 91 } 92 92 93 + fn def_name() -> &'static str { 94 + #def_name 95 + } 96 + 93 97 fn lexicon_doc() -> ::jacquard_lexicon::lexicon::LexiconDoc<'static> { 94 98 #shared_fn_ident() 95 99 }
+4
crates/jacquard-lexicon/src/codegen/schema_impl.rs
··· 45 45 #nsid 46 46 } 47 47 48 + fn def_name() -> &'static str { 49 + #def_name 50 + } 51 + 48 52 fn lexicon_doc() -> ::jacquard_lexicon::lexicon::LexiconDoc<'static> { 49 53 #doc_literal 50 54 }
+2
crates/jacquard-lexicon/src/lib.rs
··· 13 13 //! - [`union_registry`] - Tracks union types for collision detection 14 14 //! - [`fs`] - Filesystem utilities for lexicon storage 15 15 //! - [`derive_impl`] - Implementation functions for derive macros (used by jacquard-derive) 16 + //! - [`validation`] - Runtime validation of Data against lexicon schemas 16 17 17 18 pub mod codegen; 18 19 pub mod corpus; ··· 22 23 pub mod lexicon; 23 24 pub mod schema; 24 25 pub mod union_registry; 26 + pub mod validation;
+9
crates/jacquard-lexicon/src/schema.rs
··· 67 67 /// For fragments, this is the base NSID (without `#fragment`). 68 68 fn nsid() -> &'static str; 69 69 70 + /// The definition name within the lexicon document 71 + /// 72 + /// Returns "main" for the primary definition, or the fragment name for other defs. 73 + /// For example, in a lexicon with multiple defs like `pub.leaflet.poll.definition`, 74 + /// the main type returns "main" while the `Option` type returns "option". 75 + fn def_name() -> &'static str { 76 + "main" 77 + } 78 + 70 79 /// The schema ID for this type 71 80 /// 72 81 /// Defaults to NSID. Override for fragments to include `#fragment` suffix.
+1278
crates/jacquard-lexicon/src/validation.rs
··· 1 + //! Runtime validation of Data values against lexicon schemas 2 + //! 3 + //! This module provides infrastructure for validating untyped `Data` values against 4 + //! lexicon schemas, enabling partial deserialization, debugging, and schema migration. 5 + 6 + use crate::{lexicon::LexiconDoc, schema::LexiconSchemaRef}; 7 + use cid::Cid as IpldCid; 8 + use dashmap::DashMap; 9 + use jacquard_common::{ 10 + IntoStatic, 11 + smol_str::{self, ToSmolStr}, 12 + types::value::Data, 13 + }; 14 + use sha2::{Digest, Sha256}; 15 + use smol_str::SmolStr; 16 + use std::{ 17 + fmt, 18 + sync::{Arc, LazyLock, OnceLock}, 19 + }; 20 + 21 + /// Path to a value within a data structure 22 + /// 23 + /// Tracks the location of values during validation for precise error reporting. 24 + #[derive(Debug, Clone, PartialEq, Eq)] 25 + pub struct ValidationPath { 26 + segments: Vec<PathSegment>, 27 + } 28 + 29 + /// A segment in a validation path 30 + #[derive(Debug, Clone, PartialEq, Eq)] 31 + pub enum PathSegment { 32 + /// Object field access 33 + Field(SmolStr), 34 + /// Array index access 35 + Index(usize), 36 + /// Union variant discriminator 37 + UnionVariant(SmolStr), 38 + } 39 + 40 + impl ValidationPath { 41 + /// Create a new empty path 42 + pub fn new() -> Self { 43 + Self { 44 + segments: Vec::new(), 45 + } 46 + } 47 + 48 + /// Add a field segment to the path 49 + pub fn push_field(&mut self, name: &str) { 50 + self.segments.push(PathSegment::Field(name.into())); 51 + } 52 + 53 + /// Add an index segment to the path 54 + pub fn push_index(&mut self, idx: usize) { 55 + self.segments.push(PathSegment::Index(idx)); 56 + } 57 + 58 + /// Add a union variant segment to the path 59 + pub fn push_variant(&mut self, type_str: &str) { 60 + self.segments 61 + .push(PathSegment::UnionVariant(type_str.into())); 62 + } 63 + 64 + /// Remove the last segment from the path 65 + pub fn pop(&mut self) { 66 + self.segments.pop(); 67 + } 68 + 69 + /// Get the depth of the path 70 + pub fn 
depth(&self) -> usize { 71 + self.segments.len() 72 + } 73 + 74 + /// Check if the path is empty 75 + pub fn is_empty(&self) -> bool { 76 + self.segments.is_empty() 77 + } 78 + } 79 + 80 + impl Default for ValidationPath { 81 + fn default() -> Self { 82 + Self::new() 83 + } 84 + } 85 + 86 + impl fmt::Display for ValidationPath { 87 + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 88 + if self.segments.is_empty() { 89 + return write!(f, "(root)"); 90 + } 91 + 92 + for seg in &self.segments { 93 + match seg { 94 + PathSegment::Field(name) => write!(f, ".{}", name)?, 95 + PathSegment::Index(idx) => write!(f, "[{}]", idx)?, 96 + PathSegment::UnionVariant(t) => write!(f, "($type={})", t)?, 97 + } 98 + } 99 + Ok(()) 100 + } 101 + } 102 + 103 + /// Structural validation errors 104 + /// 105 + /// These errors indicate that the data structure doesn't match the schema's type expectations. 106 + #[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 107 + pub enum StructuralError { 108 + #[error("Type mismatch at {path}: expected {expected}, got {actual}")] 109 + TypeMismatch { 110 + path: ValidationPath, 111 + expected: jacquard_common::types::DataModelType, 112 + actual: jacquard_common::types::DataModelType, 113 + }, 114 + 115 + #[error("Missing required field at {path}: '{field}'")] 116 + MissingRequiredField { 117 + path: ValidationPath, 118 + field: SmolStr, 119 + }, 120 + 121 + #[error("Missing union discriminator ($type) at {path}")] 122 + MissingUnionDiscriminator { path: ValidationPath }, 123 + 124 + #[error("Union type mismatch at {path}: $type='{actual_type}' not in [{expected_refs}]")] 125 + UnionNoMatch { 126 + path: ValidationPath, 127 + actual_type: SmolStr, 128 + expected_refs: SmolStr, 129 + }, 130 + 131 + #[error("Unresolved ref at {path}: '{ref_nsid}'")] 132 + UnresolvedRef { 133 + path: ValidationPath, 134 + ref_nsid: SmolStr, 135 + }, 136 + 137 + #[error("Reference cycle detected at {path}: '{ref_nsid}' (stack: {stack})")] 138 + RefCycle 
{ 139 + path: ValidationPath, 140 + ref_nsid: SmolStr, 141 + stack: SmolStr, 142 + }, 143 + 144 + #[error("Max validation depth exceeded at {path}: {max}")] 145 + MaxDepthExceeded { path: ValidationPath, max: usize }, 146 + } 147 + 148 + /// Constraint validation errors 149 + /// 150 + /// These errors indicate that the data violates lexicon constraints like max_length, 151 + /// max_graphemes, ranges, etc. The structure is correct but values are out of bounds. 152 + #[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 153 + pub enum ConstraintError { 154 + #[error("{path} exceeds max length: {actual} > {max}")] 155 + MaxLength { 156 + path: ValidationPath, 157 + max: usize, 158 + actual: usize, 159 + }, 160 + 161 + #[error("{path} exceeds max graphemes: {actual} > {max}")] 162 + MaxGraphemes { 163 + path: ValidationPath, 164 + max: usize, 165 + actual: usize, 166 + }, 167 + 168 + #[error("{path} below min length: {actual} < {min}")] 169 + MinLength { 170 + path: ValidationPath, 171 + min: usize, 172 + actual: usize, 173 + }, 174 + 175 + #[error("{path} below min graphemes: {actual} < {min}")] 176 + MinGraphemes { 177 + path: ValidationPath, 178 + min: usize, 179 + actual: usize, 180 + }, 181 + 182 + #[error("{path} value {actual} exceeds maximum: {max}")] 183 + Maximum { 184 + path: ValidationPath, 185 + max: i64, 186 + actual: i64, 187 + }, 188 + 189 + #[error("{path} value {actual} below minimum: {min}")] 190 + Minimum { 191 + path: ValidationPath, 192 + min: i64, 193 + actual: i64, 194 + }, 195 + } 196 + 197 + /// Unified validation error type 198 + #[derive(Debug, Clone, thiserror::Error)] 199 + pub enum ValidationError { 200 + #[error(transparent)] 201 + Structural(#[from] StructuralError), 202 + 203 + #[error(transparent)] 204 + Constraint(#[from] ConstraintError), 205 + } 206 + 207 + /// Registry of lexicon schemas for validation 208 + /// 209 + /// Collects schemas from inventory at construction and supports runtime insertion. 
210 + #[derive(Debug, Clone)] 211 + pub struct SchemaRegistry { 212 + /// Schema documents indexed by NSID (concurrent access safe) 213 + schemas: DashMap<SmolStr, LexiconDoc<'static>>, 214 + } 215 + 216 + impl SchemaRegistry { 217 + /// Build registry from inventory-collected schemas 218 + pub fn from_inventory() -> Self { 219 + let schemas = DashMap::new(); 220 + 221 + for entry in inventory::iter::<LexiconSchemaRef> { 222 + let doc = (entry.provider)(); 223 + schemas.insert(entry.nsid.to_smolstr(), doc); 224 + } 225 + 226 + Self { schemas } 227 + } 228 + 229 + /// Create an empty registry 230 + pub fn new() -> Self { 231 + Self { 232 + schemas: DashMap::new(), 233 + } 234 + } 235 + 236 + /// Get schema by NSID 237 + /// 238 + /// IMPORTANT: Clone the returned schema immediately to avoid holding DashMap ref 239 + pub fn get(&self, nsid: &str) -> Option<LexiconDoc<'static>> { 240 + self.schemas.get(nsid).map(|doc| doc.clone()) 241 + } 242 + 243 + /// Insert or update a schema (for runtime schema loading) 244 + pub fn insert(&self, nsid: SmolStr, doc: LexiconDoc<'static>) { 245 + self.schemas.insert(nsid, doc); 246 + } 247 + 248 + /// Get specific def from a schema 249 + /// 250 + /// IMPORTANT: Returns cloned def to avoid holding DashMap ref 251 + pub fn get_def( 252 + &self, 253 + nsid: &str, 254 + def_name: &str, 255 + ) -> Option<crate::lexicon::LexUserType<'static>> { 256 + // Clone immediately to release DashMap ref before returning 257 + self.schemas 258 + .get(nsid) 259 + .and_then(|doc| doc.defs.get(def_name).cloned()) 260 + } 261 + } 262 + 263 + impl Default for SchemaRegistry { 264 + fn default() -> Self { 265 + Self::from_inventory() 266 + } 267 + } 268 + 269 + /// Cache key for validation results 270 + /// 271 + /// Content-addressed by CID to enable efficient caching across identical data. 
272 + #[derive(Debug, Clone, Hash, Eq, PartialEq)] 273 + struct ValidationCacheKey { 274 + nsid: SmolStr, 275 + def_name: SmolStr, 276 + cid: IpldCid, 277 + } 278 + 279 + impl ValidationCacheKey { 280 + /// Create cache key from schema info and data 281 + fn from_data<T: crate::schema::LexiconSchema>( 282 + data: &Data, 283 + ) -> Result<Self, CidComputationError> { 284 + let cid = compute_data_cid(data)?; 285 + Ok(Self { 286 + nsid: SmolStr::new_static(T::nsid()), 287 + def_name: SmolStr::new_static(T::def_name()), 288 + cid, 289 + }) 290 + } 291 + } 292 + 293 + /// Errors that can occur when computing CIDs 294 + #[derive(Debug, thiserror::Error)] 295 + pub enum CidComputationError { 296 + #[error("Failed to serialize data to DAG-CBOR: {0}")] 297 + DagCborEncode(#[from] serde_ipld_dagcbor::EncodeError<std::collections::TryReserveError>), 298 + 299 + #[error("Failed to create multihash: {0}")] 300 + Multihash(#[from] multihash::Error), 301 + } 302 + 303 + /// Compute CID for Data value 304 + /// 305 + /// Uses SHA-256 hash and DAG-CBOR codec for content addressing. 306 + fn compute_data_cid(data: &Data) -> Result<IpldCid, CidComputationError> { 307 + // Serialize to DAG-CBOR 308 + let dag_cbor = data.to_dag_cbor()?; 309 + 310 + // Compute SHA-256 hash 311 + let hash = Sha256::digest(&dag_cbor); 312 + 313 + // Create multihash (code 0x12 = sha2-256) 314 + let multihash = multihash::Multihash::wrap(0x12, &hash)?; 315 + 316 + // Create CIDv1 with dag-cbor codec (0x71) 317 + Ok(IpldCid::new_v1(0x71, multihash)) 318 + } 319 + 320 + /// Result of validating Data against a schema 321 + /// 322 + /// Distinguishes between structural errors (type mismatches, missing fields) and 323 + /// constraint violations (max_length, ranges, etc.). Constraint validation is lazy. 
324 + #[derive(Debug, Clone)] 325 + pub struct ValidationResult { 326 + /// Structural errors (computed immediately) 327 + structural: Vec<StructuralError>, 328 + 329 + /// Constraint errors (computed on first access) 330 + constraints: OnceLock<Vec<ConstraintError>>, 331 + 332 + /// Context for lazy constraint validation 333 + data: Option<Arc<Data<'static>>>, 334 + schema_ref: Option<(SmolStr, SmolStr)>, // (nsid, def_name) 335 + registry: Option<Arc<SchemaRegistry>>, 336 + } 337 + 338 + impl ValidationResult { 339 + /// Create a validation result with no errors 340 + pub fn valid() -> Self { 341 + Self { 342 + structural: Vec::new(), 343 + constraints: OnceLock::new(), 344 + data: None, 345 + schema_ref: None, 346 + registry: None, 347 + } 348 + } 349 + 350 + /// Create a validation result with structural errors 351 + pub fn with_structural_errors(errors: Vec<StructuralError>) -> Self { 352 + Self { 353 + structural: errors, 354 + constraints: OnceLock::new(), 355 + data: None, 356 + schema_ref: None, 357 + registry: None, 358 + } 359 + } 360 + 361 + /// Create a validation result with context for lazy constraint validation 362 + pub fn with_context( 363 + structural: Vec<StructuralError>, 364 + data: Arc<Data<'static>>, 365 + nsid: SmolStr, 366 + def_name: SmolStr, 367 + registry: Arc<SchemaRegistry>, 368 + ) -> Self { 369 + Self { 370 + structural, 371 + constraints: OnceLock::new(), 372 + data: Some(data), 373 + schema_ref: Some((nsid, def_name)), 374 + registry: Some(registry), 375 + } 376 + } 377 + 378 + /// Check if validation passed (no structural or constraint errors) 379 + pub fn is_valid(&self) -> bool { 380 + self.structural.is_empty() && self.constraint_errors().is_empty() 381 + } 382 + 383 + /// Check if structurally valid (ignoring constraint checks) 384 + pub fn is_structurally_valid(&self) -> bool { 385 + self.structural.is_empty() 386 + } 387 + 388 + /// Get structural errors 389 + pub fn structural_errors(&self) -> &[StructuralError] { 390 + 
&self.structural 391 + } 392 + 393 + /// Get constraint errors (computed lazily on first access) 394 + pub fn constraint_errors(&self) -> &[ConstraintError] { 395 + self.constraints.get_or_init(|| { 396 + // If no context or structurally invalid, skip constraint validation 397 + if !self.is_structurally_valid() || self.data.is_none() || self.schema_ref.is_none() { 398 + return Vec::new(); 399 + } 400 + 401 + let data = self.data.as_ref().unwrap(); 402 + let (nsid, def_name) = self.schema_ref.as_ref().unwrap(); 403 + 404 + let mut path = ValidationPath::new(); 405 + validate_constraints( 406 + &mut path, 407 + data, 408 + nsid.as_str(), 409 + def_name.as_str(), 410 + self.registry.as_ref(), 411 + ) 412 + }) 413 + } 414 + 415 + /// Check if there are any constraint violations 416 + pub fn has_constraint_violations(&self) -> bool { 417 + !self.constraint_errors().is_empty() 418 + } 419 + 420 + /// Get all errors (structural and constraint) 421 + pub fn all_errors(&self) -> impl Iterator<Item = ValidationError> + '_ { 422 + self.structural 423 + .iter() 424 + .cloned() 425 + .map(ValidationError::Structural) 426 + .chain( 427 + self.constraint_errors() 428 + .iter() 429 + .cloned() 430 + .map(ValidationError::Constraint), 431 + ) 432 + } 433 + } 434 + 435 + /// Schema validator with caching 436 + /// 437 + /// Validates Data values against lexicon schemas, caching results by content hash. 
438 + pub struct SchemaValidator { 439 + registry: SchemaRegistry, 440 + cache: DashMap<ValidationCacheKey, Arc<ValidationResult>>, 441 + } 442 + 443 + impl SchemaValidator { 444 + /// Get the global validator instance 445 + pub fn global() -> &'static Self { 446 + static VALIDATOR: LazyLock<SchemaValidator> = LazyLock::new(|| SchemaValidator { 447 + registry: SchemaRegistry::from_inventory(), 448 + cache: DashMap::new(), 449 + }); 450 + &VALIDATOR 451 + } 452 + 453 + /// Create a new validator with empty registry 454 + pub fn new() -> Self { 455 + Self { 456 + registry: SchemaRegistry::new(), 457 + cache: DashMap::new(), 458 + } 459 + } 460 + 461 + /// Validate data against a schema 462 + /// 463 + /// Results are cached by content hash for efficiency. 464 + pub fn validate<T: crate::schema::LexiconSchema>( 465 + &self, 466 + data: &Data, 467 + ) -> Result<ValidationResult, CidComputationError> { 468 + // Compute cache key 469 + let key = ValidationCacheKey::from_data::<T>(data)?; 470 + 471 + // Check cache (clone Arc immediately to avoid holding ref) 472 + if let Some(cached) = self.cache.get(&key).map(|r| Arc::clone(&r)) { 473 + return Ok((*cached).clone()); 474 + } 475 + 476 + // Validate (placeholder - actual validation in Phase 3) 477 + let result = self.validate_uncached::<T>(data); 478 + 479 + // Cache result 480 + self.cache.insert(key, Arc::new(result.clone())); 481 + 482 + Ok(result) 483 + } 484 + 485 + /// Validate without caching (internal) 486 + fn validate_uncached<T: crate::schema::LexiconSchema>(&self, data: &Data) -> ValidationResult { 487 + let def = match self.registry.get_def(T::nsid(), T::def_name()) { 488 + Some(d) => d, 489 + None => { 490 + // Schema not found - this is a structural error 491 + return ValidationResult::with_structural_errors(vec![ 492 + StructuralError::UnresolvedRef { 493 + path: ValidationPath::new(), 494 + ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(), 495 + }, 496 + ]); 497 + } 498 + }; 499 + 500 + let mut 
path = ValidationPath::new(); 501 + let mut ctx = ValidationContext::new(T::nsid(), T::def_name()); 502 + 503 + let errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 504 + 505 + // If structurally valid, create result with context for lazy constraint validation 506 + if errors.is_empty() { 507 + // Convert data to owned for constraint validation 508 + let owned_data = Arc::new(data.clone().into_static()); 509 + ValidationResult::with_context( 510 + errors, 511 + owned_data, 512 + SmolStr::new_static(T::nsid()), 513 + SmolStr::new_static(T::def_name()), 514 + Arc::new(self.registry.clone()), 515 + ) 516 + } else { 517 + ValidationResult::with_structural_errors(errors) 518 + } 519 + } 520 + 521 + /// Get the schema registry 522 + pub fn registry(&self) -> &SchemaRegistry { 523 + &self.registry 524 + } 525 + } 526 + 527 + impl Default for SchemaValidator { 528 + fn default() -> Self { 529 + Self::new() 530 + } 531 + } 532 + 533 + /// Validation context for tracking refs and preventing cycles 534 + struct ValidationContext { 535 + current_nsid: String, 536 + current_def: String, 537 + ref_stack: Vec<String>, 538 + max_depth: usize, 539 + } 540 + 541 + impl ValidationContext { 542 + fn new(nsid: &str, def_name: &str) -> Self { 543 + Self { 544 + current_nsid: nsid.to_string(), 545 + current_def: def_name.to_string(), 546 + ref_stack: Vec::new(), 547 + max_depth: 32, 548 + } 549 + } 550 + } 551 + 552 + /// Normalize a ref string to (nsid, def_name) 553 + fn normalize_ref(ref_str: &str, current_nsid: &str) -> (String, String) { 554 + if let Some(fragment) = ref_str.strip_prefix('#') { 555 + // #option -> (current_nsid, "option") 556 + (current_nsid.to_string(), fragment.to_string()) 557 + } else if let Some((nsid, def)) = ref_str.split_once('#') { 558 + // com.example.foo#bar -> ("com.example.foo", "bar") 559 + (nsid.to_string(), def.to_string()) 560 + } else { 561 + // com.example.foo -> ("com.example.foo", "main") 562 + (ref_str.to_string(), 
"main".to_string()) 563 + } 564 + } 565 + 566 + /// Validate data against a lexicon def 567 + fn validate_def( 568 + path: &mut ValidationPath, 569 + data: &Data, 570 + def: &crate::lexicon::LexUserType, 571 + registry: &SchemaRegistry, 572 + ctx: &mut ValidationContext, 573 + ) -> Vec<StructuralError> { 574 + use crate::lexicon::LexUserType; 575 + use jacquard_common::types::DataModelType; 576 + 577 + match def { 578 + LexUserType::Object(obj) => { 579 + // Must be an object 580 + let Data::Object(obj_data) = data else { 581 + return vec![StructuralError::TypeMismatch { 582 + path: path.clone(), 583 + expected: DataModelType::Object, 584 + actual: data.data_type(), 585 + }]; 586 + }; 587 + 588 + let mut errors = Vec::new(); 589 + 590 + // Check required fields 591 + if let Some(required) = &obj.required { 592 + for field in required { 593 + if !obj_data.get(field.as_ref()).is_some() { 594 + errors.push(StructuralError::MissingRequiredField { 595 + path: path.clone(), 596 + field: field.clone(), 597 + }); 598 + } 599 + } 600 + } 601 + 602 + // Validate each property that's present 603 + for (name, prop) in &obj.properties { 604 + if let Some(field_data) = obj_data.get(name.as_ref()) { 605 + path.push_field(name.as_ref()); 606 + errors.extend(validate_property(path, field_data, prop, registry, ctx)); 607 + path.pop(); 608 + } 609 + } 610 + 611 + errors 612 + } 613 + // Other def types (Record, Token, etc.) 
would go here 614 + // For now, just handle Object since that's what our tests use 615 + _ => Vec::new(), 616 + } 617 + } 618 + 619 + /// Validate data against a property schema 620 + fn validate_property( 621 + path: &mut ValidationPath, 622 + data: &Data, 623 + prop: &crate::lexicon::LexObjectProperty, 624 + registry: &SchemaRegistry, 625 + ctx: &mut ValidationContext, 626 + ) -> Vec<StructuralError> { 627 + use crate::lexicon::LexObjectProperty; 628 + use jacquard_common::types::DataModelType; 629 + 630 + match prop { 631 + LexObjectProperty::String(_) => { 632 + // Accept any string type 633 + if !matches!(data.data_type(), DataModelType::String(_)) { 634 + vec![StructuralError::TypeMismatch { 635 + path: path.clone(), 636 + expected: DataModelType::String( 637 + jacquard_common::types::LexiconStringType::String, 638 + ), 639 + actual: data.data_type(), 640 + }] 641 + } else { 642 + Vec::new() 643 + } 644 + } 645 + 646 + LexObjectProperty::Integer(_) => { 647 + if !matches!(data.data_type(), DataModelType::Integer) { 648 + vec![StructuralError::TypeMismatch { 649 + path: path.clone(), 650 + expected: DataModelType::Integer, 651 + actual: data.data_type(), 652 + }] 653 + } else { 654 + Vec::new() 655 + } 656 + } 657 + 658 + LexObjectProperty::Boolean(_) => { 659 + if !matches!(data.data_type(), DataModelType::Boolean) { 660 + vec![StructuralError::TypeMismatch { 661 + path: path.clone(), 662 + expected: DataModelType::Boolean, 663 + actual: data.data_type(), 664 + }] 665 + } else { 666 + Vec::new() 667 + } 668 + } 669 + 670 + LexObjectProperty::Object(obj) => { 671 + let Data::Object(obj_data) = data else { 672 + return vec![StructuralError::TypeMismatch { 673 + path: path.clone(), 674 + expected: DataModelType::Object, 675 + actual: data.data_type(), 676 + }]; 677 + }; 678 + 679 + let mut errors = Vec::new(); 680 + 681 + // Check required fields 682 + if let Some(required) = &obj.required { 683 + for field in required { 684 + if 
!obj_data.get(field.as_ref()).is_some() { 685 + errors.push(StructuralError::MissingRequiredField { 686 + path: path.clone(), 687 + field: field.clone(), 688 + }); 689 + } 690 + } 691 + } 692 + 693 + // Recursively validate each property 694 + for (name, schema_prop) in &obj.properties { 695 + if let Some(field_data) = obj_data.get(name.as_ref()) { 696 + path.push_field(name.as_ref()); 697 + errors.extend(validate_property( 698 + path, 699 + field_data, 700 + schema_prop, 701 + registry, 702 + ctx, 703 + )); 704 + path.pop(); 705 + } 706 + } 707 + 708 + errors 709 + } 710 + 711 + LexObjectProperty::Array(arr) => { 712 + let Data::Array(array) = data else { 713 + return vec![StructuralError::TypeMismatch { 714 + path: path.clone(), 715 + expected: DataModelType::Array, 716 + actual: data.data_type(), 717 + }]; 718 + }; 719 + 720 + let mut errors = Vec::new(); 721 + for (idx, item) in array.iter().enumerate() { 722 + path.push_index(idx); 723 + errors.extend(validate_array_item(path, item, &arr.items, registry, ctx)); 724 + path.pop(); 725 + } 726 + errors 727 + } 728 + 729 + LexObjectProperty::Union(u) => { 730 + let Data::Object(obj) = data else { 731 + return vec![StructuralError::TypeMismatch { 732 + path: path.clone(), 733 + expected: DataModelType::Object, 734 + actual: data.data_type(), 735 + }]; 736 + }; 737 + 738 + // Get $type discriminator 739 + let Some(type_str) = obj.type_discriminator() else { 740 + return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }]; 741 + }; 742 + 743 + // Try to match against refs 744 + for variant_ref in &u.refs { 745 + let (variant_nsid, variant_def) = 746 + normalize_ref(variant_ref.as_ref(), &ctx.current_nsid); 747 + let full_variant = format!("{}#{}", variant_nsid, variant_def); 748 + 749 + // Match by full ref or just nsid 750 + if type_str == full_variant || type_str == variant_nsid { 751 + // Found match - validate against this variant 752 + let Some(variant_def_type) = 
registry.get_def(&variant_nsid, &variant_def) 753 + else { 754 + return vec![StructuralError::UnresolvedRef { 755 + path: path.clone(), 756 + ref_nsid: full_variant.into(), 757 + }]; 758 + }; 759 + 760 + path.push_variant(type_str); 761 + let old_nsid = std::mem::replace(&mut ctx.current_nsid, variant_nsid); 762 + let old_def = std::mem::replace(&mut ctx.current_def, variant_def); 763 + 764 + let errors = validate_def(path, data, &variant_def_type, registry, ctx); 765 + 766 + ctx.current_nsid = old_nsid; 767 + ctx.current_def = old_def; 768 + path.pop(); 769 + 770 + return errors; 771 + } 772 + } 773 + 774 + // No match found 775 + if u.closed.unwrap_or(false) { 776 + // Closed union - this is an error 777 + let expected_refs = u 778 + .refs 779 + .iter() 780 + .map(|r| r.as_ref()) 781 + .collect::<Vec<_>>() 782 + .join(", "); 783 + vec![StructuralError::UnionNoMatch { 784 + path: path.clone(), 785 + actual_type: type_str.into(), 786 + expected_refs: expected_refs.into(), 787 + }] 788 + } else { 789 + // Open union - allow unknown variants 790 + Vec::new() 791 + } 792 + } 793 + 794 + LexObjectProperty::Ref(r) => { 795 + // Depth check 796 + if path.depth() >= ctx.max_depth { 797 + return vec![StructuralError::MaxDepthExceeded { 798 + path: path.clone(), 799 + max: ctx.max_depth, 800 + }]; 801 + } 802 + 803 + // Normalize ref 804 + let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), &ctx.current_nsid); 805 + let full_ref = format!("{}#{}", ref_nsid, ref_def); 806 + 807 + // Cycle detection 808 + if ctx.ref_stack.contains(&full_ref) { 809 + let stack = ctx.ref_stack.join(" -> "); 810 + return vec![StructuralError::RefCycle { 811 + path: path.clone(), 812 + ref_nsid: full_ref.into(), 813 + stack: stack.into(), 814 + }]; 815 + } 816 + 817 + // Look up ref 818 + let Some(ref_def_type) = registry.get_def(&ref_nsid, &ref_def) else { 819 + return vec![StructuralError::UnresolvedRef { 820 + path: path.clone(), 821 + ref_nsid: full_ref.into(), 822 + }]; 823 + }; 824 + 
825 + // Push, validate, pop 826 + ctx.ref_stack.push(full_ref); 827 + let old_nsid = std::mem::replace(&mut ctx.current_nsid, ref_nsid); 828 + let old_def = std::mem::replace(&mut ctx.current_def, ref_def); 829 + 830 + let errors = validate_def(path, data, &ref_def_type, registry, ctx); 831 + 832 + ctx.current_nsid = old_nsid; 833 + ctx.current_def = old_def; 834 + ctx.ref_stack.pop(); 835 + 836 + errors 837 + } 838 + 839 + LexObjectProperty::Bytes(_) => { 840 + if !matches!(data.data_type(), DataModelType::Bytes) { 841 + vec![StructuralError::TypeMismatch { 842 + path: path.clone(), 843 + expected: DataModelType::Bytes, 844 + actual: data.data_type(), 845 + }] 846 + } else { 847 + Vec::new() 848 + } 849 + } 850 + 851 + LexObjectProperty::CidLink(_) => { 852 + if !matches!(data.data_type(), DataModelType::CidLink) { 853 + vec![StructuralError::TypeMismatch { 854 + path: path.clone(), 855 + expected: DataModelType::CidLink, 856 + actual: data.data_type(), 857 + }] 858 + } else { 859 + Vec::new() 860 + } 861 + } 862 + 863 + LexObjectProperty::Blob(_) => { 864 + if !matches!(data.data_type(), DataModelType::Blob) { 865 + vec![StructuralError::TypeMismatch { 866 + path: path.clone(), 867 + expected: DataModelType::Blob, 868 + actual: data.data_type(), 869 + }] 870 + } else { 871 + Vec::new() 872 + } 873 + } 874 + 875 + LexObjectProperty::Unknown(_) => { 876 + // Any type allowed 877 + Vec::new() 878 + } 879 + } 880 + } 881 + 882 + /// Validate array item against array item schema 883 + fn validate_array_item( 884 + path: &mut ValidationPath, 885 + data: &Data, 886 + item_schema: &crate::lexicon::LexArrayItem, 887 + registry: &SchemaRegistry, 888 + ctx: &mut ValidationContext, 889 + ) -> Vec<StructuralError> { 890 + use crate::lexicon::LexArrayItem; 891 + 892 + match item_schema { 893 + LexArrayItem::String(s) => validate_property( 894 + path, 895 + data, 896 + &crate::lexicon::LexObjectProperty::String(s.clone()), 897 + registry, 898 + ctx, 899 + ), 900 + 
LexArrayItem::Integer(i) => validate_property( 901 + path, 902 + data, 903 + &crate::lexicon::LexObjectProperty::Integer(i.clone()), 904 + registry, 905 + ctx, 906 + ), 907 + LexArrayItem::Boolean(b) => validate_property( 908 + path, 909 + data, 910 + &crate::lexicon::LexObjectProperty::Boolean(b.clone()), 911 + registry, 912 + ctx, 913 + ), 914 + LexArrayItem::Object(o) => validate_property( 915 + path, 916 + data, 917 + &crate::lexicon::LexObjectProperty::Object(o.clone()), 918 + registry, 919 + ctx, 920 + ), 921 + LexArrayItem::Unknown(u) => validate_property( 922 + path, 923 + data, 924 + &crate::lexicon::LexObjectProperty::Unknown(u.clone()), 925 + registry, 926 + ctx, 927 + ), 928 + LexArrayItem::Bytes(b) => validate_property( 929 + path, 930 + data, 931 + &crate::lexicon::LexObjectProperty::Bytes(b.clone()), 932 + registry, 933 + ctx, 934 + ), 935 + LexArrayItem::CidLink(c) => validate_property( 936 + path, 937 + data, 938 + &crate::lexicon::LexObjectProperty::CidLink(c.clone()), 939 + registry, 940 + ctx, 941 + ), 942 + LexArrayItem::Blob(b) => validate_property( 943 + path, 944 + data, 945 + &crate::lexicon::LexObjectProperty::Blob(b.clone()), 946 + registry, 947 + ctx, 948 + ), 949 + LexArrayItem::Ref(r) => validate_property( 950 + path, 951 + data, 952 + &crate::lexicon::LexObjectProperty::Ref(r.clone()), 953 + registry, 954 + ctx, 955 + ), 956 + LexArrayItem::Union(u) => validate_property( 957 + path, 958 + data, 959 + &crate::lexicon::LexObjectProperty::Union(u.clone()), 960 + registry, 961 + ctx, 962 + ), 963 + } 964 + } 965 + 966 + // ============================================================================ 967 + // CONSTRAINT VALIDATION 968 + // ============================================================================ 969 + 970 + /// Validate constraints on data against schema (entry point with optional registry) 971 + fn validate_constraints( 972 + path: &mut ValidationPath, 973 + data: &Data, 974 + nsid: &str, 975 + def_name: &str, 976 + 
registry: Option<&Arc<SchemaRegistry>>, 977 + ) -> Vec<ConstraintError> { 978 + // Use provided registry or fall back to global inventory 979 + let fallback_registry; 980 + let registry_ref = match registry { 981 + Some(r) => r.as_ref(), 982 + None => { 983 + fallback_registry = SchemaRegistry::from_inventory(); 984 + &fallback_registry 985 + } 986 + }; 987 + 988 + validate_constraints_impl(path, data, nsid, def_name, registry_ref) 989 + } 990 + 991 + /// Internal implementation that takes materialized registry 992 + fn validate_constraints_impl( 993 + path: &mut ValidationPath, 994 + data: &Data, 995 + nsid: &str, 996 + def_name: &str, 997 + registry: &SchemaRegistry, 998 + ) -> Vec<ConstraintError> { 999 + use crate::lexicon::LexUserType; 1000 + 1001 + // Get schema def 1002 + let Some(def) = registry.get_def(nsid, def_name) else { 1003 + return Vec::new(); 1004 + }; 1005 + 1006 + match def { 1007 + LexUserType::Object(obj) => { 1008 + let Data::Object(obj_data) = data else { 1009 + return Vec::new(); 1010 + }; 1011 + 1012 + let mut errors = Vec::new(); 1013 + 1014 + // Check constraints on each property 1015 + for (name, prop) in &obj.properties { 1016 + if let Some(field_data) = obj_data.get(name.as_ref()) { 1017 + path.push_field(name.as_ref()); 1018 + errors.extend(check_property_constraints(path, field_data, prop, registry)); 1019 + path.pop(); 1020 + } 1021 + } 1022 + 1023 + errors 1024 + } 1025 + // Other def types would go here 1026 + _ => Vec::new(), 1027 + } 1028 + } 1029 + 1030 + /// Check constraints on a property 1031 + fn check_property_constraints( 1032 + path: &mut ValidationPath, 1033 + data: &Data, 1034 + prop: &crate::lexicon::LexObjectProperty, 1035 + registry: &SchemaRegistry, 1036 + ) -> Vec<ConstraintError> { 1037 + use crate::lexicon::LexObjectProperty; 1038 + 1039 + match prop { 1040 + LexObjectProperty::String(s) => { 1041 + if let Data::String(str_val) = data { 1042 + check_string_constraints(path, str_val.as_str(), s) 1043 + } else { 
1044 + Vec::new() 1045 + } 1046 + } 1047 + 1048 + LexObjectProperty::Integer(i) => { 1049 + if let Data::Integer(int_val) = data { 1050 + check_integer_constraints(path, *int_val, i) 1051 + } else { 1052 + Vec::new() 1053 + } 1054 + } 1055 + 1056 + LexObjectProperty::Array(arr) => { 1057 + if let Data::Array(array) = data { 1058 + let mut errors = check_array_constraints(path, array, arr); 1059 + 1060 + // Also check constraints on array items 1061 + for (idx, item) in array.iter().enumerate() { 1062 + path.push_index(idx); 1063 + errors.extend(check_array_item_constraints( 1064 + path, item, &arr.items, registry, 1065 + )); 1066 + path.pop(); 1067 + } 1068 + 1069 + errors 1070 + } else { 1071 + Vec::new() 1072 + } 1073 + } 1074 + 1075 + LexObjectProperty::Object(obj) => { 1076 + if let Data::Object(obj_data) = data { 1077 + let mut errors = Vec::new(); 1078 + 1079 + // Recursively check nested object properties 1080 + for (name, schema_prop) in &obj.properties { 1081 + if let Some(field_data) = obj_data.get(name.as_ref()) { 1082 + path.push_field(name.as_ref()); 1083 + errors.extend(check_property_constraints( 1084 + path, 1085 + field_data, 1086 + schema_prop, 1087 + registry, 1088 + )); 1089 + path.pop(); 1090 + } 1091 + } 1092 + 1093 + errors 1094 + } else { 1095 + Vec::new() 1096 + } 1097 + } 1098 + 1099 + LexObjectProperty::Ref(r) => { 1100 + // Follow ref and check constraints 1101 + let (ref_nsid, ref_def) = normalize_ref(r.r#ref.as_ref(), ""); // FIXME: need current nsid 1102 + 1103 + if registry.get_def(&ref_nsid, &ref_def).is_some() { 1104 + validate_constraints_impl(path, data, &ref_nsid, &ref_def, registry) 1105 + } else { 1106 + Vec::new() 1107 + } 1108 + } 1109 + 1110 + // Other property types don't have constraints 1111 + _ => Vec::new(), 1112 + } 1113 + } 1114 + 1115 + /// Check string constraints 1116 + fn check_string_constraints( 1117 + path: &ValidationPath, 1118 + value: &str, 1119 + schema: &crate::lexicon::LexString, 1120 + ) -> 
Vec<ConstraintError> { 1121 + let mut errors = Vec::new(); 1122 + 1123 + // Check byte length constraints 1124 + let byte_len = value.len(); 1125 + 1126 + if let Some(min) = schema.min_length { 1127 + if byte_len < min as usize { 1128 + errors.push(ConstraintError::MinLength { 1129 + path: path.clone(), 1130 + min: min as usize, 1131 + actual: byte_len, 1132 + }); 1133 + } 1134 + } 1135 + 1136 + if let Some(max) = schema.max_length { 1137 + if byte_len > max as usize { 1138 + errors.push(ConstraintError::MaxLength { 1139 + path: path.clone(), 1140 + max: max as usize, 1141 + actual: byte_len, 1142 + }); 1143 + } 1144 + } 1145 + 1146 + // Check grapheme count constraints 1147 + if schema.min_graphemes.is_some() || schema.max_graphemes.is_some() { 1148 + use unicode_segmentation::UnicodeSegmentation; 1149 + let grapheme_count = value.graphemes(true).count(); 1150 + 1151 + if let Some(min) = schema.min_graphemes { 1152 + if grapheme_count < min as usize { 1153 + errors.push(ConstraintError::MinGraphemes { 1154 + path: path.clone(), 1155 + min: min as usize, 1156 + actual: grapheme_count, 1157 + }); 1158 + } 1159 + } 1160 + 1161 + if let Some(max) = schema.max_graphemes { 1162 + if grapheme_count > max as usize { 1163 + errors.push(ConstraintError::MaxGraphemes { 1164 + path: path.clone(), 1165 + max: max as usize, 1166 + actual: grapheme_count, 1167 + }); 1168 + } 1169 + } 1170 + } 1171 + 1172 + errors 1173 + } 1174 + 1175 + /// Check integer constraints 1176 + fn check_integer_constraints( 1177 + path: &ValidationPath, 1178 + value: i64, 1179 + schema: &crate::lexicon::LexInteger, 1180 + ) -> Vec<ConstraintError> { 1181 + let mut errors = Vec::new(); 1182 + 1183 + if let Some(min) = schema.minimum { 1184 + if value < min { 1185 + errors.push(ConstraintError::Minimum { 1186 + path: path.clone(), 1187 + min, 1188 + actual: value, 1189 + }); 1190 + } 1191 + } 1192 + 1193 + if let Some(max) = schema.maximum { 1194 + if value > max { 1195 + 
errors.push(ConstraintError::Maximum { 1196 + path: path.clone(), 1197 + max, 1198 + actual: value, 1199 + }); 1200 + } 1201 + } 1202 + 1203 + errors 1204 + } 1205 + 1206 + /// Check array length constraints 1207 + fn check_array_constraints( 1208 + path: &ValidationPath, 1209 + array: &jacquard_common::types::value::Array, 1210 + schema: &crate::lexicon::LexArray, 1211 + ) -> Vec<ConstraintError> { 1212 + let mut errors = Vec::new(); 1213 + let len = array.len(); 1214 + 1215 + if let Some(min) = schema.min_length { 1216 + if len < min as usize { 1217 + errors.push(ConstraintError::MinLength { 1218 + path: path.clone(), 1219 + min: min as usize, 1220 + actual: len, 1221 + }); 1222 + } 1223 + } 1224 + 1225 + if let Some(max) = schema.max_length { 1226 + if len > max as usize { 1227 + errors.push(ConstraintError::MaxLength { 1228 + path: path.clone(), 1229 + max: max as usize, 1230 + actual: len, 1231 + }); 1232 + } 1233 + } 1234 + 1235 + errors 1236 + } 1237 + 1238 + /// Check constraints on array items 1239 + fn check_array_item_constraints( 1240 + path: &mut ValidationPath, 1241 + data: &Data, 1242 + item_schema: &crate::lexicon::LexArrayItem, 1243 + registry: &SchemaRegistry, 1244 + ) -> Vec<ConstraintError> { 1245 + use crate::lexicon::LexArrayItem; 1246 + 1247 + match item_schema { 1248 + LexArrayItem::String(s) => check_property_constraints( 1249 + path, 1250 + data, 1251 + &crate::lexicon::LexObjectProperty::String(s.clone()), 1252 + registry, 1253 + ), 1254 + LexArrayItem::Integer(i) => check_property_constraints( 1255 + path, 1256 + data, 1257 + &crate::lexicon::LexObjectProperty::Integer(i.clone()), 1258 + registry, 1259 + ), 1260 + LexArrayItem::Object(o) => check_property_constraints( 1261 + path, 1262 + data, 1263 + &crate::lexicon::LexObjectProperty::Object(o.clone()), 1264 + registry, 1265 + ), 1266 + LexArrayItem::Ref(r) => check_property_constraints( 1267 + path, 1268 + data, 1269 + &crate::lexicon::LexObjectProperty::Ref(r.clone()), 1270 + 
registry, 1271 + ), 1272 + // Other array item types don't have constraints 1273 + _ => Vec::new(), 1274 + } 1275 + } 1276 + 1277 + #[cfg(test)] 1278 + mod tests;
+1110
crates/jacquard-lexicon/src/validation/tests.rs
··· 1 + //! Tests for Data validation against lexicon schemas 2 + 3 + use super::*; 4 + use crate::{lexicon::*, schema::LexiconSchema}; 5 + use jacquard_common::{ 6 + CowStr, 7 + types::{string::AtprotoStr, value::Data}, 8 + }; 9 + use std::collections::BTreeMap; 10 + 11 + // Helper to create plain string Data 12 + fn data_string(s: &str) -> Data<'static> { 13 + use smol_str::ToSmolStr; 14 + Data::String(AtprotoStr::String(CowStr::Owned(s.to_smolstr()))) 15 + } 16 + 17 + // Test schema: Simple object with required string field 18 + struct SimpleSchema; 19 + 20 + impl LexiconSchema for SimpleSchema { 21 + fn nsid() -> &'static str { 22 + "test.simple" 23 + } 24 + 25 + fn def_name() -> &'static str { 26 + "main" 27 + } 28 + 29 + fn lexicon_doc() -> LexiconDoc<'static> { 30 + LexiconDoc { 31 + lexicon: Lexicon::Lexicon1, 32 + id: CowStr::new_static("test.simple"), 33 + revision: None, 34 + description: None, 35 + defs: { 36 + let mut defs = BTreeMap::new(); 37 + defs.insert( 38 + "main".into(), 39 + LexUserType::Object(LexObject { 40 + description: None, 41 + required: Some(vec!["text".into()]), 42 + nullable: None, 43 + properties: { 44 + let mut props = BTreeMap::new(); 45 + props.insert( 46 + "text".into(), 47 + LexObjectProperty::String(LexString { 48 + description: None, 49 + format: None, 50 + default: None, 51 + min_length: None, 52 + max_length: None, 53 + min_graphemes: None, 54 + max_graphemes: None, 55 + r#enum: None, 56 + r#const: None, 57 + known_values: None, 58 + }), 59 + ); 60 + props 61 + }, 62 + }), 63 + ); 64 + defs 65 + }, 66 + } 67 + } 68 + } 69 + 70 + #[test] 71 + fn test_valid_simple_object() { 72 + let validator = SchemaValidator::new(); 73 + validator 74 + .registry() 75 + .insert("test.simple".to_smolstr(), SimpleSchema::lexicon_doc()); 76 + 77 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 78 + "text".into(), 79 + data_string("hello"), 80 + )]))); 81 + 82 + let result = 
validator.validate::<SimpleSchema>(&data).unwrap(); 83 + assert!( 84 + result.is_valid(), 85 + "Expected valid, got: {:?}", 86 + result.structural_errors() 87 + ); 88 + } 89 + 90 + #[test] 91 + fn test_missing_required_field() { 92 + let validator = SchemaValidator::new(); 93 + validator 94 + .registry() 95 + .insert("test.simple".to_smolstr(), SimpleSchema::lexicon_doc()); 96 + 97 + // Empty object - missing required 'text' field 98 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::new())); 99 + 100 + let result = validator.validate::<SimpleSchema>(&data).unwrap(); 101 + assert!(!result.is_valid()); 102 + 103 + let errors = result.structural_errors(); 104 + assert_eq!(errors.len(), 1); 105 + assert!(matches!( 106 + &errors[0], 107 + StructuralError::MissingRequiredField { field, .. } if field.as_str() == "text" 108 + )); 109 + } 110 + 111 + #[test] 112 + fn test_type_mismatch() { 113 + let validator = SchemaValidator::new(); 114 + validator 115 + .registry() 116 + .insert("test.simple".to_smolstr(), SimpleSchema::lexicon_doc()); 117 + 118 + // 'text' field is integer instead of string 119 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 120 + "text".into(), 121 + Data::Integer(42), 122 + )]))); 123 + 124 + let result = validator.validate::<SimpleSchema>(&data).unwrap(); 125 + assert!(!result.is_valid()); 126 + 127 + let errors = result.structural_errors(); 128 + assert_eq!(errors.len(), 1); 129 + match &errors[0] { 130 + StructuralError::TypeMismatch { 131 + expected, actual, .. 
132 + } => { 133 + assert!(matches!( 134 + expected, 135 + jacquard_common::types::DataModelType::String(_) 136 + )); 137 + assert!(matches!( 138 + actual, 139 + jacquard_common::types::DataModelType::Integer 140 + )); 141 + } 142 + _ => panic!("Expected TypeMismatch error"), 143 + } 144 + } 145 + 146 + // Test schema: Union with $type discriminator 147 + struct UnionSchema; 148 + 149 + impl LexiconSchema for UnionSchema { 150 + fn nsid() -> &'static str { 151 + "test.union" 152 + } 153 + 154 + fn lexicon_doc() -> LexiconDoc<'static> { 155 + LexiconDoc { 156 + lexicon: Lexicon::Lexicon1, 157 + id: CowStr::new_static("test.union"), 158 + revision: None, 159 + description: None, 160 + defs: { 161 + let mut defs = BTreeMap::new(); 162 + defs.insert( 163 + "main".into(), 164 + LexUserType::Object(LexObject { 165 + description: None, 166 + required: Some(vec!["content".into()]), 167 + nullable: None, 168 + properties: { 169 + let mut props = BTreeMap::new(); 170 + props.insert( 171 + "content".into(), 172 + LexObjectProperty::Union(LexRefUnion { 173 + description: None, 174 + refs: vec!["#text".into(), "#image".into()], 175 + closed: Some(true), 176 + }), 177 + ); 178 + props 179 + }, 180 + }), 181 + ); 182 + defs.insert( 183 + "text".into(), 184 + LexUserType::Object(LexObject { 185 + description: None, 186 + required: Some(vec!["value".into()]), 187 + nullable: None, 188 + properties: { 189 + let mut props = BTreeMap::new(); 190 + props.insert( 191 + "value".into(), 192 + LexObjectProperty::String(LexString { 193 + description: None, 194 + format: None, 195 + default: None, 196 + min_length: None, 197 + max_length: None, 198 + min_graphemes: None, 199 + max_graphemes: None, 200 + r#enum: None, 201 + r#const: None, 202 + known_values: None, 203 + }), 204 + ); 205 + props 206 + }, 207 + }), 208 + ); 209 + defs.insert( 210 + "image".into(), 211 + LexUserType::Object(LexObject { 212 + description: None, 213 + required: Some(vec!["url".into()]), 214 + nullable: None, 215 + 
properties: { 216 + let mut props = BTreeMap::new(); 217 + props.insert( 218 + "url".into(), 219 + LexObjectProperty::String(LexString { 220 + description: None, 221 + format: None, 222 + default: None, 223 + min_length: None, 224 + max_length: None, 225 + min_graphemes: None, 226 + max_graphemes: None, 227 + r#enum: None, 228 + r#const: None, 229 + known_values: None, 230 + }), 231 + ); 232 + props 233 + }, 234 + }), 235 + ); 236 + defs 237 + }, 238 + } 239 + } 240 + } 241 + 242 + #[test] 243 + fn test_union_missing_discriminator() { 244 + let validator = SchemaValidator::new(); 245 + validator 246 + .registry() 247 + .insert("test.union".to_smolstr(), UnionSchema::lexicon_doc()); 248 + 249 + // Union object without $type field 250 + let content = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 251 + "value".into(), 252 + data_string("hello"), 253 + )]))); 254 + 255 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 256 + "content".into(), 257 + content, 258 + )]))); 259 + 260 + let result = validator.validate::<UnionSchema>(&data).unwrap(); 261 + assert!(!result.is_valid()); 262 + 263 + let errors = result.structural_errors(); 264 + assert!( 265 + errors 266 + .iter() 267 + .any(|e| matches!(e, StructuralError::MissingUnionDiscriminator { .. 
})) 268 + ); 269 + } 270 + 271 + #[test] 272 + fn test_union_invalid_type() { 273 + let validator = SchemaValidator::new(); 274 + validator 275 + .registry() 276 + .insert("test.union".to_smolstr(), UnionSchema::lexicon_doc()); 277 + 278 + // Union with $type that doesn't match any variant 279 + let content = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([ 280 + ("$type".into(), data_string("test.union#unknown")), 281 + ("value".into(), data_string("hello")), 282 + ]))); 283 + 284 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 285 + "content".into(), 286 + content, 287 + )]))); 288 + 289 + let result = validator.validate::<UnionSchema>(&data).unwrap(); 290 + assert!(!result.is_valid()); 291 + 292 + let errors = result.structural_errors(); 293 + assert!( 294 + errors 295 + .iter() 296 + .any(|e| matches!(e, StructuralError::UnionNoMatch { .. })) 297 + ); 298 + } 299 + 300 + #[test] 301 + fn test_union_valid_variant() { 302 + let validator = SchemaValidator::new(); 303 + validator 304 + .registry() 305 + .insert("test.union".to_smolstr(), UnionSchema::lexicon_doc()); 306 + 307 + // Valid text variant 308 + let content = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([ 309 + ("$type".into(), data_string("test.union#text")), 310 + ("value".into(), data_string("hello")), 311 + ]))); 312 + 313 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 314 + "content".into(), 315 + content, 316 + )]))); 317 + 318 + let result = validator.validate::<UnionSchema>(&data).unwrap(); 319 + assert!( 320 + result.is_valid(), 321 + "Expected valid, got: {:?}", 322 + result.structural_errors() 323 + ); 324 + } 325 + 326 + // Test schema: Array validation 327 + struct ArraySchema; 328 + 329 + impl LexiconSchema for ArraySchema { 330 + fn nsid() -> &'static str { 331 + "test.array" 332 + } 333 + 334 + fn lexicon_doc() -> LexiconDoc<'static> { 335 + LexiconDoc { 336 + lexicon: 
Lexicon::Lexicon1, 337 + id: CowStr::new_static("test.array"), 338 + revision: None, 339 + description: None, 340 + defs: { 341 + let mut defs = BTreeMap::new(); 342 + defs.insert( 343 + "main".into(), 344 + LexUserType::Object(LexObject { 345 + description: None, 346 + required: Some(vec!["items".into()]), 347 + nullable: None, 348 + properties: { 349 + let mut props = BTreeMap::new(); 350 + props.insert( 351 + "items".into(), 352 + LexObjectProperty::Array(LexArray { 353 + description: None, 354 + items: LexArrayItem::String(LexString { 355 + description: None, 356 + format: None, 357 + default: None, 358 + min_length: None, 359 + max_length: None, 360 + min_graphemes: None, 361 + max_graphemes: None, 362 + r#enum: None, 363 + r#const: None, 364 + known_values: None, 365 + }), 366 + min_length: None, 367 + max_length: None, 368 + }), 369 + ); 370 + props 371 + }, 372 + }), 373 + ); 374 + defs 375 + }, 376 + } 377 + } 378 + } 379 + 380 + #[test] 381 + fn test_array_valid_items() { 382 + let validator = SchemaValidator::new(); 383 + validator 384 + .registry() 385 + .insert("test.array".to_smolstr(), ArraySchema::lexicon_doc()); 386 + 387 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 388 + "items".into(), 389 + Data::Array(jacquard_common::types::value::Array(vec![ 390 + data_string("one"), 391 + data_string("two"), 392 + data_string("three"), 393 + ])), 394 + )]))); 395 + 396 + let result = validator.validate::<ArraySchema>(&data).unwrap(); 397 + assert!( 398 + result.is_valid(), 399 + "Expected valid, got: {:?}", 400 + result.structural_errors() 401 + ); 402 + } 403 + 404 + #[test] 405 + fn test_array_invalid_item_type() { 406 + let validator = SchemaValidator::new(); 407 + validator 408 + .registry() 409 + .insert("test.array".to_smolstr(), ArraySchema::lexicon_doc()); 410 + 411 + // Second item is integer instead of string 412 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 413 + 
"items".into(), 414 + Data::Array(jacquard_common::types::value::Array(vec![ 415 + data_string("one"), 416 + Data::Integer(42), 417 + data_string("three"), 418 + ])), 419 + )]))); 420 + 421 + let result = validator.validate::<ArraySchema>(&data).unwrap(); 422 + assert!(!result.is_valid()); 423 + 424 + let errors = result.structural_errors(); 425 + assert!(errors.iter().any(|e| { 426 + matches!(e, StructuralError::TypeMismatch { expected, actual, .. } 427 + if matches!(expected, jacquard_common::types::DataModelType::String(_)) 428 + && matches!(actual, jacquard_common::types::DataModelType::Integer)) 429 + })); 430 + } 431 + 432 + #[test] 433 + fn test_nested_objects() { 434 + // Test schema with nested object 435 + struct NestedSchema; 436 + impl LexiconSchema for NestedSchema { 437 + fn nsid() -> &'static str { 438 + "test.nested" 439 + } 440 + 441 + fn lexicon_doc() -> LexiconDoc<'static> { 442 + LexiconDoc { 443 + lexicon: Lexicon::Lexicon1, 444 + id: CowStr::new_static("test.nested"), 445 + revision: None, 446 + description: None, 447 + defs: { 448 + let mut defs = BTreeMap::new(); 449 + defs.insert( 450 + "main".into(), 451 + LexUserType::Object(LexObject { 452 + description: None, 453 + required: Some(vec!["meta".into()]), 454 + nullable: None, 455 + properties: { 456 + let mut props = BTreeMap::new(); 457 + props.insert( 458 + "meta".into(), 459 + LexObjectProperty::Object(LexObject { 460 + description: None, 461 + required: Some(vec!["title".into()]), 462 + nullable: None, 463 + properties: { 464 + let mut meta_props = BTreeMap::new(); 465 + meta_props.insert( 466 + "title".into(), 467 + LexObjectProperty::String(LexString { 468 + description: None, 469 + format: None, 470 + default: None, 471 + min_length: None, 472 + max_length: None, 473 + min_graphemes: None, 474 + max_graphemes: None, 475 + r#enum: None, 476 + r#const: None, 477 + known_values: None, 478 + }), 479 + ); 480 + meta_props 481 + }, 482 + }), 483 + ); 484 + props 485 + }, 486 + }), 487 + 
); 488 + defs 489 + }, 490 + } 491 + } 492 + } 493 + 494 + let validator = SchemaValidator::new(); 495 + validator 496 + .registry() 497 + .insert("test.nested".to_smolstr(), NestedSchema::lexicon_doc()); 498 + 499 + // Nested object missing required field 500 + let meta = Data::Object(jacquard_common::types::value::Object(BTreeMap::new())); 501 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 502 + "meta".into(), 503 + meta, 504 + )]))); 505 + 506 + let result = validator.validate::<NestedSchema>(&data).unwrap(); 507 + assert!(!result.is_valid()); 508 + 509 + let errors = result.structural_errors(); 510 + assert!(errors.iter().any(|e| matches!( 511 + e, 512 + StructuralError::MissingRequiredField { field, .. } if field.as_str() == "title" 513 + ))); 514 + } 515 + 516 + // ============================================================================ 517 + // CONSTRAINT VALIDATION TESTS (Phase 4) 518 + // ============================================================================ 519 + 520 + // Schema with string constraints 521 + struct StringConstraintSchema; 522 + 523 + impl LexiconSchema for StringConstraintSchema { 524 + fn nsid() -> &'static str { 525 + "test.string.constraints" 526 + } 527 + 528 + fn lexicon_doc() -> LexiconDoc<'static> { 529 + LexiconDoc { 530 + lexicon: Lexicon::Lexicon1, 531 + id: CowStr::new_static("test.string.constraints"), 532 + revision: None, 533 + description: None, 534 + defs: { 535 + let mut defs = BTreeMap::new(); 536 + defs.insert( 537 + "main".into(), 538 + LexUserType::Object(LexObject { 539 + description: None, 540 + required: Some(vec!["text".into()]), 541 + nullable: None, 542 + properties: { 543 + let mut props = BTreeMap::new(); 544 + props.insert( 545 + "text".into(), 546 + LexObjectProperty::String(LexString { 547 + description: None, 548 + format: None, 549 + default: None, 550 + min_length: Some(5), 551 + max_length: Some(20), 552 + min_graphemes: None, 553 + max_graphemes: None, 554 + 
r#enum: None, 555 + r#const: None, 556 + known_values: None, 557 + }), 558 + ); 559 + props 560 + }, 561 + }), 562 + ); 563 + defs 564 + }, 565 + } 566 + } 567 + } 568 + 569 + // Schema with grapheme constraints 570 + struct GraphemeConstraintSchema; 571 + 572 + impl LexiconSchema for GraphemeConstraintSchema { 573 + fn nsid() -> &'static str { 574 + "test.grapheme.constraints" 575 + } 576 + 577 + fn lexicon_doc() -> LexiconDoc<'static> { 578 + LexiconDoc { 579 + lexicon: Lexicon::Lexicon1, 580 + id: CowStr::new_static("test.grapheme.constraints"), 581 + revision: None, 582 + description: None, 583 + defs: { 584 + let mut defs = BTreeMap::new(); 585 + defs.insert( 586 + "main".into(), 587 + LexUserType::Object(LexObject { 588 + description: None, 589 + required: Some(vec!["text".into()]), 590 + nullable: None, 591 + properties: { 592 + let mut props = BTreeMap::new(); 593 + props.insert( 594 + "text".into(), 595 + LexObjectProperty::String(LexString { 596 + description: None, 597 + format: None, 598 + default: None, 599 + min_length: None, 600 + max_length: None, 601 + min_graphemes: Some(2), 602 + max_graphemes: Some(5), 603 + r#enum: None, 604 + r#const: None, 605 + known_values: None, 606 + }), 607 + ); 608 + props 609 + }, 610 + }), 611 + ); 612 + defs 613 + }, 614 + } 615 + } 616 + } 617 + 618 + // Schema with integer constraints 619 + struct IntegerConstraintSchema; 620 + 621 + impl LexiconSchema for IntegerConstraintSchema { 622 + fn nsid() -> &'static str { 623 + "test.integer.constraints" 624 + } 625 + 626 + fn lexicon_doc() -> LexiconDoc<'static> { 627 + LexiconDoc { 628 + lexicon: Lexicon::Lexicon1, 629 + id: CowStr::new_static("test.integer.constraints"), 630 + revision: None, 631 + description: None, 632 + defs: { 633 + let mut defs = BTreeMap::new(); 634 + defs.insert( 635 + "main".into(), 636 + LexUserType::Object(LexObject { 637 + description: None, 638 + required: Some(vec!["value".into()]), 639 + nullable: None, 640 + properties: { 641 + let mut 
props = BTreeMap::new(); 642 + props.insert( 643 + "value".into(), 644 + LexObjectProperty::Integer(LexInteger { 645 + description: None, 646 + default: None, 647 + minimum: Some(0), 648 + maximum: Some(100), 649 + r#enum: None, 650 + r#const: None, 651 + }), 652 + ); 653 + props 654 + }, 655 + }), 656 + ); 657 + defs 658 + }, 659 + } 660 + } 661 + } 662 + 663 + // Schema with array length constraints 664 + struct ArrayConstraintSchema; 665 + 666 + impl LexiconSchema for ArrayConstraintSchema { 667 + fn nsid() -> &'static str { 668 + "test.array.constraints" 669 + } 670 + 671 + fn lexicon_doc() -> LexiconDoc<'static> { 672 + LexiconDoc { 673 + lexicon: Lexicon::Lexicon1, 674 + id: CowStr::new_static("test.array.constraints"), 675 + revision: None, 676 + description: None, 677 + defs: { 678 + let mut defs = BTreeMap::new(); 679 + defs.insert( 680 + "main".into(), 681 + LexUserType::Object(LexObject { 682 + description: None, 683 + required: Some(vec!["items".into()]), 684 + nullable: None, 685 + properties: { 686 + let mut props = BTreeMap::new(); 687 + props.insert( 688 + "items".into(), 689 + LexObjectProperty::Array(LexArray { 690 + description: None, 691 + items: LexArrayItem::String(LexString { 692 + description: None, 693 + format: None, 694 + default: None, 695 + min_length: None, 696 + max_length: None, 697 + min_graphemes: None, 698 + max_graphemes: None, 699 + r#enum: None, 700 + r#const: None, 701 + known_values: None, 702 + }), 703 + min_length: Some(2), 704 + max_length: Some(5), 705 + }), 706 + ); 707 + props 708 + }, 709 + }), 710 + ); 711 + defs 712 + }, 713 + } 714 + } 715 + } 716 + 717 + #[test] 718 + fn test_constraint_validation_is_lazy() { 719 + let validator = SchemaValidator::new(); 720 + validator.registry().insert( 721 + "test.string.constraints".to_smolstr(), 722 + StringConstraintSchema::lexicon_doc(), 723 + ); 724 + 725 + // String too long (24 chars, max is 20) 726 + let data =
Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 727 + "text".into(), 728 + data_string("this string is too long!"), 729 + )]))); 730 + 731 + let result = validator 732 + .validate::<StringConstraintSchema>(&data) 733 + .unwrap(); 734 + 735 + // Structurally valid - type is correct, required field present 736 + assert!(result.is_structurally_valid()); 737 + 738 + // But overall invalid due to constraint violation 739 + assert!(!result.is_valid()); 740 + } 741 + 742 + #[test] 743 + fn test_string_max_length() { 744 + let validator = SchemaValidator::new(); 745 + validator.registry().insert( 746 + "test.string.constraints".to_smolstr(), 747 + StringConstraintSchema::lexicon_doc(), 748 + ); 749 + 750 + // String exceeding max_length (27 chars, max is 20) 751 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 752 + "text".into(), 753 + data_string("this string is way too long"), 754 + )]))); 755 + 756 + let result = validator 757 + .validate::<StringConstraintSchema>(&data) 758 + .unwrap(); 759 + 760 + assert!(!result.is_valid()); 761 + assert!(result.is_structurally_valid()); 762 + assert!(result.has_constraint_violations()); 763 + 764 + let constraint_errors = result.constraint_errors(); 765 + assert_eq!(constraint_errors.len(), 1); 766 + assert!(matches!( 767 + &constraint_errors[0], 768 + ConstraintError::MaxLength { max: 20, actual: 27, ..
} 769 + )); 770 + } 771 + 772 + #[test] 773 + fn test_string_min_length() { 774 + let validator = SchemaValidator::new(); 775 + validator.registry().insert( 776 + "test.string.constraints".to_smolstr(), 777 + StringConstraintSchema::lexicon_doc(), 778 + ); 779 + 780 + // String below min_length (3 chars, min is 5) 781 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 782 + "text".into(), 783 + data_string("hi"), 784 + )]))); 785 + 786 + let result = validator 787 + .validate::<StringConstraintSchema>(&data) 788 + .unwrap(); 789 + 790 + assert!(!result.is_valid()); 791 + assert!(result.is_structurally_valid()); 792 + 793 + let constraint_errors = result.constraint_errors(); 794 + assert_eq!(constraint_errors.len(), 1); 795 + assert!(matches!( 796 + &constraint_errors[0], 797 + ConstraintError::MinLength { min: 5, actual: 2, .. } 798 + )); 799 + } 800 + 801 + #[test] 802 + fn test_string_max_graphemes() { 803 + let validator = SchemaValidator::new(); 804 + validator.registry().insert( 805 + "test.grapheme.constraints".to_smolstr(), 806 + GraphemeConstraintSchema::lexicon_doc(), 807 + ); 808 + 809 + // 6 emoji graphemes (max is 5) 810 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 811 + "text".into(), 812 + data_string("👍👍👍👍👍👍"), 813 + )]))); 814 + 815 + let result = validator 816 + .validate::<GraphemeConstraintSchema>(&data) 817 + .unwrap(); 818 + 819 + assert!(!result.is_valid()); 820 + assert!(result.is_structurally_valid()); 821 + 822 + let constraint_errors = result.constraint_errors(); 823 + assert_eq!(constraint_errors.len(), 1); 824 + assert!(matches!( 825 + &constraint_errors[0], 826 + ConstraintError::MaxGraphemes { max: 5, actual: 6, .. 
} 827 + )); 828 + } 829 + 830 + #[test] 831 + fn test_string_min_graphemes() { 832 + let validator = SchemaValidator::new(); 833 + validator.registry().insert( 834 + "test.grapheme.constraints".to_smolstr(), 835 + GraphemeConstraintSchema::lexicon_doc(), 836 + ); 837 + 838 + // 1 emoji grapheme (min is 2) 839 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 840 + "text".into(), 841 + data_string("👍"), 842 + )]))); 843 + 844 + let result = validator 845 + .validate::<GraphemeConstraintSchema>(&data) 846 + .unwrap(); 847 + 848 + assert!(!result.is_valid()); 849 + assert!(result.is_structurally_valid()); 850 + 851 + let constraint_errors = result.constraint_errors(); 852 + assert_eq!(constraint_errors.len(), 1); 853 + assert!(matches!( 854 + &constraint_errors[0], 855 + ConstraintError::MinGraphemes { min: 2, actual: 1, .. } 856 + )); 857 + } 858 + 859 + #[test] 860 + fn test_string_within_constraints() { 861 + let validator = SchemaValidator::new(); 862 + validator.registry().insert( 863 + "test.string.constraints".to_smolstr(), 864 + StringConstraintSchema::lexicon_doc(), 865 + ); 866 + 867 + // Valid string (10 chars, within 5-20 range) 868 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 869 + "text".into(), 870 + data_string("valid text"), 871 + )]))); 872 + 873 + let result = validator 874 + .validate::<StringConstraintSchema>(&data) 875 + .unwrap(); 876 + 877 + assert!(result.is_valid()); 878 + assert!(result.is_structurally_valid()); 879 + assert!(!result.has_constraint_violations()); 880 + } 881 + 882 + #[test] 883 + fn test_integer_maximum() { 884 + let validator = SchemaValidator::new(); 885 + validator.registry().insert( 886 + "test.integer.constraints".to_smolstr(), 887 + IntegerConstraintSchema::lexicon_doc(), 888 + ); 889 + 890 + // Integer exceeding maximum (150 > 100) 891 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 892 + "value".into(), 893 + 
Data::Integer(150), 894 + )]))); 895 + 896 + let result = validator 897 + .validate::<IntegerConstraintSchema>(&data) 898 + .unwrap(); 899 + 900 + assert!(!result.is_valid()); 901 + assert!(result.is_structurally_valid()); 902 + 903 + let constraint_errors = result.constraint_errors(); 904 + assert_eq!(constraint_errors.len(), 1); 905 + assert!(matches!( 906 + &constraint_errors[0], 907 + ConstraintError::Maximum { max: 100, actual: 150, .. } 908 + )); 909 + } 910 + 911 + #[test] 912 + fn test_integer_minimum() { 913 + let validator = SchemaValidator::new(); 914 + validator.registry().insert( 915 + "test.integer.constraints".to_smolstr(), 916 + IntegerConstraintSchema::lexicon_doc(), 917 + ); 918 + 919 + // Integer below minimum (-5 < 0) 920 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 921 + "value".into(), 922 + Data::Integer(-5), 923 + )]))); 924 + 925 + let result = validator 926 + .validate::<IntegerConstraintSchema>(&data) 927 + .unwrap(); 928 + 929 + assert!(!result.is_valid()); 930 + assert!(result.is_structurally_valid()); 931 + 932 + let constraint_errors = result.constraint_errors(); 933 + assert_eq!(constraint_errors.len(), 1); 934 + assert!(matches!( 935 + &constraint_errors[0], 936 + ConstraintError::Minimum { min: 0, actual: -5, .. 
} 937 + )); 938 + } 939 + 940 + #[test] 941 + fn test_integer_within_constraints() { 942 + let validator = SchemaValidator::new(); 943 + validator.registry().insert( 944 + "test.integer.constraints".to_smolstr(), 945 + IntegerConstraintSchema::lexicon_doc(), 946 + ); 947 + 948 + // Valid integer (50 is within 0-100 range) 949 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 950 + "value".into(), 951 + Data::Integer(50), 952 + )]))); 953 + 954 + let result = validator 955 + .validate::<IntegerConstraintSchema>(&data) 956 + .unwrap(); 957 + 958 + assert!(result.is_valid()); 959 + assert!(result.is_structurally_valid()); 960 + assert!(!result.has_constraint_violations()); 961 + } 962 + 963 + #[test] 964 + fn test_array_max_length() { 965 + let validator = SchemaValidator::new(); 966 + validator.registry().insert( 967 + "test.array.constraints".to_smolstr(), 968 + ArrayConstraintSchema::lexicon_doc(), 969 + ); 970 + 971 + // Array with too many items (6 items, max is 5) 972 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 973 + "items".into(), 974 + Data::Array(jacquard_common::types::value::Array(vec![ 975 + data_string("one"), 976 + data_string("two"), 977 + data_string("three"), 978 + data_string("four"), 979 + data_string("five"), 980 + data_string("six"), 981 + ])), 982 + )]))); 983 + 984 + let result = validator 985 + .validate::<ArrayConstraintSchema>(&data) 986 + .unwrap(); 987 + 988 + assert!(!result.is_valid()); 989 + assert!(result.is_structurally_valid()); 990 + 991 + let constraint_errors = result.constraint_errors(); 992 + assert_eq!(constraint_errors.len(), 1); 993 + assert!(matches!( 994 + &constraint_errors[0], 995 + ConstraintError::MaxLength { max: 5, actual: 6, .. 
} 996 + )); 997 + } 998 + 999 + #[test] 1000 + fn test_array_min_length() { 1001 + let validator = SchemaValidator::new(); 1002 + validator.registry().insert( 1003 + "test.array.constraints".to_smolstr(), 1004 + ArrayConstraintSchema::lexicon_doc(), 1005 + ); 1006 + 1007 + // Array with too few items (1 item, min is 2) 1008 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 1009 + "items".into(), 1010 + Data::Array(jacquard_common::types::value::Array(vec![data_string( 1011 + "one", 1012 + )])), 1013 + )]))); 1014 + 1015 + let result = validator 1016 + .validate::<ArrayConstraintSchema>(&data) 1017 + .unwrap(); 1018 + 1019 + assert!(!result.is_valid()); 1020 + assert!(result.is_structurally_valid()); 1021 + 1022 + let constraint_errors = result.constraint_errors(); 1023 + assert_eq!(constraint_errors.len(), 1); 1024 + assert!(matches!( 1025 + &constraint_errors[0], 1026 + ConstraintError::MinLength { min: 2, actual: 1, .. } 1027 + )); 1028 + } 1029 + 1030 + #[test] 1031 + fn test_array_within_constraints() { 1032 + let validator = SchemaValidator::new(); 1033 + validator.registry().insert( 1034 + "test.array.constraints".to_smolstr(), 1035 + ArrayConstraintSchema::lexicon_doc(), 1036 + ); 1037 + 1038 + // Valid array (3 items, within 2-5 range) 1039 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 1040 + "items".into(), 1041 + Data::Array(jacquard_common::types::value::Array(vec![ 1042 + data_string("one"), 1043 + data_string("two"), 1044 + data_string("three"), 1045 + ])), 1046 + )]))); 1047 + 1048 + let result = validator 1049 + .validate::<ArrayConstraintSchema>(&data) 1050 + .unwrap(); 1051 + 1052 + assert!(result.is_valid()); 1053 + assert!(result.is_structurally_valid()); 1054 + assert!(!result.has_constraint_violations()); 1055 + } 1056 + 1057 + #[test] 1058 + fn test_structurally_invalid_skips_constraints() { 1059 + let validator = SchemaValidator::new(); 1060 + validator.registry().insert( 
1061 + "test.string.constraints".to_smolstr(), 1062 + StringConstraintSchema::lexicon_doc(), 1063 + ); 1064 + 1065 + // Structurally invalid: integer instead of string 1066 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 1067 + "text".into(), 1068 + Data::Integer(42), 1069 + )]))); 1070 + 1071 + let result = validator 1072 + .validate::<StringConstraintSchema>(&data) 1073 + .unwrap(); 1074 + 1075 + assert!(!result.is_valid()); 1076 + assert!(!result.is_structurally_valid()); 1077 + 1078 + // Structural errors should be present 1079 + assert_eq!(result.structural_errors().len(), 1); 1080 + 1081 + // Constraint checking should be skipped or return empty 1082 + // (implementation detail: may or may not compute constraints for structurally invalid data) 1083 + } 1084 + 1085 + #[test] 1086 + fn test_structurally_valid_with_constraint_errors() { 1087 + let validator = SchemaValidator::new(); 1088 + validator.registry().insert( 1089 + "test.string.constraints".to_smolstr(), 1090 + StringConstraintSchema::lexicon_doc(), 1091 + ); 1092 + 1093 + // Structurally valid but violates constraints 1094 + let data = Data::Object(jacquard_common::types::value::Object(BTreeMap::from([( 1095 + "text".into(), 1096 + data_string("too long string here!!!"), 1097 + )]))); 1098 + 1099 + let result = validator 1100 + .validate::<StringConstraintSchema>(&data) 1101 + .unwrap(); 1102 + 1103 + assert!(!result.is_valid()); 1104 + assert!(result.is_structurally_valid()); 1105 + assert!(result.has_constraint_violations()); 1106 + 1107 + // Both structural and constraint errors should be separate 1108 + assert_eq!(result.structural_errors().len(), 0); 1109 + assert!(result.constraint_errors().len() > 0); 1110 + }