Our Personal Data Server from scratch! tranquil.farm
atproto pds rust postgresql fun oauth

feat(lexicon): record validation engine #47

merged opened by oyster.cafe targeting main from feat/real-lex-schema-validation
Labels

None yet.

assignee

None yet.

Participants 1
AT URI
at://did:plc:3fwecdnvtcscjnrx2p4n7alz/sh.tangled.repo.pull/3mgvbqsbigu22
+896
Diff #1
+896
crates/tranquil-lexicon/src/validate.rs
··· 1 + use crate::formats::validate_format; 2 + use crate::registry::LexiconRegistry; 3 + use crate::schema::{ 4 + LexArray, LexBlob, LexBytes, LexDef, LexObject, LexProperty, LexString, LexUnion, ParsedRef, 5 + parse_ref, 6 + }; 7 + use thiserror::Error; 8 + use unicode_segmentation::UnicodeSegmentation; 9 + 10 + const MAX_RECURSION_DEPTH: u32 = 32; 11 + 12 + #[derive(Debug, Error)] 13 + pub enum LexValidationError { 14 + #[error("Lexicon not found: {0}")] 15 + LexiconNotFound(String), 16 + #[error("Missing required field: {path}")] 17 + MissingRequired { path: String }, 18 + #[error("Invalid field at {path}: {message}")] 19 + InvalidField { path: String, message: String }, 20 + #[error("Recursion depth exceeded at {path}")] 21 + RecursionDepthExceeded { path: String }, 22 + } 23 + 24 + impl LexValidationError { 25 + fn field(path: &str, message: impl Into<String>) -> Self { 26 + Self::InvalidField { 27 + path: path.to_string(), 28 + message: message.into(), 29 + } 30 + } 31 + } 32 + 33 + fn resolve_union_ref(reference: &str, context_nsid: &str) -> String { 34 + match parse_ref(reference) { 35 + ParsedRef::Local(local) => format!("{}#{}", context_nsid, local), 36 + ParsedRef::Qualified { nsid, fragment } => format!("{}#{}", nsid, fragment), 37 + ParsedRef::Bare(nsid) => nsid.to_string(), 38 + } 39 + } 40 + 41 + fn ref_to_context_nsid<'a>(reference: &'a str, current_context: &'a str) -> &'a str { 42 + match parse_ref(reference) { 43 + ParsedRef::Local(_) => current_context, 44 + ParsedRef::Qualified { nsid, .. } | ParsedRef::Bare(nsid) => nsid, 45 + } 46 + } 47 + 48 + pub fn validate_record( 49 + registry: &LexiconRegistry, 50 + nsid: &str, 51 + value: &serde_json::Value, 52 + ) -> Result<(), LexValidationError> { 53 + let doc = registry 54 + .get_record_def(nsid) 55 + .ok_or_else(|| LexValidationError::LexiconNotFound(nsid.to_string()))?; 56 + 57 + let LexDef::Record(rec) = doc 58 + .defs 59 + .get("main") 60 + .expect("get_record_def guarantees main exists") 61 + else { 62 + unreachable!("get_record_def guarantees main is Record") 63 + }; 64 + 65 + validate_object(registry, nsid, &rec.record, value, "", 0) 66 + } 67 + 68 + fn validate_object( 69 + registry: &LexiconRegistry, 70 + context_nsid: &str, 71 + schema: &LexObject, 72 + value: &serde_json::Value, 73 + path: &str, 74 + depth: u32, 75 + ) -> Result<(), LexValidationError> { 76 + if depth > MAX_RECURSION_DEPTH { 77 + return Err(LexValidationError::RecursionDepthExceeded { 78 + path: path.to_string(), 79 + }); 80 + } 81 + 82 + let obj = value 83 + .as_object() 84 + .ok_or_else(|| LexValidationError::field(path, "expected an object"))?; 85 + 86 + schema.required.iter().try_for_each(|field| { 87 + let is_present = obj 88 + .get(field.as_str()) 89 + .is_some_and(|v| !v.is_null() || schema.nullable.contains(field)); 90 + if is_present { 91 + Ok(()) 92 + } else { 93 + Err(LexValidationError::MissingRequired { 94 + path: field_path(path, field), 95 + }) 96 + } 97 + })?; 98 + 99 + schema 100 + .properties 101 + .iter() 102 + .filter(|(key, _)| obj.contains_key(key.as_str())) 103 + .try_for_each(|(key, prop)| { 104 + let field_val = &obj[key.as_str()]; 105 + let fp = field_path(path, key); 106 + 107 + if schema.nullable.contains(key) && field_val.is_null() { 108 + return Ok(()); 109 + } 110 + 111 + validate_property(registry, context_nsid, prop, field_val, &fp, depth + 1) 112 + }) 113 + } 114 + 115 + fn validate_property( 116 + registry: &LexiconRegistry, 117 + context_nsid: &str, 118 + prop: &LexProperty, 119 + value: &serde_json::Value, 120 + path: &str, 121 + depth: u32, 122 + ) -> Result<(), LexValidationError> { 123 + if depth > MAX_RECURSION_DEPTH { 124 + return Err(LexValidationError::RecursionDepthExceeded { 125 + path: path.to_string(), 126 + }); 127 + } 128 + 129 + match prop { 130 + LexProperty::String(lex_str) => validate_string(lex_str, value, path), 131 + LexProperty::Integer(lex_int) => { 132 + let n = value 133 + .as_i64() 134 + .or_else(|| { 135 + value.as_f64().and_then(|f| { 136 + (f.fract() == 0.0 && (i64::MIN as f64..=i64::MAX as f64).contains(&f)) 137 + .then_some(f as i64) 138 + }) 139 + }) 140 + .ok_or_else(|| LexValidationError::field(path, "expected an integer"))?; 141 + if let Some(min) = lex_int.minimum 142 + && n < min 143 + { 144 + return Err(LexValidationError::field( 145 + path, 146 + format!("value {} below minimum {}", n, min), 147 + )); 148 + } 149 + if let Some(max) = lex_int.maximum 150 + && n > max 151 + { 152 + return Err(LexValidationError::field( 153 + path, 154 + format!("value {} above maximum {}", n, max), 155 + )); 156 + } 157 + if let Some(ref enum_vals) = lex_int.enum_values 158 + && !enum_vals.contains(&n) 159 + { 160 + return Err(LexValidationError::field( 161 + path, 162 + format!("value {} not in enum", n), 163 + )); 164 + } 165 + if let Some(const_val) = lex_int.const_value 166 + && n != const_val 167 + { 168 + return Err(LexValidationError::field( 169 + path, 170 + format!("expected const value {}", const_val), 171 + )); 172 + } 173 + Ok(()) 174 + } 175 + LexProperty::Boolean {} => value 176 + .is_boolean() 177 + .then_some(()) 178 + .ok_or_else(|| LexValidationError::field(path, "expected a boolean")), 179 + LexProperty::CidLink {} => validate_cid_link(value, path), 180 + LexProperty::Blob(lex_blob) => validate_blob_ref(lex_blob, value, path), 181 + LexProperty::Unknown {} => Ok(()), 182 + LexProperty::Bytes(lex_bytes) => validate_bytes(lex_bytes, value, path), 183 + LexProperty::Ref(lex_ref) => validate_ref( 184 + registry, 185 + context_nsid, 186 + &lex_ref.reference, 187 + value, 188 + path, 189 + depth, 190 + ), 191 + LexProperty::Union(union_def) => { 192 + validate_union(registry, context_nsid, union_def, value, path, depth) 193 + } 194 + LexProperty::Array(array_def) => { 195 + validate_array(registry, context_nsid, array_def, value, path, depth) 196 + } 197 + LexProperty::Object(obj_def) => { 198 + validate_object(registry, context_nsid, obj_def, value, path, depth) 199 + } 200 + } 201 + } 202 + 203 + fn validate_string( 204 + lex_str: &LexString, 205 + value: &serde_json::Value, 206 + path: &str, 207 + ) -> Result<(), LexValidationError> { 208 + let s = value 209 + .as_str() 210 + .ok_or_else(|| LexValidationError::field(path, "expected a string"))?; 211 + 212 + if let Some(max_len) = lex_str.max_length 213 + && s.len() as u64 > max_len 214 + { 215 + return Err(LexValidationError::field( 216 + path, 217 + format!("string length {} exceeds max_length {}", s.len(), max_len), 218 + )); 219 + } 220 + 221 + if let Some(min_len) = lex_str.min_length 222 + && (s.len() as u64) < min_len 223 + { 224 + return Err(LexValidationError::field( 225 + path, 226 + format!("string length {} below min_length {}", s.len(), min_len), 227 + )); 228 + } 229 + 230 + if lex_str.max_graphemes.is_some() || lex_str.min_graphemes.is_some() { 231 + let count = s.graphemes(true).count() as u64; 232 + if let Some(max_graphemes) = lex_str.max_graphemes 233 + && count > max_graphemes 234 + { 235 + return Err(LexValidationError::field( 236 + path, 237 + format!( 238 + "grapheme count {} exceeds max_graphemes {}", 239 + count, max_graphemes 240 + ), 241 + )); 242 + } 243 + if let Some(min_graphemes) = lex_str.min_graphemes 244 + && count < min_graphemes 245 + { 246 + return Err(LexValidationError::field( 247 + path, 248 + format!( 249 + "grapheme count {} below min_graphemes {}", 250 + count, min_graphemes 251 + ), 252 + )); 253 + } 254 + } 255 + 256 + if let Some(ref format) = lex_str.format 257 + && !validate_format(format, s) 258 + { 259 + return Err(LexValidationError::field( 260 + path, 261 + format!("invalid format: {:?}", format), 262 + )); 263 + } 264 + 265 + if let Some(ref enum_vals) = lex_str.enum_values 266 + && !enum_vals.iter().any(|v| v == s) 267 + { 268 + return Err(LexValidationError::field( 269 + path, 270 + format!("value '{}' not in enum", s), 271 + )); 272 + } 273 + 274 + if let Some(ref const_val) = lex_str.const_value 275 + && s != const_val.as_str() 276 + { 277 + return Err(LexValidationError::field( 278 + path, 279 + format!("expected const value '{}'", const_val), 280 + )); 281 + } 282 + 283 + Ok(()) 284 + } 285 + 286 + fn validate_cid_link(value: &serde_json::Value, path: &str) -> Result<(), LexValidationError> { 287 + let obj = value 288 + .as_object() 289 + .ok_or_else(|| LexValidationError::field(path, "expected cid-link object"))?; 290 + 291 + if !obj.contains_key("$link") { 292 + return Err(LexValidationError::field(path, "cid-link missing $link")); 293 + } 294 + 295 + Ok(()) 296 + } 297 + 298 + fn validate_blob_ref( 299 + lex_blob: &LexBlob, 300 + value: &serde_json::Value, 301 + path: &str, 302 + ) -> Result<(), LexValidationError> { 303 + let obj = value 304 + .as_object() 305 + .ok_or_else(|| LexValidationError::field(path, "expected blob object"))?; 306 + 307 + let has_type = obj 308 + .get("$type") 309 + .and_then(|v| v.as_str()) 310 + .is_some_and(|t| t == "blob"); 311 + 312 + let has_ref = obj.contains_key("ref") && obj.contains_key("mimeType"); 313 + 314 + let has_cid = obj.contains_key("cid"); 315 + 316 + if !has_type && !has_ref && !has_cid { 317 + return Err(LexValidationError::field( 318 + path, 319 + "invalid blob reference structure", 320 + )); 321 + } 322 + 323 + if let Some(ref accept) = lex_blob.accept { 324 + let mime_type = obj.get("mimeType").and_then(|v| v.as_str()).unwrap_or(""); 325 + let matched = accept 326 + .iter() 327 + .any(|pattern| match pattern.strip_suffix("/*") { 328 + Some(prefix) => { 329 + mime_type.starts_with(prefix) 330 + && mime_type.as_bytes().get(prefix.len()) == Some(&b'/') 331 + } 332 + None => mime_type == pattern, 333 + }); 334 + if !mime_type.is_empty() && !matched { 335 + return Err(LexValidationError::field( 336 + path, 337 + format!("blob mimeType '{}' not in accepted types", mime_type), 338 + )); 339 + } 340 + } 341 + 342 + if let Some(max_size) = lex_blob.max_size { 343 + if let Some(size) = obj.get("size").and_then(|v| v.as_u64()) { 344 + if size > max_size { 345 + return Err(LexValidationError::field( 346 + path, 347 + format!("blob size {} exceeds max_size {}", size, max_size), 348 + )); 349 + } 350 + } 351 + } 352 + 353 + Ok(()) 354 + } 355 + 356 + fn validate_bytes( 357 + lex_bytes: &LexBytes, 358 + value: &serde_json::Value, 359 + path: &str, 360 + ) -> Result<(), LexValidationError> { 361 + let obj = value 362 + .as_object() 363 + .ok_or_else(|| LexValidationError::field(path, "expected bytes object with $bytes key"))?; 364 + 365 + let encoded = obj 366 + .get("$bytes") 367 + .and_then(|v| v.as_str()) 368 + .ok_or_else(|| LexValidationError::field(path, "bytes object missing $bytes key"))?; 369 + 370 + let byte_len = encoded.len() as u64 * 3 / 4; 371 + 372 + if let Some(max_len) = lex_bytes.max_length 373 + && byte_len > max_len 374 + { 375 + return Err(LexValidationError::field( 376 + path, 377 + format!("bytes length ~{} exceeds max_length {}", byte_len, max_len), 378 + )); 379 + } 380 + 381 + if let Some(min_len) = lex_bytes.min_length 382 + && byte_len < min_len 383 + { 384 + return Err(LexValidationError::field( 385 + path, 386 + format!("bytes length ~{} below min_length {}", byte_len, min_len), 387 + )); 388 + } 389 + 390 + Ok(()) 391 + } 392 + 393 + fn validate_ref( 394 + registry: &LexiconRegistry, 395 + context_nsid: &str, 396 + reference: &str, 397 + value: &serde_json::Value, 398 + path: &str, 399 + depth: u32, 400 + ) -> Result<(), LexValidationError> { 401 + let target_context = ref_to_context_nsid(reference, context_nsid); 402 + match registry.resolve_ref(reference, context_nsid) { 403 + Some(resolved) => { 404 + if resolved.is_token() { 405 + Ok(()) 406 + } else if let Some(obj) = resolved.as_object() { 407 + validate_object(registry, target_context, obj, value, path, depth + 1) 408 + } else { 409 + Ok(()) 410 + } 411 + } 412 + None => Ok(()), 413 + } 414 + } 415 + 416 + fn validate_union( 417 + registry: &LexiconRegistry, 418 + context_nsid: &str, 419 + union_def: &LexUnion, 420 + value: &serde_json::Value, 421 + path: &str, 422 + depth: u32, 423 + ) -> Result<(), LexValidationError> { 424 + let obj = value 425 + .as_object() 426 + .ok_or_else(|| LexValidationError::field(path, "union value must be an object"))?; 427 + 428 + let type_str = obj 429 + .get("$type") 430 + .and_then(|v| v.as_str()) 431 + .ok_or_else(|| LexValidationError::field(path, "union object missing $type"))?; 432 + 433 + let matched_ref = union_def.refs.iter().find(|r| { 434 + let resolved = resolve_union_ref(r, context_nsid); 435 + resolved == type_str 436 + }); 437 + 438 + match matched_ref { 439 + Some(reference) => validate_ref(registry, context_nsid, reference, value, path, depth), 440 + None => { 441 + if union_def.closed { 442 + Err(LexValidationError::field( 443 + path, 444 + format!("union type '{}' not in allowed refs", type_str), 445 + )) 446 + } else { 447 + Ok(()) 448 + } 449 + } 450 + } 451 + } 452 + 453 + fn validate_array( 454 + registry: &LexiconRegistry, 455 + context_nsid: &str, 456 + array_def: &LexArray, 457 + value: &serde_json::Value, 458 + path: &str, 459 + depth: u32, 460 + ) -> Result<(), LexValidationError> { 461 + let arr = value 462 + .as_array() 463 + .ok_or_else(|| LexValidationError::field(path, "expected an array"))?; 464 + 465 + if let Some(max_len) = array_def.max_length 466 + && arr.len() as u64 > max_len 467 + { 468 + return Err(LexValidationError::field( 469 + path, 470 + format!("array length {} exceeds max_length {}", arr.len(), max_len), 471 + )); 472 + } 473 + 474 + if let Some(min_len) = array_def.min_length 475 + && (arr.len() as u64) < min_len 476 + { 477 + return Err(LexValidationError::field( 478 + path, 479 + format!("array length {} below min_length {}", arr.len(), min_len), 480 + )); 481 + } 482 + 483 + arr.iter().enumerate().try_for_each(|(i, item)| { 484 + let item_path = format!("{}/{}", path, i); 485 + validate_property( 486 + registry, 487 + context_nsid, 488 + &array_def.items, 489 + item, 490 + &item_path, 491 + depth + 1, 492 + ) 493 + }) 494 + } 495 + 496 + fn field_path(parent: &str, field: &str) -> String { 497 + if parent.is_empty() { 498 + field.to_string() 499 + } else { 500 + format!("{}/{}", parent, field) 501 + } 502 + } 503 + 504 + #[cfg(test)] 505 + mod tests { 506 + use super::*; 507 + use crate::test_schemas::test_registry; 508 + use serde_json::json; 509 + 510 + #[test] 511 + fn test_validate_valid_record() { 512 + let registry = test_registry(); 513 + let record = json!({ 514 + "$type": "com.test.basic", 515 + "text": "Hello, world!", 516 + "createdAt": "2024-01-01T00:00:00.000Z" 517 + }); 518 + assert!(validate_record(&registry, "com.test.basic", &record).is_ok()); 519 + } 520 + 521 + #[test] 522 + fn test_validate_missing_required() { 523 + let registry = test_registry(); 524 + let record = json!({ 525 + "$type": "com.test.basic", 526 + "createdAt": "2024-01-01T00:00:00.000Z" 527 + }); 528 + let err = validate_record(&registry, "com.test.basic", &record).unwrap_err(); 529 + assert!(matches!(err, LexValidationError::MissingRequired { .. })); 530 + } 531 + 532 + #[test] 533 + fn test_validate_string_too_long_bytes() { 534 + let registry = test_registry(); 535 + let record = json!({ 536 + "$type": "com.test.basic", 537 + "text": "a".repeat(101), 538 + "createdAt": "2024-01-01T00:00:00.000Z" 539 + }); 540 + let err = validate_record(&registry, "com.test.basic", &record).unwrap_err(); 541 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 542 + } 543 + 544 + #[test] 545 + fn test_validate_string_too_many_graphemes() { 546 + let registry = test_registry(); 547 + let record = json!({ 548 + "$type": "com.test.basic", 549 + "text": "a".repeat(51), 550 + "createdAt": "2024-01-01T00:00:00.000Z" 551 + }); 552 + let err = validate_record(&registry, "com.test.basic", &record).unwrap_err(); 553 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 554 + } 555 + 556 + #[test] 557 + fn test_validate_grapheme_counting_emoji() { 558 + let registry = test_registry(); 559 + let emoji_text = "👨‍👩‍👧‍👦".repeat(11); 560 + let record = json!({ 561 + "$type": "com.test.profile", 562 + "displayName": emoji_text 563 + }); 564 + let err = validate_record(&registry, "com.test.profile", &record).unwrap_err(); 565 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 566 + } 567 + 568 + #[test] 569 + fn test_validate_integer_bounds() { 570 + let registry = test_registry(); 571 + let record = json!({ 572 + "$type": "com.test.basic", 573 + "text": "ok", 574 + "createdAt": "2024-01-01T00:00:00.000Z", 575 + "count": 101 576 + }); 577 + let err = validate_record(&registry, "com.test.basic", &record).unwrap_err(); 578 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 579 + 580 + let record_neg = json!({ 581 + "$type": "com.test.basic", 582 + "text": "ok", 583 + "createdAt": "2024-01-01T00:00:00.000Z", 584 + "count": -1 585 + }); 586 + let err = validate_record(&registry, "com.test.basic", &record_neg).unwrap_err(); 587 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 588 + } 589 + 590 + #[test] 591 + fn test_validate_integer_float_coercion() { 592 + let registry = test_registry(); 593 + let record = json!({ 594 + "$type": "com.test.basic", 595 + "text": "ok", 596 + "createdAt": "2024-01-01T00:00:00.000Z", 597 + "count": 5.0 598 + }); 599 + assert!(validate_record(&registry, "com.test.basic", &record).is_ok()); 600 + 601 + let record_frac = json!({ 602 + "$type": "com.test.basic", 603 + "text": "ok", 604 + "createdAt": "2024-01-01T00:00:00.000Z", 605 + "count": 5.5 606 + }); 607 + let err = validate_record(&registry, "com.test.basic", &record_frac).unwrap_err(); 608 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 609 + } 610 + 611 + #[test] 612 + fn test_validate_boolean() { 613 + let registry = test_registry(); 614 + let record = json!({ 615 + "$type": "com.test.basic", 616 + "text": "ok", 617 + "createdAt": "2024-01-01T00:00:00.000Z", 618 + "active": "not-a-bool" 619 + }); 620 + let err = validate_record(&registry, "com.test.basic", &record).unwrap_err(); 621 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 622 + } 623 + 624 + #[test] 625 + fn test_validate_array_max_length() { 626 + let registry = test_registry(); 627 + let record = json!({ 628 + "$type": "com.test.basic", 629 + "text": "ok", 630 + "createdAt": "2024-01-01T00:00:00.000Z", 631 + "tags": ["a", "b", "c", "d"] 632 + }); 633 + let err = validate_record(&registry, "com.test.basic", &record).unwrap_err(); 634 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 635 + } 636 + 637 + #[test] 638 + fn test_validate_array_within_limit() { 639 + let registry = test_registry(); 640 + let record = json!({ 641 + "$type": "com.test.basic", 642 + "text": "ok", 643 + "createdAt": "2024-01-01T00:00:00.000Z", 644 + "tags": ["a", "b", "c"] 645 + }); 646 + assert!(validate_record(&registry, "com.test.basic", &record).is_ok()); 647 + } 648 + 649 + #[test] 650 + fn test_validate_cross_schema_ref() { 651 + let registry = test_registry(); 652 + let record = json!({ 653 + "$type": "com.test.withref", 654 + "subject": { 655 + "uri": "at://did:plc:abc/com.test.basic/123", 656 + "cid": "bafyreiabcdef" 657 + }, 658 + "createdAt": "2024-01-01T00:00:00.000Z" 659 + }); 660 + assert!(validate_record(&registry, "com.test.withref", &record).is_ok()); 661 + } 662 + 663 + #[test] 664 + fn test_validate_cross_schema_ref_missing_field() { 665 + let registry = test_registry(); 666 + let record = json!({ 667 + "$type": "com.test.withref", 668 + "subject": { 669 + "cid": "bafyreiabcdef" 670 + }, 671 + "createdAt": "2024-01-01T00:00:00.000Z" 672 + }); 673 + let err = validate_record(&registry, "com.test.withref", &record).unwrap_err(); 674 + assert!(matches!(err, LexValidationError::MissingRequired { .. })); 675 + } 676 + 677 + #[test] 678 + fn test_validate_local_ref_resolution() { 679 + let registry = test_registry(); 680 + let record = json!({ 681 + "$type": "com.test.withreply", 682 + "text": "reply", 683 + "createdAt": "2024-01-01T00:00:00.000Z", 684 + "reply": { 685 + "root": { 686 + "uri": "at://did:plc:abc/com.test.basic/123", 687 + "cid": "bafyreiabcdef" 688 + }, 689 + "parent": { 690 + "uri": "at://did:plc:abc/com.test.basic/456", 691 + "cid": "bafyreiabcdef" 692 + } 693 + } 694 + }); 695 + assert!(validate_record(&registry, "com.test.withreply", &record).is_ok()); 696 + } 697 + 698 + #[test] 699 + fn test_validate_union_bare_nsid_ref() { 700 + let registry = test_registry(); 701 + let record = json!({ 702 + "$type": "com.test.withreply", 703 + "text": "with images", 704 + "createdAt": "2024-01-01T00:00:00.000Z", 705 + "embed": { 706 + "$type": "com.test.images", 707 + "images": [ 708 + { 709 + "image": { 710 + "$type": "blob", 711 + "ref": { "$link": "bafyreiabcdef" }, 712 + "mimeType": "image/jpeg", 713 + "size": 12345 714 + }, 715 + "alt": "test" 716 + } 717 + ] 718 + } 719 + }); 720 + assert!(validate_record(&registry, "com.test.withreply", &record).is_ok()); 721 + 722 + let bad_embed = json!({ 723 + "$type": "com.test.withreply", 724 + "text": "bad", 725 + "createdAt": "2024-01-01T00:00:00.000Z", 726 + "embed": { 727 + "$type": "com.test.images", 728 + "images": "not-an-array" 729 + } 730 + }); 731 + assert!( 732 + validate_record(&registry, "com.test.withreply", &bad_embed).is_err(), 733 + "union with bare NSID ref must validate the matched schema" 734 + ); 735 + } 736 + 737 + #[test] 738 + fn test_validate_cross_schema_local_ref_in_union() { 739 + let registry = test_registry(); 740 + let record = json!({ 741 + "$type": "com.test.withreply", 742 + "text": "external", 743 + "createdAt": "2024-01-01T00:00:00.000Z", 744 + "embed": { 745 + "$type": "com.test.external", 746 + "external": { 747 + "uri": "https://example.com", 748 + "title": "Example", 749 + "description": "A test" 750 + } 751 + } 752 + }); 753 + assert!(validate_record(&registry, "com.test.withreply", &record).is_ok()); 754 + 755 + let bad_external = json!({ 756 + "$type": "com.test.withreply", 757 + "text": "bad", 758 + "createdAt": "2024-01-01T00:00:00.000Z", 759 + "embed": { 760 + "$type": "com.test.external", 761 + "external": { 762 + "title": "missing uri and description" 763 + } 764 + } 765 + }); 766 + assert!( 767 + validate_record(&registry, "com.test.withreply", &bad_external).is_err(), 768 + "local #ref in cross-schema union must resolve against the correct schema" 769 + ); 770 + } 771 + 772 + #[test] 773 + fn test_validate_gate_with_union_fragment_ref() { 774 + let registry = test_registry(); 775 + let record = json!({ 776 + "$type": "com.test.withgate", 777 + "post": "at://did:plc:abc/com.test.basic/123", 778 + "createdAt": "2024-01-01T00:00:00.000Z", 779 + "rules": [ 780 + { "$type": "com.test.withgate#disableRule" } 781 + ] 782 + }); 783 + assert!(validate_record(&registry, "com.test.withgate", &record).is_ok()); 784 + } 785 + 786 + #[test] 787 + fn test_validate_did_format() { 788 + let registry = test_registry(); 789 + let record = json!({ 790 + "$type": "com.test.withdid", 791 + "subject": "did:plc:abc123", 792 + "createdAt": "2024-01-01T00:00:00.000Z" 793 + }); 794 + assert!(validate_record(&registry, "com.test.withdid", &record).is_ok()); 795 + 796 + let bad_did = json!({ 797 + "$type": "com.test.withdid", 798 + "subject": "not-a-did", 799 + "createdAt": "2024-01-01T00:00:00.000Z" 800 + }); 801 + assert!(validate_record(&registry, "com.test.withdid", &bad_did).is_err()); 802 + } 803 + 804 + #[test] 805 + fn test_validate_nullable_field() { 806 + let registry = test_registry(); 807 + let record = json!({ 808 + "$type": "com.test.nullable", 809 + "name": "test", 810 + "value": null 811 + }); 812 + assert!(validate_record(&registry, "com.test.nullable", &record).is_ok()); 813 + } 814 + 815 + #[test] 816 + fn test_validate_unknown_lexicon() { 817 + let registry = test_registry(); 818 + let record = json!({"$type": "com.example.nonexistent"}); 819 + let err = validate_record(&registry, "com.example.nonexistent", &record).unwrap_err(); 820 + assert!(matches!(err, LexValidationError::LexiconNotFound(_))); 821 + } 822 + 823 + #[test] 824 + fn test_validate_extra_properties_allowed() { 825 + let registry = test_registry(); 826 + let record = json!({ 827 + "$type": "com.test.basic", 828 + "text": "ok", 829 + "createdAt": "2024-01-01T00:00:00.000Z", 830 + "unknownField": "this is fine" 831 + }); 832 + assert!(validate_record(&registry, "com.test.basic", &record).is_ok()); 833 + } 834 + 835 + #[test] 836 + fn test_validate_no_required_fields() { 837 + let registry = test_registry(); 838 + let record = json!({"$type": "com.test.profile"}); 839 + assert!(validate_record(&registry, "com.test.profile", &record).is_ok()); 840 + } 841 + 842 + #[test] 843 + fn test_validate_profile_display_name_graphemes() { 844 + let registry = test_registry(); 845 + let record = json!({ 846 + "$type": "com.test.profile", 847 + "displayName": "a".repeat(11) 848 + }); 849 + let err = validate_record(&registry, "com.test.profile", &record).unwrap_err(); 850 + assert!(matches!(err, LexValidationError::InvalidField { .. })); 851 + } 852 + 853 + #[test] 854 + fn test_required_nullable_field_accepts_null() { 855 + let registry = test_registry(); 856 + let record = json!({ 857 + "$type": "com.test.requirednullable", 858 + "name": "test", 859 + "value": null 860 + }); 861 + assert!( 862 + validate_record(&registry, "com.test.requirednullable", &record).is_ok(), 863 + "a field that is both required and nullable must accept null values" 864 + ); 865 + } 866 + 867 + #[test] 868 + fn test_required_nullable_field_rejects_absent() { 869 + let registry = test_registry(); 870 + let record = json!({ 871 + "$type": "com.test.requirednullable", 872 + "name": "test" 873 + }); 874 + assert!( 875 + matches!( 876 + validate_record(&registry, "com.test.requirednullable", &record).unwrap_err(), 877 + LexValidationError::MissingRequired { .. } 878 + ), 879 + "a field that is required+nullable must still be present (even if null)" 880 + ); 881 + } 882 + 883 + #[test] 884 + fn test_required_nullable_field_accepts_value() { 885 + let registry = test_registry(); 886 + let record = json!({ 887 + "$type": "com.test.requirednullable", 888 + "name": "test", 889 + "value": "hello" 890 + }); 891 + assert!( 892 + validate_record(&registry, "com.test.requirednullable", &record).is_ok(), 893 + "a field that is required+nullable must accept non-null values" 894 + ); 895 + } 896 + }

History

2 rounds 0 comments
sign up or login to add to the discussion
1 commit
expand
feat(lexicon): record validation engine
expand 0 comments
pull request successfully merged
1 commit
expand
feat(lexicon): record validation engine
expand 0 comments