OCaml HTML5 parser/serialiser based on Python's JustHTML

fixes

Changed files
+123 -86
lib
htmlrw_check
test
+54 -86
lib/htmlrw_check/specialized/srcset_sizes_checker.ml
··· 1 1 (** Srcset and sizes attribute validation checker. *) 2 2 3 + (** Quote helper for consistent message formatting. *) 4 + let q = Error_code.q 5 + 3 6 (** Valid CSS length units for sizes attribute *) 4 7 let valid_length_units = [ 5 8 "em"; "ex"; "ch"; "rem"; "cap"; "ic"; ··· 400 403 (* Empty sizes is invalid *) 401 404 if String.trim value = "" then begin 402 405 Message_collector.add_typed collector 403 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Must not be empty." element_name)))); 406 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Must not be empty." (q "") (q "sizes") (q element_name))))); 404 407 false 405 408 end else begin 406 409 (* Split on comma and check each entry *) ··· 410 413 (* Check if starts with comma (empty first entry) *) 411 414 if first_entry = "" then begin 412 415 Message_collector.add_typed collector 413 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Starts with empty source size." value element_name)))); 416 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Starts with empty source size." 
(q value) (q "sizes") (q element_name))))); 414 417 false 415 418 end else begin 416 419 (* Check for trailing comma *) 417 420 let last_entry = String.trim (List.nth entries (List.length entries - 1)) in 418 421 if List.length entries > 1 && last_entry = "" then begin 419 - (* Generate abbreviated context - show last ~25 chars with ellipsis if needed *) 420 - let context = 421 - if String.length value > 25 then 422 - "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25 423 - else value 424 - in 425 422 Message_collector.add_typed collector 426 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)))); 423 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." (q value) (q "sizes") (q element_name) (q "") (q value))))); 427 424 false 428 425 end else begin 429 426 let valid = ref true in ··· 442 439 (* Context is the first entry with a comma *) 443 440 let context = (String.trim first) ^ "," in 444 441 Message_collector.add_typed collector 445 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)))); 442 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." (q value) (q "sizes") (q element_name) (q "") (q context))))); 446 443 valid := false 447 444 end; 448 445 (* Check for multiple entries without media conditions. 
··· 454 451 (* Multiple defaults - report as "Expected media condition" *) 455 452 let context = (String.trim first) ^ "," in 456 453 Message_collector.add_typed collector 457 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)))); 454 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." (q value) (q "sizes") (q element_name) (q "") (q context))))); 458 455 valid := false 459 456 end 460 457 end ··· 468 465 (* Check for invalid media condition *) 469 466 (match has_invalid_media_condition trimmed with 470 467 | Some err_msg -> 471 - (* Generate context: "entry," with ellipsis if needed *) 472 - let context = (String.trim entry) ^ "," in 473 - let context = 474 - if String.length context > 25 then 475 - "\xe2\x80\xa6" ^ String.sub context (String.length context - 25) 25 476 - else context 477 - in 468 + let context = trimmed ^ "," in 478 469 Message_collector.add_typed collector 479 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name err_msg context)))); 470 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: %s at %s." 
(q value) (q "sizes") (q element_name) err_msg (q context))))); 480 471 valid := false 481 472 | None -> ()); 482 473 ··· 508 499 let prev_entries = List.filter (fun e -> String.trim e <> "" && e <> entry) entries in 509 500 let context = 510 501 if List.length prev_entries > 0 then 511 - let prev_value = String.concat ", " (List.map String.trim prev_entries) ^ "," in 512 - if String.length prev_value > 25 then 513 - "\xe2\x80\xa6" ^ String.sub prev_value (String.length prev_value - 25) 25 514 - else prev_value 502 + String.concat ", " (List.map String.trim prev_entries) ^ "," 515 503 else value 516 504 in 517 505 Message_collector.add_typed collector 518 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)))); 506 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected media condition before %s at %s." 
(q value) (q "sizes") (q element_name) (q "") (q context))))); 519 507 valid := false 520 508 end 521 509 (* If there's extra junk after the size, report BadCssNumber error for it *) 522 510 else if extra_parts <> [] then begin 523 - let junk = String.concat " " extra_parts in 524 511 let last_junk = List.nth extra_parts (List.length extra_parts - 1) in 525 512 let first_char = if String.length last_junk > 0 then last_junk.[0] else 'x' in 526 - (* Context depends on whether this is the last entry: 527 - - For non-last entries: entry with trailing comma, truncated from beginning 528 - - For last entry: full value truncated from beginning (no trailing comma) *) 529 513 let is_last_entry = idx = num_entries - 1 in 530 514 let context = 531 - if is_last_entry then begin 532 - (* Last entry: use full value truncated *) 533 - if String.length value > 25 then 534 - "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25 535 - else value 536 - end else begin 537 - (* Non-last entry: use entry with comma, truncated *) 538 - let entry_with_comma = trimmed ^ "," in 539 - if String.length entry_with_comma > 25 then 540 - "\xe2\x80\xa6" ^ String.sub entry_with_comma (String.length entry_with_comma - 25) 25 541 - else entry_with_comma 542 - end 515 + if is_last_entry then value 516 + else trimmed ^ "," 543 517 in 544 - let _ = junk in 545 518 Message_collector.add_typed collector 546 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)))); 519 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw %s instead at %s." 
(q value) (q "sizes") (q element_name) (q (String.make 1 first_char)) (q context))))); 547 520 valid := false 548 521 end 549 522 else ··· 556 529 in 557 530 let _ = full_context in 558 531 Message_collector.add_typed collector 559 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected positive size value but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val size_val)))); 532 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected positive size value but found %s at %s." (q value) (q "sizes") (q element_name) (q size_val) (q size_val))))); 560 533 valid := false 561 534 | CssCommentAfterSign (found, context) -> 562 535 (* e.g., +/**/50vw - expected number after sign *) 563 536 Message_collector.add_typed collector 564 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected number but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name found context)))); 537 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected number but found %s at %s." 
(q value) (q "sizes") (q element_name) (q found) (q context))))); 565 538 valid := false 566 539 | CssCommentBeforeUnit (found, context) -> 567 540 (* e.g., 50/**/vw - expected units after number *) 568 - let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in 541 + let units_list = List.map q valid_length_units in 569 542 let units_str = String.concat ", " units_list in 570 543 Message_collector.add_typed collector 571 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found context)))); 544 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected units (one of %s) but found %s at %s." (q value) (q "sizes") (q element_name) units_str (q found) (q context))))); 572 545 valid := false 573 546 | BadScientificNotation -> 574 547 (* For scientific notation with bad exponent, show what char was expected vs found *) ··· 579 552 (* Find the period in the exponent *) 580 553 let _ = context in 581 554 Message_collector.add_typed collector 582 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a digit but saw \xe2\x80\x9c.\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val)))); 555 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Bad CSS number token: Expected a digit but saw %s instead at %s." 
(q value) (q "sizes") (q element_name) (q ".") (q size_val))))); 583 556 valid := false 584 557 | BadCssNumber (first_char, context) -> 585 558 (* Value doesn't start with a digit or minus sign *) ··· 589 562 in 590 563 let _ = full_context in 591 564 Message_collector.add_typed collector 592 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)))); 565 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw %s instead at %s." (q value) (q "sizes") (q element_name) (q (String.make 1 first_char)) (q context))))); 593 566 valid := false 594 567 | InvalidUnit (found_unit, _context) -> 595 568 (* Generate the full list of expected units *) 596 - let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in 569 + let units_list = List.map q valid_length_units in 597 570 let units_str = String.concat ", " units_list in 598 571 (* Context should be the full entry, with comma only if there are multiple entries *) 599 572 let full_context = ··· 603 576 (* When found_unit is empty, say "no units" instead of quoting empty string *) 604 577 let found_str = 605 578 if found_unit = "" then "no units" 606 - else Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" found_unit 579 + else q found_unit 607 580 in 608 581 Message_collector.add_typed collector 609 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found %s at 
\xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found_str full_context)))); 582 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad source size list: Expected units (one of %s) but found %s at %s." (q value) (q "sizes") (q element_name) units_str found_str (q full_context))))); 610 583 valid := false 611 584 end 612 585 end ··· 633 606 (* Show just the number part (without the 'w') *) 634 607 let num_part_for_msg = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 635 608 Message_collector.add_typed collector 636 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part_for_msg srcset_value)))); 609 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number without leading plus sign but found %s at %s." (q srcset_value) (q "srcset") (q element_name) (q num_part_for_msg) (q srcset_value))))); 637 610 false 638 611 end else 639 612 (try 640 613 let n = int_of_string num_part in 641 614 if n <= 0 then begin 642 615 Message_collector.add_typed collector 643 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)))); 616 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number greater than zero but found %s at %s." 
(q srcset_value) (q "srcset") (q element_name) (q num_part) (q srcset_value))))); 644 617 false 645 618 end else begin 646 619 (* Check for uppercase W - compare original desc with lowercase version *) 647 620 let original_last = desc.[String.length desc - 1] in 648 621 if original_last = 'W' then begin 649 622 Message_collector.add_typed collector 650 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name desc srcset_value)))); 623 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected width descriptor but found %s at %s. (When the %s attribute is present, all image candidate strings must specify a width.)" (q srcset_value) (q "srcset") (q element_name) (q desc) (q srcset_value) (q "sizes"))))); 651 624 false 652 625 end else true 653 626 end ··· 655 628 (* Check for scientific notation, decimal, or other non-integer values *) 656 629 if String.contains num_part 'e' || String.contains num_part 'E' || String.contains num_part '.' then begin 657 630 Message_collector.add_typed collector 658 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected integer but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)))); 631 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected integer but found %s at %s." 
(q srcset_value) (q "srcset") (q element_name) (q num_part) (q srcset_value))))); 659 632 false 660 633 end else begin 661 634 Message_collector.add_typed collector 662 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Invalid width descriptor." srcset_value element_name)))); 635 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad srcset descriptor: Invalid width descriptor." (q srcset_value) (q "srcset") (q element_name))))); 663 636 false 664 637 end) 665 638 | 'x' -> ··· 669 642 (* Extract the number part including the plus sign *) 670 643 let num_with_plus = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 671 644 Message_collector.add_typed collector 672 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_with_plus srcset_value)))); 645 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number without leading plus sign but found %s at %s." 
(q srcset_value) (q "srcset") (q element_name) (q num_with_plus) (q srcset_value))))); 673 646 false 674 647 end else begin 675 648 (try ··· 680 653 let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 681 654 let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in 682 655 Message_collector.add_typed collector 683 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)))); 656 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad positive floating point number: Expected a digit but saw %s instead at %s." (q srcset_value) (q "srcset") (q element_name) (q first_char) (q srcset_value))))); 684 657 false 685 658 end else if n = 0.0 then begin 686 659 (* Check if it's -0 (starts with minus) - report as "greater than zero" error *) ··· 688 661 let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 689 662 if String.length orig_num_part > 0 && orig_num_part.[0] = '-' then begin 690 663 Message_collector.add_typed collector 691 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name orig_num_part srcset_value)))) 664 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number greater than zero but found %s at %s." 
(q srcset_value) (q "srcset") (q element_name) (q orig_num_part) (q srcset_value))))) 692 665 end else begin 693 666 Message_collector.add_typed collector 694 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Zero is not a valid positive floating point number at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name srcset_value)))) 667 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad positive floating point number: Zero is not a valid positive floating point number at %s." (q srcset_value) (q "srcset") (q element_name) (q srcset_value))))) 695 668 end; 696 669 false 697 670 end else if n < 0.0 then begin 698 671 Message_collector.add_typed collector 699 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)))); 672 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number greater than zero but found %s at %s." 
(q srcset_value) (q "srcset") (q element_name) (q num_part) (q srcset_value))))); 700 673 false 701 674 end else if n = neg_infinity || n = infinity then begin 702 675 (* Infinity is not a valid float - report as parse error with first char from ORIGINAL desc *) ··· 704 677 let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 705 678 let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in 706 679 Message_collector.add_typed collector 707 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)))); 680 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad positive floating point number: Expected a digit but saw %s instead at %s." (q srcset_value) (q "srcset") (q element_name) (q first_char) (q srcset_value))))); 708 681 false 709 682 end else true 710 683 with _ -> 711 684 Message_collector.add_typed collector 712 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Invalid density descriptor." srcset_value element_name)))); 685 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad srcset descriptor: Invalid density descriptor." 
(q srcset_value) (q "srcset") (q element_name))))); 713 686 false) 714 687 end 715 688 | 'h' -> ··· 729 702 in 730 703 if has_sizes then 731 704 Message_collector.add_typed collector 732 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name trimmed_desc context)))) 705 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected width descriptor but found %s at %s. (When the %s attribute is present, all image candidate strings must specify a width.)" (q srcset_value) (q "srcset") (q element_name) (q trimmed_desc) (q context) (q "sizes"))))) 733 706 else 734 707 Message_collector.add_typed collector 735 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name)))); 708 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad srcset descriptor: Height descriptor %s is not allowed." 
(q srcset_value) (q "srcset") (q element_name) (q "h"))))); 736 709 false 737 710 | _ -> 738 711 (* Unknown descriptor - find context in srcset_value *) ··· 749 722 with Not_found -> trimmed_desc ^ ")" 750 723 else trimmed_desc 751 724 in 752 - (* Try to find the context: show trailing portion ending with descriptor and comma *) 725 + (* Find context: the entry containing the error with trailing comma *) 753 726 let context = 754 727 try 755 728 let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in 756 729 (* Get the context ending with the descriptor and the comma after *) 757 730 let end_pos = min (pos + String.length trimmed_desc + 1) (String.length srcset_value) in 758 - (* Show trailing portion with ellipsis if needed *) 759 - let max_context = 15 in 760 - if end_pos > max_context then 761 - "\xe2\x80\xa6" ^ String.sub srcset_value (end_pos - max_context) max_context 762 - else 763 - String.trim (String.sub srcset_value 0 end_pos) 731 + String.trim (String.sub srcset_value 0 end_pos) 764 732 with Not_found -> srcset_value 765 733 in 766 734 Message_collector.add_typed collector 767 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number followed by \xe2\x80\x9cw\xe2\x80\x9d or \xe2\x80\x9cx\xe2\x80\x9d but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name found_desc context)))); 735 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected number followed by %s or %s but found %s at %s." 
(q srcset_value) (q "srcset") (q element_name) (q "w") (q "x") (q found_desc) (q context))))); 768 736 false 769 737 end 770 738 ··· 800 768 (* Check for empty srcset *) 801 769 if String.trim value = "" then begin 802 770 Message_collector.add_typed collector 803 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must contain one or more image candidate strings." value element_name)))) 771 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Must contain one or more image candidate strings." (q value) (q "srcset") (q element_name))))) 804 772 end; 805 773 806 774 (* Check for leading comma *) 807 775 if String.length value > 0 && value.[0] = ',' then begin 808 776 Message_collector.add_typed collector 809 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Starts with empty image-candidate string." value element_name)))) 777 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Starts with empty image-candidate string." (q value) (q "srcset") (q element_name))))) 810 778 end; 811 779 812 780 (* Check for trailing comma(s) / empty entries *) ··· 823 791 if trailing_commas > 1 then 824 792 (* Multiple trailing commas: "Empty image-candidate string at" *) 825 793 Message_collector.add_typed collector 826 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Empty image-candidate string at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name value)))) 794 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Empty image-candidate string at %s." 
(q value) (q "srcset") (q element_name) (q value))))) 827 795 else 828 796 (* Single trailing comma: "Ends with empty image-candidate string." *) 829 797 Message_collector.add_typed collector 830 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Ends with empty image-candidate string." value element_name)))) 798 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Ends with empty image-candidate string." (q value) (q "srcset") (q element_name))))) 831 799 end; 832 800 833 801 List.iter (fun entry -> ··· 845 813 let scheme_colon = scheme ^ ":" in 846 814 if url_lower = scheme_colon then 847 815 Message_collector.add_typed collector 848 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad image-candidate URL: \xe2\x80\x9c%s\xe2\x80\x9d: Expected a slash (\"/\")." value element_name url)))) 816 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad image-candidate URL: %s: Expected a slash (\"/\")." (q value) (q "srcset") (q element_name) (q url))))) 849 817 ) special_schemes 850 818 in 851 819 match parts with ··· 857 825 begin match Hashtbl.find_opt seen_descriptors "explicit-1x" with 858 826 | Some first_url -> 859 827 Message_collector.add_typed collector 860 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Density for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to density for image \xe2\x80\x9c%s\xe2\x80\x9d." 
value element_name url first_url)))) 828 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Density for image %s is identical to density for image %s." (q value) (q "srcset") (q element_name) (q url) (q first_url))))) 861 829 | None -> 862 830 Hashtbl.add seen_descriptors "implicit-1x" url 863 831 end ··· 868 836 if rest <> [] then begin 869 837 let extra_desc = List.hd rest in 870 838 Message_collector.add_typed collector 871 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected single descriptor but found extraneous descriptor \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name extra_desc value)))) 839 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value))))) 872 840 end; 873 841 874 842 let desc_lower = String.lowercase_ascii (String.trim desc) in ··· 907 875 value 908 876 in 909 877 Message_collector.add_typed collector 910 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name trimmed_desc entry_context)))) 878 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected width descriptor but found %s at %s. 
(When the %s attribute is present, all image candidate strings must specify a width.)" (q value) (q "srcset") (q element_name) (q trimmed_desc) (q entry_context) (q "sizes"))))) 911 879 end 912 880 end; 913 881 ··· 919 887 begin match Hashtbl.find_opt seen_descriptors normalized with 920 888 | Some first_url -> 921 889 Message_collector.add_typed collector 922 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)))) 890 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 923 891 | None -> 924 892 begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with 925 893 | Some first_url -> 926 894 (* Explicit 1x conflicts with implicit 1x *) 927 895 Message_collector.add_typed collector 928 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)))) 896 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." 
(q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 929 897 | None -> 930 898 Hashtbl.add seen_descriptors normalized url; 931 899 if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url ··· 946 914 (match !no_descriptor_url with 947 915 | Some url when has_sizes -> 948 916 Message_collector.add_typed collector 949 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name url)))) 917 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: No width specified for image %s. (When the %s attribute is present, all image candidate strings must specify a width.)" (q value) (q "srcset") (q element_name) (q url) (q "sizes"))))) 950 918 | _ -> ()); 951 919 952 920 (* Check: if sizes is present and srcset uses x descriptors, that's an error. 953 921 Only report if we haven't already reported the detailed error. *) 954 922 if has_sizes && !has_x_descriptor && not !x_with_sizes_error_reported then 955 923 Message_collector.add_typed collector 956 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name)))); 924 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: When the %s attribute is present, all image candidate strings must specify a width." 
(q value) (q "srcset") (q element_name) (q "sizes"))))); 957 925 958 926 (* Check for mixing w and x descriptors *) 959 927 if !has_w_descriptor && !has_x_descriptor then 960 928 Message_collector.add_typed collector 961 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Mixing width and density descriptors is not allowed." value element_name)))) 929 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Mixing width and density descriptors is not allowed." (q value) (q "srcset") (q element_name))))) 962 930 963 931 let start_element _state ~element collector = 964 932 match element.Element.tag with
+69
test/expected_message.ml
··· 48 48 require_severity = true; 49 49 } 50 50 51 + (** Unicode ellipsis character *) 52 + let ellipsis = "\xe2\x80\xa6" 53 + 51 54 (** Normalize Unicode curly quotes to ASCII for comparison *) 52 55 let normalize_quotes s = 53 56 let buf = Buffer.create (String.length s) in ··· 70 73 end 71 74 done; 72 75 Buffer.contents buf 76 + 77 + (** Unicode curly quotes *) 78 + let left_curly_quote = "\xe2\x80\x9c" 79 + let right_curly_quote = "\xe2\x80\x9d" 80 + 81 + (** Check if expected message (with potential ellipsis truncation) matches actual. 82 + When expected has ellipsis followed by text in curly quotes, we check if actual 83 + has a value that ends with that text. 84 + This handles Nu validator's message truncation for long attribute values. Only the first truncated quote in [expected] is considered. Returns [true] only when [actual] is exactly the text before that quote, then one curly-quoted value ending with the truncated text, then the text after the quote; returns [false] otherwise, including when [expected] has no truncated quote. *) 85 + let truncation_aware_match expected actual = 86 + (* Look for pattern: left_curly_quote + ellipsis in expected *) 87 + let quote_ellipsis = left_curly_quote ^ ellipsis in 88 + try 89 + let pos = Str.search_forward (Str.regexp_string quote_ellipsis) expected 0 in 90 + (* Found quote+ellipsis pattern - extract what comes after ellipsis until closing curly quote *) 91 + let start_after_ellipsis = pos + String.length quote_ellipsis in 92 + let end_quote_pos = 93 + try Str.search_forward (Str.regexp_string right_curly_quote) expected start_after_ellipsis 94 + with Not_found -> String.length expected 95 + in 96 + let truncated_suffix = String.sub expected start_after_ellipsis (end_quote_pos - start_after_ellipsis) in 97 + 98 + (* Build expected prefix (everything before the truncated quote) and suffix (everything after) *) 99 + let prefix = String.sub expected 0 pos in 100 + let suffix_start = end_quote_pos + String.length right_curly_quote in 101 + let suffix = 102 + if suffix_start < String.length expected then 103 + String.sub expected suffix_start (String.length expected - suffix_start) 104 + else "" 105 + in 106 + 107 + (* Check if actual starts with prefix and ends with suffix *) 108 + let actual_starts_with_prefix = 
109 + String.length actual >= String.length prefix && 110 + String.sub actual 0 (String.length prefix) = prefix 111 + in 112 + let actual_ends_with_suffix = 113 + String.length actual >= String.length suffix && 114 + String.sub actual (String.length actual - String.length suffix) (String.length suffix) = suffix 115 + in 116 + 117 + (* If prefix and suffix match, extract the middle (the quoted value in actual) *) 118 + if actual_starts_with_prefix && actual_ends_with_suffix then begin 119 + (* Find the quoted value in actual at the same position *) 120 + let actual_quote_start = String.length prefix in 121 + try 122 + (* Check actual has left curly quote at expected position *) 123 + if String.sub actual actual_quote_start (String.length left_curly_quote) = left_curly_quote then begin 124 + let actual_value_start = actual_quote_start + String.length left_curly_quote in 125 + let actual_value_end = 126 + Str.search_forward (Str.regexp_string right_curly_quote) actual actual_value_start 127 + in 128 + let actual_value = String.sub actual actual_value_start (actual_value_end - actual_value_start) in 129 + (* The quoted value must end with the truncated text from expected, and its closing quote must be followed by exactly [suffix] so no stray text can hide between the value and the suffix *) 130 + String.length actual_value >= String.length truncated_suffix && 131 + String.sub actual_value (String.length actual_value - String.length truncated_suffix) (String.length truncated_suffix) = truncated_suffix && actual_value_end + String.length right_curly_quote + String.length suffix = String.length actual 132 + end else false 133 + with Not_found | Invalid_argument _ -> false 134 + end else false 135 + with Not_found -> 136 + (* No ellipsis truncation pattern found *) 137 + false 73 138 74 139 (** Pattern matchers for Nu validator messages. 
75 140 Each returns (error_code option, element option, attribute option) *) ··· 366 431 367 432 (* Check message text *) 368 433 let exact_text_match = actual_norm = expected_norm in 434 + (* Truncation-aware match: expected may have ellipsis where actual has full value *) 435 + let truncation_match = truncation_aware_match expected.message actual.Htmlrw_check.text in 369 436 let substring_match = 370 437 try let _ = Str.search_forward (Str.regexp_string expected_norm) actual_norm 0 in true 371 438 with Not_found -> false ··· 380 447 Code_match 381 448 else if exact_text_match then 382 449 Message_match 450 + else if truncation_match then 451 + Message_match (* Treat truncation match same as message match *) 383 452 else if substring_match && not strictness.require_exact_message then 384 453 Substring_match 385 454 else