Punycode (RFC3492) in OCaml
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
5
6(* Crowbar-based fuzz testing for Punycode encoding/decoding *)
7
8open Crowbar
9
10(* Test that encode_utf8 never crashes on arbitrary input *)
11let test_encode_no_crash input =
12 (try ignore (Punycode.encode_utf8 input) with Punycode.Error _ -> ());
13 check true
14
15(* Test that decode_utf8 never crashes on arbitrary input *)
16let test_decode_no_crash input =
17 (try ignore (Punycode.decode_utf8 input) with Punycode.Error _ -> ());
18 check true
19
20(* Test roundtrip: encode then decode should give back original (case-insensitive)
21 IDNA/Punycode lowercases ASCII characters during encoding per RFC 5891 *)
22let test_roundtrip input =
23 (try
24 let encoded = Punycode.encode_utf8 input in
25 let decoded = Punycode.decode_utf8 encoded in
26 (* Compare lowercase versions since IDNA lowercases ASCII *)
27 check_eq ~pp:Format.pp_print_string
28 (String.lowercase_ascii input)
29 (String.lowercase_ascii decoded)
30 with Punycode.Error _ ->
31 (* Some inputs might not encode/decode, that's ok *)
32 check true)
33
34(* Test ASCII-only strings (should pass through mostly unchanged) *)
35let test_ascii_string input =
36 if String.length input > 0 then begin
37 let ascii_only =
38 String.init
39 (String.length input mod 64)
40 (fun i ->
41 Char.chr (Char.code input.[i mod String.length input] mod 128))
42 in
43 if String.length ascii_only > 0 then
44 (try ignore (Punycode.encode_utf8 ascii_only) with Punycode.Error _ -> ())
45 end;
46 check true
47
48(* Test inputs starting with ACE prefix "xn--" *)
49let test_ace_prefix input =
50 let ace_input = "xn--" ^ input in
51 (try ignore (Punycode.decode_utf8 ace_input) with Punycode.Error _ -> ());
52 check true
53
54let () =
55 add_test ~name:"punycode: encode no crash" [ bytes ] test_encode_no_crash;
56 add_test ~name:"punycode: decode no crash" [ bytes ] test_decode_no_crash;
57 add_test ~name:"punycode: roundtrip" [ bytes ] test_roundtrip;
58 add_test ~name:"punycode: ascii string" [ bytes ] test_ascii_string;
59 add_test ~name:"punycode: ace prefix" [ bytes ] test_ace_prefix