Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/bpf: Convert glob_match() to bpf arena

Increase arena test coverage.
Convert glob_match() to bpf arena in two steps:
1.
Copy paste lib/glob.c into bpf_arena_strsearch.h
Copy paste lib/globtests.c into progs/arena_strsearch.c

2.
Add __arena to pointers
Add __arg_arena to global functions that accept arena pointers
Add cond_break to loops

The test also serves as a good example of what's possible
with bpf arena and how existing algorithms can be converted.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20251111032931.21430-1-alexei.starovoitov@gmail.com

authored by

Alexei Starovoitov and committed by
Andrii Nakryiko
63066b7a fea3f5e8

+304
+128
tools/testing/selftests/bpf/bpf_arena_strsearch.h
··· 1 + /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #pragma once 4 + #include "bpf_arena_common.h" 5 + 6 + __noinline int bpf_arena_strlen(const char __arena *s __arg_arena) 7 + { 8 + const char __arena *sc; 9 + 10 + for (sc = s; *sc != '\0'; ++sc) 11 + cond_break; 12 + return sc - s; 13 + } 14 + 15 + /** 16 + * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) 17 + * @pat: Shell-style pattern to match, e.g. "*.[ch]". 18 + * @str: String to match. The pattern must match the entire string. 19 + * 20 + * Perform shell-style glob matching, returning true (1) if the match 21 + * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). 22 + * 23 + * Pattern metacharacters are ?, *, [ and \. 24 + * (And, inside character classes, !, - and ].) 25 + * 26 + * This is small and simple implementation intended for device blacklists 27 + * where a string is matched against a number of patterns. Thus, it 28 + * does not preprocess the patterns. It is non-recursive, and run-time 29 + * is at most quadratic: strlen(@str)*strlen(@pat). 30 + * 31 + * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); 32 + * it takes 6 passes over the pattern before matching the string. 33 + * 34 + * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT 35 + * treat / or leading . specially; it isn't actually used for pathnames. 36 + * 37 + * Note that according to glob(7) (and unlike bash), character classes 38 + * are complemented by a leading !; this does not support the regex-style 39 + * [^a-z] syntax. 40 + * 41 + * An opening bracket without a matching close is matched literally. 42 + */ 43 + __noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena) 44 + { 45 + /* 46 + * Backtrack to previous * on mismatch and retry starting one 47 + * character later in the string. Because * matches all characters 48 + * (no exception for /), it can be easily proved that there's 49 + * never a need to backtrack multiple levels. 50 + */ 51 + char const __arena *back_pat = NULL, *back_str; 52 + 53 + /* 54 + * Loop over each token (character or class) in pat, matching 55 + * it against the remaining unmatched tail of str. Return false 56 + * on mismatch, or true after matching the trailing nul bytes. 57 + */ 58 + for (;;) { 59 + unsigned char c = *str++; 60 + unsigned char d = *pat++; 61 + 62 + switch (d) { 63 + case '?': /* Wildcard: anything but nul */ 64 + if (c == '\0') 65 + return false; 66 + break; 67 + case '*': /* Any-length wildcard */ 68 + if (*pat == '\0') /* Optimize trailing * case */ 69 + return true; 70 + back_pat = pat; 71 + back_str = --str; /* Allow zero-length match */ 72 + break; 73 + case '[': { /* Character class */ 74 + bool match = false, inverted = (*pat == '!'); 75 + char const __arena *class = pat + inverted; 76 + unsigned char a = *class++; 77 + 78 + /* 79 + * Iterate over each span in the character class. 80 + * A span is either a single character a, or a 81 + * range a-b. The first span may begin with ']'. 82 + */ 83 + do { 84 + unsigned char b = a; 85 + 86 + if (a == '\0') /* Malformed */ 87 + goto literal; 88 + 89 + if (class[0] == '-' && class[1] != ']') { 90 + b = class[1]; 91 + 92 + if (b == '\0') 93 + goto literal; 94 + 95 + class += 2; 96 + /* Any special action if a > b? */ 97 + } 98 + match |= (a <= c && c <= b); 99 + cond_break; 100 + } while ((a = *class++) != ']'); 101 + 102 + if (match == inverted) 103 + goto backtrack; 104 + pat = class; 105 + } 106 + break; 107 + case '\\': 108 + d = *pat++; 109 + __attribute__((__fallthrough__)); 110 + default: /* Literal character */ 111 + literal: 112 + if (c == d) { 113 + if (d == '\0') 114 + return true; 115 + break; 116 + } 117 + backtrack: 118 + if (c == '\0' || !back_pat) 119 + return false; /* No point continuing */ 120 + /* Try again from last *, one character later in str. */ 121 + pat = back_pat; 122 + str = ++back_str; 123 + break; 124 + } 125 + cond_break; 126 + } 127 + return false; 128 + }
+30
tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <test_progs.h> 4 + #include "arena_strsearch.skel.h" 5 + 6 + static void test_arena_str(void) 7 + { 8 + LIBBPF_OPTS(bpf_test_run_opts, opts); 9 + struct arena_strsearch *skel; 10 + int ret; 11 + 12 + skel = arena_strsearch__open_and_load(); 13 + if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load")) 14 + return; 15 + 16 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts); 17 + ASSERT_OK(ret, "ret_add"); 18 + ASSERT_OK(opts.retval, "retval"); 19 + if (skel->bss->skip) { 20 + printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__); 21 + test__skip(); 22 + } 23 + arena_strsearch__destroy(skel); 24 + } 25 + 26 + void test_arena_strsearch(void) 27 + { 28 + if (test__start_subtest("arena_strsearch")) 29 + test_arena_str(); 30 + }
+146
tools/testing/selftests/bpf/progs/arena_strsearch.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <vmlinux.h> 4 + #include "bpf_experimental.h" 5 + 6 + struct { 7 + __uint(type, BPF_MAP_TYPE_ARENA); 8 + __uint(map_flags, BPF_F_MMAPABLE); 9 + __uint(max_entries, 100); /* number of pages */ 10 + } arena SEC(".maps"); 11 + 12 + #include "bpf_arena_strsearch.h" 13 + 14 + struct glob_test { 15 + char const __arena *pat, *str; 16 + bool expected; 17 + }; 18 + 19 + static bool test(char const __arena *pat, char const __arena *str, bool expected) 20 + { 21 + bool match = glob_match(pat, str); 22 + bool success = match == expected; 23 + 24 + /* bpf_printk("glob_match %s %s res %d ok %d", pat, str, match, success); */ 25 + return success; 26 + } 27 + 28 + /* 29 + * The tests are all jammed together in one array to make it simpler 30 + * to place that array in the .init.rodata section. The obvious 31 + * "array of structures containing char *" has no way to force the 32 + * pointed-to strings to be in a particular section. 33 + * 34 + * Anyway, a test consists of: 35 + * 1. Expected glob_match result: '1' or '0'. 36 + * 2. Pattern to match: null-terminated string 37 + * 3. String to match against: null-terminated string 38 + * 39 + * The list of tests is terminated with a final '\0' instead of 40 + * a glob_match result character. 41 + */ 42 + static const char __arena glob_tests[] = 43 + /* Some basic tests */ 44 + "1" "a\0" "a\0" 45 + "0" "a\0" "b\0" 46 + "0" "a\0" "aa\0" 47 + "0" "a\0" "\0" 48 + "1" "\0" "\0" 49 + "0" "\0" "a\0" 50 + /* Simple character class tests */ 51 + "1" "[a]\0" "a\0" 52 + "0" "[a]\0" "b\0" 53 + "0" "[!a]\0" "a\0" 54 + "1" "[!a]\0" "b\0" 55 + "1" "[ab]\0" "a\0" 56 + "1" "[ab]\0" "b\0" 57 + "0" "[ab]\0" "c\0" 58 + "1" "[!ab]\0" "c\0" 59 + "1" "[a-c]\0" "b\0" 60 + "0" "[a-c]\0" "d\0" 61 + /* Corner cases in character class parsing */ 62 + "1" "[a-c-e-g]\0" "-\0" 63 + "0" "[a-c-e-g]\0" "d\0" 64 + "1" "[a-c-e-g]\0" "f\0" 65 + "1" "[]a-ceg-ik[]\0" "a\0" 66 + "1" "[]a-ceg-ik[]\0" "]\0" 67 + "1" "[]a-ceg-ik[]\0" "[\0" 68 + "1" "[]a-ceg-ik[]\0" "h\0" 69 + "0" "[]a-ceg-ik[]\0" "f\0" 70 + "0" "[!]a-ceg-ik[]\0" "h\0" 71 + "0" "[!]a-ceg-ik[]\0" "]\0" 72 + "1" "[!]a-ceg-ik[]\0" "f\0" 73 + /* Simple wild cards */ 74 + "1" "?\0" "a\0" 75 + "0" "?\0" "aa\0" 76 + "0" "??\0" "a\0" 77 + "1" "?x?\0" "axb\0" 78 + "0" "?x?\0" "abx\0" 79 + "0" "?x?\0" "xab\0" 80 + /* Asterisk wild cards (backtracking) */ 81 + "0" "*??\0" "a\0" 82 + "1" "*??\0" "ab\0" 83 + "1" "*??\0" "abc\0" 84 + "1" "*??\0" "abcd\0" 85 + "0" "??*\0" "a\0" 86 + "1" "??*\0" "ab\0" 87 + "1" "??*\0" "abc\0" 88 + "1" "??*\0" "abcd\0" 89 + "0" "?*?\0" "a\0" 90 + "1" "?*?\0" "ab\0" 91 + "1" "?*?\0" "abc\0" 92 + "1" "?*?\0" "abcd\0" 93 + "1" "*b\0" "b\0" 94 + "1" "*b\0" "ab\0" 95 + "0" "*b\0" "ba\0" 96 + "1" "*b\0" "bb\0" 97 + "1" "*b\0" "abb\0" 98 + "1" "*b\0" "bab\0" 99 + "1" "*bc\0" "abbc\0" 100 + "1" "*bc\0" "bc\0" 101 + "1" "*bc\0" "bbc\0" 102 + "1" "*bc\0" "bcbc\0" 103 + /* Multiple asterisks (complex backtracking) */ 104 + "1" "*ac*\0" "abacadaeafag\0" 105 + "1" "*ac*ae*ag*\0" "abacadaeafag\0" 106 + "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" 107 + "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" 108 + "1" "*abcd*\0" "abcabcabcabcdefg\0" 109 + "1" "*ab*cd*\0" "abcabcabcabcdefg\0" 110 + "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" 111 + "0" "*abcd*\0" "abcabcabcabcefg\0" 112 + "0" "*ab*cd*\0" "abcabcabcabcefg\0"; 113 + 114 + bool skip = false; 115 + 116 + SEC("syscall") 117 + int arena_strsearch(void *ctx) 118 + { 119 + unsigned successes = 0; 120 + unsigned n = 0; 121 + char const __arena *p = glob_tests; 122 + 123 + /* 124 + * Tests are jammed together in a string. The first byte is '1' 125 + * or '0' to indicate the expected outcome, or '\0' to indicate the 126 + * end of the tests. Then come two null-terminated strings: the 127 + * pattern and the string to match it against. 128 + */ 129 + while (*p) { 130 + bool expected = *p++ & 1; 131 + char const __arena *pat = p; 132 + 133 + cond_break; 134 + p += bpf_arena_strlen(p) + 1; 135 + successes += test(pat, p, expected); 136 + p += bpf_arena_strlen(p) + 1; 137 + n++; 138 + } 139 + 140 + n -= successes; 141 + /* bpf_printk("glob: %u self-tests passed, %u failed\n", successes, n); */ 142 + 143 + return n ? -1 : 0; 144 + } 145 + 146 + char _license[] SEC("license") = "GPL";