Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __TASK_LOCAL_DATA_H
3#define __TASK_LOCAL_DATA_H
4
5#include <errno.h>
6#include <fcntl.h>
7#include <sched.h>
8#include <stdatomic.h>
9#include <stddef.h>
10#include <stdlib.h>
11#include <string.h>
12#include <unistd.h>
13#include <sys/syscall.h>
14#include <sys/types.h>
15
16#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
17#include <pthread.h>
18#endif
19
20#include <bpf/bpf.h>
21
22/*
23 * OPTIONS
24 *
25 * Define the option before including the header
26 *
27 * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
28 *
 * Thread-specific memory for storing TLDs is allocated lazily on the first call to
 * tld_get_data(). A thread that calls it must also call tld_free() on exit to
 * prevent a memory leak. If this option is defined, <pthread.h> is included and a
 * pthread key is registered with a destructor that calls tld_free() automatically.
33 *
34 *
35 * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
36 * (default: 64 bytes)
37 *
 * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using
 * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot
 * be known statically, a memory area of size TLD_DYN_DATA_SIZE is allocated for
 * these TLDs. This additional memory is allocated for every thread that calls
 * tld_get_data() even if tld_create_key() is never called, so be mindful of
 * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible, as just enough
 * memory will be allocated for TLDs created with it.
45 *
46 *
47 * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
48 *
49 * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
50 * TLD_MAX_DATA_CNT.
51 *
52 *
53 * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc()
54 *
 * When allocating the memory for storing TLDs, we need to make sure there is a memory
 * region of X bytes within a single page. This is due to the limit imposed by UPTR:
 * memory pinned to the kernel cannot exceed a page, nor can it cross a page boundary.
 * The library normally calls malloc(2*X) given X bytes of total TLDs, and only uses
 * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage,
 * as not all memory allocators can fulfill aligned_alloc() using exactly the amount of
 * memory requested. For example, some may round the size up to the alignment. Enable
 * the option to always use aligned_alloc() if the implementation has low memory overhead.
63 */
64
/* Page size of the running system; the call is evaluated on every use. */
#define TLD_PAGE_SIZE getpagesize()
/* Mask that clears the in-page offset bits of an address. */
#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))

/* (y - 1) converted to the type of x; y must be a power of two. */
#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
/* Round x up to the next multiple of y; y must be a power of two. */
#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)

/*
 * Read x through a volatile-qualified lvalue so the access is not cached or
 * elided by the compiler. __typeof__ is used, as in TLD_ROUND_MASK(), because
 * the bare typeof keyword is not available in strict ISO C modes (-std=c11).
 */
#define TLD_READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

/* Bytes reserved per thread for TLDs created with tld_create_key(). */
#ifndef TLD_DYN_DATA_SIZE
#define TLD_DYN_DATA_SIZE 64
#endif

/* Number of struct tld_metadata slots, derived from the page size. */
#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)

/* Width in bytes of the name field of a TLD. */
#ifndef TLD_NAME_LEN
#define TLD_NAME_LEN 62
#endif
82
83#ifdef __cplusplus
84extern "C" {
85#endif
86
87typedef struct {
88 __s16 off;
89} tld_key_t;
90
91struct tld_metadata {
92 char name[TLD_NAME_LEN];
93 _Atomic __u16 size;
94};
95
96struct tld_meta_u {
97 _Atomic __u8 cnt;
98 __u16 size;
99 struct tld_metadata metadata[];
100};
101
102struct tld_data_u {
103 __u64 start; /* offset of tld_data_u->data in a page */
104 char data[];
105};
106
/*
 * Value written into the map passed to tld_get_data().
 *
 * @data: page-aligned address of the page holding the thread's TLD storage
 * @meta: the process-wide metadata page
 */
struct tld_map_value {
	void *data;
	struct tld_meta_u *meta;
};
111
/* Process-wide metadata page; NULL until __tld_init_meta_p() publishes it. */
struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));

/* Calling thread's TLD storage; NULL until the first tld_get_data(). */
__thread struct tld_data_u *tld_data_p __attribute__((weak));

/* Pointer returned by the allocator for tld_data_p; released by tld_free(). */
__thread void *tld_data_alloc_p __attribute__((weak));
115
#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
/* Key whose destructor releases the exiting thread's TLD memory. */
pthread_key_t tld_pthread_key __attribute__((weak));

static void tld_free(void);

/* Destructor registered for tld_pthread_key; the key's value is ignored. */
static void __tld_thread_exit_handler(void *unused)
{
	(void)unused;
	tld_free();
}
#endif
126
127static int __tld_init_meta_p(void)
128{
129 struct tld_meta_u *meta, *uninit = NULL;
130 int err = 0;
131
132 meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE);
133 if (!meta) {
134 err = -ENOMEM;
135 goto out;
136 }
137
138 memset(meta, 0, TLD_PAGE_SIZE);
139 meta->size = TLD_DYN_DATA_SIZE;
140
141 if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) {
142 free(meta);
143 goto out;
144 }
145
146#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
147 pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
148#endif
149out:
150 return err;
151}
152
153static int __tld_init_data_p(int map_fd)
154{
155 bool use_aligned_alloc = false;
156 struct tld_map_value map_val;
157 struct tld_data_u *data;
158 void *data_alloc = NULL;
159 int err, tid_fd = -1;
160
161 tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
162 if (tid_fd < 0) {
163 err = -errno;
164 goto out;
165 }
166
167#ifdef TLD_DATA_USE_ALIGNED_ALLOC
168 use_aligned_alloc = true;
169#endif
170
171 /*
172 * tld_meta_p->size = TLD_DYN_DATA_SIZE +
173 * total size of TLDs defined via TLD_DEFINE_KEY()
174 */
175 data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ?
176 aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) :
177 malloc(tld_meta_p->size * 2);
178 if (!data_alloc) {
179 err = -ENOMEM;
180 goto out;
181 }
182
183 /*
184 * Always pass a page-aligned address to UPTR since the size of tld_map_value::data
185 * is a page in BTF. If data_alloc spans across two pages, use the page that contains large
186 * enough memory.
187 */
188 if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) {
189 map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc);
190 data = data_alloc;
191 data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) +
192 offsetof(struct tld_data_u, data);
193 } else {
194 map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
195 data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
196 data->start = offsetof(struct tld_data_u, data);
197 }
198 map_val.meta = TLD_READ_ONCE(tld_meta_p);
199
200 err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
201 if (err) {
202 free(data_alloc);
203 goto out;
204 }
205
206 tld_data_p = data;
207 tld_data_alloc_p = data_alloc;
208#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
209 pthread_setspecific(tld_pthread_key, (void *)1);
210#endif
211out:
212 if (tid_fd >= 0)
213 close(tid_fd);
214 return err;
215}
216
217static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
218{
219 int err, i, sz, off = 0;
220 __u8 cnt;
221
222 if (!TLD_READ_ONCE(tld_meta_p)) {
223 err = __tld_init_meta_p();
224 if (err)
225 return (tld_key_t){err};
226 }
227
228 for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
229retry:
230 cnt = atomic_load(&tld_meta_p->cnt);
231 if (i < cnt) {
232 /* A metadata is not ready until size is updated with a non-zero value */
233 while (!(sz = atomic_load(&tld_meta_p->metadata[i].size)))
234 sched_yield();
235
236 if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN))
237 return (tld_key_t){-EEXIST};
238
239 off += TLD_ROUND_UP(sz, 8);
240 continue;
241 }
242
243 /*
244 * TLD_DEFINE_KEY() is given memory upto a page while at most
245 * TLD_DYN_DATA_SIZE is allocated for tld_create_key()
246 */
247 if (dyn_data) {
248 if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size)
249 return (tld_key_t){-E2BIG};
250 } else {
251 if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
252 return (tld_key_t){-E2BIG};
253 tld_meta_p->size += TLD_ROUND_UP(size, 8);
254 }
255
256 /*
257 * Only one tld_create_key() can increase the current cnt by one and
258 * takes the latest available slot. Other threads will check again if a new
259 * TLD can still be added, and then compete for the new slot after the
260 * succeeding thread update the size.
261 */
262 if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
263 goto retry;
264
265 strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN);
266 atomic_store(&tld_meta_p->metadata[i].size, size);
267 return (tld_key_t){(__s16)off};
268 }
269
270 return (tld_key_t){-ENOSPC};
271}
272
/**
 * TLD_DEFINE_KEY() - Define a TLD together with a global key variable for it.
 *
 * @key: The variable name of the key
 * @name: The name of the TLD. Only the first TLD_NAME_LEN bytes are stored
 * @size: The size of the TLD
 *
 * The macro can only be used in file scope.
 *
 * It declares a global variable @key of the opaque type tld_key_t and a
 * constructor function that assigns the key before main() starts. Check the
 * result later with tld_key_is_err() or tld_key_err_or_zero(), then pass the key
 * to tld_get_data() to obtain a pointer to the TLD. bpf programs can also fetch
 * the same key by name.
 *
 * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just
 * enough memory will be allocated for each thread on the first call to tld_get_data().
 */
#define TLD_DEFINE_KEY(key, name, size)				\
tld_key_t key;							\
								\
__attribute__((constructor))					\
void __tld_define_key_##key(void)				\
{								\
	key = __tld_create_key(name, size, false);		\
}
298
299/**
300 * tld_create_key() - Create a TLD and return a key associated with the TLD.
301 *
302 * @name: The name the TLD
303 * @size: The size of the TLD
304 *
305 * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check
306 * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to
307 * locate the TLD. bpf programs can also fetch the same key by name.
308 *
309 * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is
310 * not known statically or a TLD needs to be created conditionally)
311 *
312 * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs
313 * created dynamically with tld_create_key(). Since only a user page is pinned to the
314 * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE -
315 * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page.
316 */
317__attribute__((unused))
318static tld_key_t tld_create_key(const char *name, size_t size)
319{
320 return __tld_create_key(name, size, true);
321}
322
323__attribute__((unused))
324static inline bool tld_key_is_err(tld_key_t key)
325{
326 return key.off < 0;
327}
328
329__attribute__((unused))
330static inline int tld_key_err_or_zero(tld_key_t key)
331{
332 return tld_key_is_err(key) ? key.off : 0;
333}
334
335/**
336 * tld_get_data() - Get a pointer to the TLD associated with the given key of the
337 * calling thread.
338 *
339 * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map
340 * of task local data.
341 * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key().
342 *
343 * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD
344 * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte
345 * aligned.
346 *
347 * Threads that call tld_get_data() must call tld_free() on exit to prevent
348 * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined.
349 */
350__attribute__((unused))
351static void *tld_get_data(int map_fd, tld_key_t key)
352{
353 if (!TLD_READ_ONCE(tld_meta_p))
354 return NULL;
355
356 /* tld_data_p is allocated on the first invocation of tld_get_data() */
357 if (!tld_data_p && __tld_init_data_p(map_fd))
358 return NULL;
359
360 return tld_data_p->data + key.off;
361}
362
363/**
364 * tld_free() - Free task local data memory of the calling thread
365 *
366 * For the calling thread, all pointers to TLDs acquired before will become invalid.
367 *
368 * Users must call tld_free() on thread exit to prevent memory leak. Alternatively,
369 * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
370 * to free the memory automatically.
371 */
372__attribute__((unused))
373static void tld_free(void)
374{
375 if (tld_data_alloc_p) {
376 free(tld_data_alloc_p);
377 tld_data_alloc_p = NULL;
378 tld_data_p = NULL;
379 }
380}
381
382#ifdef __cplusplus
383} /* extern "C" */
384#endif
385
386#endif /* __TASK_LOCAL_DATA_H */