1commit 6d1e3fb07b45e2e31e469b16cf21b24bccf8914c
2Author: Andreas K. Hüttel <dilfridge@gentoo.org>
3Date: Wed Jan 31 02:12:43 2024 +0100
4
5 Replace advisories directory
6
7 Signed-off-by: Andreas K. Hüttel <dilfridge@gentoo.org>
8
9diff --git a/advisories/GLIBC-SA-2023-0001 b/advisories/GLIBC-SA-2023-0001
10deleted file mode 100644
11index 3d19c91b6a..0000000000
12--- a/advisories/GLIBC-SA-2023-0001
13+++ /dev/null
14@@ -1,14 +0,0 @@
15-printf: incorrect output for integers with thousands separator and width field
16-
17-When the printf family of functions is called with a format specifier
18-that uses an <apostrophe> (enable grouping) and a minimum width
19-specifier, the resulting output could be larger than reasonably expected
20-by a caller that computed a tight bound on the buffer size. The
21-resulting larger than expected output could result in a buffer overflow
22-in the printf family of functions.
23-
24-CVE-Id: CVE-2023-25139
25-Public-Date: 2023-02-02
26-Vulnerable-Commit: e88b9f0e5cc50cab57a299dc7efe1a4eb385161d (2.37)
27-Fix-Commit: c980549cc6a1c03c23cc2fe3e7b0fe626a0364b0 (2.38)
28-Fix-Commit: 07b9521fc6369d000216b96562ff7c0ed32a16c4 (2.37-4)
29diff --git a/advisories/GLIBC-SA-2023-0002 b/advisories/GLIBC-SA-2023-0002
30deleted file mode 100644
31index 5122669a64..0000000000
32--- a/advisories/GLIBC-SA-2023-0002
33+++ /dev/null
34@@ -1,15 +0,0 @@
35-getaddrinfo: Stack read overflow in no-aaaa mode
36-
37-If the system is configured in no-aaaa mode via /etc/resolv.conf,
38-getaddrinfo is called for the AF_UNSPEC address family, and a DNS
39-response is received over TCP that is larger than 2048 bytes,
40-getaddrinfo may potentially disclose stack contents via the returned
41-address data, or crash.
42-
43-CVE-Id: CVE-2023-4527
44-Public-Date: 2023-09-12
45-Vulnerable-Commit: f282cdbe7f436c75864e5640a409a10485e9abb2 (2.36)
46-Fix-Commit: bd77dd7e73e3530203be1c52c8a29d08270cb25d (2.39)
47-Fix-Commit: 4ea972b7edd7e36610e8cde18bf7a8149d7bac4f (2.36-113)
48-Fix-Commit: b7529346025a130fee483d42178b5c118da971bb (2.37-38)
49-Fix-Commit: b25508dd774b617f99419bdc3cf2ace4560cd2d6 (2.38-19)
50diff --git a/advisories/GLIBC-SA-2023-0003 b/advisories/GLIBC-SA-2023-0003
51deleted file mode 100644
52index d3aef80348..0000000000
53--- a/advisories/GLIBC-SA-2023-0003
54+++ /dev/null
55@@ -1,15 +0,0 @@
56-getaddrinfo: Potential use-after-free
57-
58-When an NSS plugin only implements the _gethostbyname2_r and
59-_getcanonname_r callbacks, getaddrinfo could use memory that was freed
60-during buffer resizing, potentially causing a crash or read or write to
61-arbitrary memory.
62-
63-CVE-Id: CVE-2023-4806
64-Public-Date: 2023-09-12
65-Fix-Commit: 973fe93a5675c42798b2161c6f29c01b0e243994 (2.39)
66-Fix-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420)
67-Fix-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270)
68-Fix-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115)
69-Fix-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39)
70-Fix-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20)
71diff --git a/advisories/GLIBC-SA-2023-0004 b/advisories/GLIBC-SA-2023-0004
72deleted file mode 100644
73index 5286a7aa54..0000000000
74--- a/advisories/GLIBC-SA-2023-0004
75+++ /dev/null
76@@ -1,16 +0,0 @@
77-tunables: local privilege escalation through buffer overflow
78-
79-If a tunable of the form NAME=NAME=VAL is passed in the environment of a
80-setuid program and NAME is valid, it may result in a buffer overflow,
81-which could be exploited to achieve escalated privileges. This flaw was
82-introduced in glibc 2.34.
83-
84-CVE-Id: CVE-2023-4911
85-Public-Date: 2023-10-03
86-Vulnerable-Commit: 2ed18c5b534d9e92fc006202a5af0df6b72e7aca (2.34)
87-Fix-Commit: 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa (2.39)
88-Fix-Commit: dcc367f148bc92e7f3778a125f7a416b093964d9 (2.34-423)
89-Fix-Commit: c84018a05aec80f5ee6f682db0da1130b0196aef (2.35-274)
90-Fix-Commit: 22955ad85186ee05834e47e665056148ca07699c (2.36-118)
91-Fix-Commit: b4e23c75aea756b4bddc4abcf27a1c6dca8b6bd3 (2.37-45)
92-Fix-Commit: 750a45a783906a19591fb8ff6b7841470f1f5701 (2.38-27)
93diff --git a/advisories/GLIBC-SA-2023-0005 b/advisories/GLIBC-SA-2023-0005
94deleted file mode 100644
95index cc4eb90b82..0000000000
96--- a/advisories/GLIBC-SA-2023-0005
97+++ /dev/null
98@@ -1,18 +0,0 @@
99-getaddrinfo: DoS due to memory leak
100-
101-The fix for CVE-2023-4806 introduced a memory leak when an application
102-calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED
103-flags set.
104-
105-CVE-Id: CVE-2023-5156
106-Public-Date: 2023-09-25
107-Vulnerable-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420)
108-Vulnerable-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270)
109-Vulnerable-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115)
110-Vulnerable-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39)
111-Vulnerable-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20)
112-Fix-Commit: 8006457ab7e1cd556b919f477348a96fe88f2e49 (2.34-421)
113-Fix-Commit: 17092c0311f954e6f3c010f73ce3a78c24ac279a (2.35-272)
114-Fix-Commit: 856bac55f98dc840e7c27cfa82262b933385de90 (2.36-116)
115-Fix-Commit: 4473d1b87d04b25cdd0e0354814eeaa421328268 (2.37-42)
116-Fix-Commit: 5ee59ca371b99984232d7584fe2b1a758b4421d3 (2.38-24)
117diff --git a/advisories/GLIBC-SA-2024-0001 b/advisories/GLIBC-SA-2024-0001
118deleted file mode 100644
119index 28931c75ae..0000000000
120--- a/advisories/GLIBC-SA-2024-0001
121+++ /dev/null
122@@ -1,15 +0,0 @@
123-syslog: Heap buffer overflow in __vsyslog_internal
124-
125-__vsyslog_internal did not handle a case where printing a SYSLOG_HEADER
126-containing a long program name failed to update the required buffer
127-size, leading to the allocation and overflow of a too-small buffer on
128-the heap.
129-
130-CVE-Id: CVE-2023-6246
131-Public-Date: 2024-01-30
132-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37)
133-Fix-Commit: 6bd0e4efcc78f3c0115e5ea9739a1642807450da (2.39)
134-Fix-Commit: 23514c72b780f3da097ecf33a793b7ba9c2070d2 (2.38-42)
135-Fix-Commit: 97a4292aa4a2642e251472b878d0ec4c46a0e59a (2.37-57)
136-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16)
137-Fix-Commit: d1a83b6767f68b3cb5b4b4ea2617254acd040c82 (2.36-126)
138diff --git a/advisories/GLIBC-SA-2024-0002 b/advisories/GLIBC-SA-2024-0002
139deleted file mode 100644
140index 940bfcf2fc..0000000000
141--- a/advisories/GLIBC-SA-2024-0002
142+++ /dev/null
143@@ -1,15 +0,0 @@
144-syslog: Heap buffer overflow in __vsyslog_internal
145-
146-__vsyslog_internal used the return value of snprintf/vsnprintf to
147-calculate buffer sizes for memory allocation. If these functions (for
148-any reason) failed and returned -1, the resulting buffer would be too
149-small to hold output.
150-
151-CVE-Id: CVE-2023-6779
152-Public-Date: 2024-01-30
153-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37)
154-Fix-Commit: 7e5a0c286da33159d47d0122007aac016f3e02cd (2.39)
155-Fix-Commit: d0338312aace5bbfef85e03055e1212dd0e49578 (2.38-43)
156-Fix-Commit: 67062eccd9a65d7fda9976a56aeaaf6c25a80214 (2.37-58)
157-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16)
158-Fix-Commit: 2bc9d7c002bdac38b5c2a3f11b78e309d7765b83 (2.36-127)
159diff --git a/advisories/GLIBC-SA-2024-0003 b/advisories/GLIBC-SA-2024-0003
160deleted file mode 100644
161index b43a5150ab..0000000000
162--- a/advisories/GLIBC-SA-2024-0003
163+++ /dev/null
164@@ -1,13 +0,0 @@
165-syslog: Integer overflow in __vsyslog_internal
166-
167-__vsyslog_internal calculated a buffer size by adding two integers, but
168-did not first check if the addition would overflow.
169-
170-CVE-Id: CVE-2023-6780
171-Public-Date: 2024-01-30
172-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37)
173-Fix-Commit: ddf542da94caf97ff43cc2875c88749880b7259b (2.39)
174-Fix-Commit: d37c2b20a4787463d192b32041c3406c2bd91de0 (2.38-44)
175-Fix-Commit: 2b58cba076e912961ceaa5fa58588e4b10f791c0 (2.37-59)
176-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16)
177-Fix-Commit: b9b7d6a27aa0632f334352fa400771115b3c69b7 (2.36-128)
178diff --git a/advisories/README b/advisories/README
179deleted file mode 100644
180index 94e68b1350..0000000000
181--- a/advisories/README
182+++ /dev/null
183@@ -1,73 +0,0 @@
184-GNU C Library Security Advisory Format
185-======================================
186-
187-Security advisories in this directory follow a simple git commit log
188-format, with a heading and free-format description augmented with tags
189-to allow parsing key information. References to code changes are
190-specific to the glibc repository and follow a specific format:
191-
192- Tag-name: <commit-ref> (release-version)
193-
194-The <commit-ref> indicates a specific commit in the repository. The
195-release-version indicates the publicly consumable release in which this
196-commit is known to exist. The release-version is derived from the
197-git-describe format, (i.e. stripped out from glibc-2.34.NNN-gxxxx) and
198-is of the form 2.34-NNN. If the -NNN suffix is absent, it means that
199-the change is in that release tarball, otherwise the change is on the
200-release/2.YY/master branch and not in any released tarball.
201-
202-The following tags are currently being used:
203-
204-CVE-Id:
205-This is the CVE-Id assigned under the CVE Program
206-(https://www.cve.org/).
207-
208-Public-Date:
209-The date this issue became publicly known.
210-
211-Vulnerable-Commit:
212-The commit that introduced this vulnerability. There could be multiple
213-entries, one for each release branch in the glibc repository; the
214-release-version portion of this tag should tell you which branch this is
215-on.
216-
217-Fix-Commit:
218-The commit that fixed this vulnerability. There could be multiple
219-entries for each release branch in the glibc repository, indicating that
220-all of those commits contributed to fixing that issue in each of those
221-branches.
222-
223-Adding an Advisory
224-------------------
225-
226-An advisory for a CVE needs to be added on the master branch in two steps:
227-
228-1. Add the text of the advisory without any Fix-Commit tags along with
229- the fix for the CVE. Add the Vulnerable-Commit tag, if applicable.
230- The advisories directory does not exist in release branches, so keep
231- the advisory text commit distinct from the code changes, to ease
232- backports. Ask for the GLIBC-SA advisory number from the security
233- team.
234-
235-2. Finish all backports on release branches and then back on the msater
236- branch, add all commit refs to the advisory using the Fix-Commit
237- tags. Don't bother adding the release-version subscript since the
238- next step will overwrite it.
239-
240-3. Run the process-advisories.sh script in the scripts directory on the
241- advisory:
242-
243- scripts/process-advisories.sh update GLIBC-SA-YYYY-NNNN
244-
245- (replace YYYY-NNNN with the actual advisory number).
246-
247-4. Verify the updated advisory and push the result.
248-
249-Getting a NEWS snippet from advisories
250---------------------------------------
251-
252-Run:
253-
254- scripts/process-advisories.sh news
255-
256-and copy the content into the NEWS file.
257
258commit 63295e4fda1f6dab4bf7442706fe303bf283036c
259Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
260Date: Mon Feb 5 16:10:24 2024 +0000
261
262 arm: Remove wrong ldr from _dl_start_user (BZ 31339)
263
264 The commit 49d877a80b29d3002887b084eec6676d9f5fec18 (arm: Remove
265 _dl_skip_args usage) removed the _SKIP_ARGS literal, which was
266 previously loaded into r4 in the loader's _start. However, the cleanup did
267 not remove the following 'ldr r4, [sl, r4]' in _dl_start_user, which was used
268 to check whether to skip the arguments after ld self-relocation.
269
270 In my testing, the kernel initially set r4 to 0, which makes the
271 ldr instruction just read the _GLOBAL_OFFSET_TABLE_. However, since r4
272 is a callee-saved register, a different runtime might not zero-initialize
273 it and could thus trigger an invalid memory access.
274
275 Checked on arm-linux-gnu.
276
277 Reported-by: Adrian Ratiu <adrian.ratiu@collabora.com>
278 Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
279 (cherry picked from commit 1e25112dc0cb2515d27d8d178b1ecce778a9d37a)
280
281diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
282index b857bbc868..dd1a0f6b6e 100644
283--- a/sysdeps/arm/dl-machine.h
284+++ b/sysdeps/arm/dl-machine.h
285@@ -139,7 +139,6 @@ _start:\n\
286 _dl_start_user:\n\
287 adr r6, .L_GET_GOT\n\
288 add sl, sl, r6\n\
289- ldr r4, [sl, r4]\n\
290 @ save the entry point in another register\n\
291 mov r6, r0\n\
292 @ get the original arg count\n\
293
294commit 312e159626b67fe11f39e83e222cf4348a3962f3
295Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
296Date: Thu Feb 1 14:29:53 2024 -0300
297
298 mips: Fix clone3 implementation (BZ 31325)
299
300 For o32 we need to set up a minimal stack frame to allow cprestore
301 on __thread_start_clone3 (which instructs the linker to save the
302 gp for PIC). Also, there is no guarantee by the kABI that $8 will be
303 preserved after syscall execution, so we need to save it on the
304 provided stack.
305
306 Checked on mipsel-linux-gnu.
307
308 Reported-by: Khem Raj <raj.khem@gmail.com>
309 Tested-by: Khem Raj <raj.khem@gmail.com>
310 (cherry picked from commit bbd248ac0d75efdef8fe61ea69b1fb25fb95b6e7)
311
312diff --git a/sysdeps/unix/sysv/linux/mips/clone3.S b/sysdeps/unix/sysv/linux/mips/clone3.S
313index e9fec2fa47..481b8ae963 100644
314--- a/sysdeps/unix/sysv/linux/mips/clone3.S
315+++ b/sysdeps/unix/sysv/linux/mips/clone3.S
316@@ -37,11 +37,6 @@
317
318 .text
319 .set nomips16
320-#if _MIPS_SIM == _ABIO32
321-# define EXTRA_LOCALS 1
322-#else
323-# define EXTRA_LOCALS 0
324-#endif
325 #define FRAMESZ ((NARGSAVE*SZREG)+ALSZ)&ALMASK
326 GPOFF= FRAMESZ-(1*SZREG)
327 NESTED(__clone3, SZREG, sp)
328@@ -68,8 +63,31 @@ NESTED(__clone3, SZREG, sp)
329 beqz a0, L(error) /* No NULL cl_args pointer. */
330 beqz a2, L(error) /* No NULL function pointer. */
331
332+#if _MIPS_SIM == _ABIO32
333+ /* Both stack and stack_size on clone_args are defined as uint64_t, and
334+ there is no need to handle values larger than to 32 bits for o32. */
335+# if __BYTE_ORDER == __BIG_ENDIAN
336+# define CL_STACKPOINTER_OFFSET 44
337+# define CL_STACKSIZE_OFFSET 52
338+# else
339+# define CL_STACKPOINTER_OFFSET 40
340+# define CL_STACKSIZE_OFFSET 48
341+# endif
342+
343+ /* For o32 we need to setup a minimal stack frame to allow cprestore
344+ on __thread_start_clone3. Also there is no guarantee by kABI that
345+ $8 will be preserved after syscall execution (so we need to save it
346+ on the provided stack). */
347+ lw t0, CL_STACKPOINTER_OFFSET(a0) /* Load the stack pointer. */
348+ lw t1, CL_STACKSIZE_OFFSET(a0) /* Load the stack_size. */
349+ addiu t1, -32 /* Update the stack size. */
350+ addu t2, t1, t0 /* Calculate the thread stack. */
351+ sw a3, 0(t2) /* Save argument pointer. */
352+ sw t1, CL_STACKSIZE_OFFSET(a0) /* Save the new stack size. */
353+#else
354 move $8, a3 /* a3 is set to 0/1 for syscall success/error
355 while a4/$8 is returned unmodified. */
356+#endif
357
358 /* Do the system call, the kernel expects:
359 v0: system call number
360@@ -125,7 +143,11 @@ L(thread_start_clone3):
361
362 /* Restore the arg for user's function. */
363 move t9, a2 /* Function pointer. */
364+#if _MIPS_SIM == _ABIO32
365+ PTR_L a0, 0(sp)
366+#else
367 move a0, $8 /* Argument pointer. */
368+#endif
369
370 /* Call the user's function. */
371 jal t9
372
373commit d0724994de40934c552f1f68de89053848a44927
374Author: Xi Ruoyao <xry111@xry111.site>
375Date: Thu Feb 22 21:26:55 2024 +0100
376
377 math: Update mips64 ulps
378
379 Signed-off-by: Andreas K. Hüttel <dilfridge@gentoo.org>
380 (cherry picked from commit e2a65ecc4b30a797df7dc6529f09b712aa256029)
381
382diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps
383index 78969745b2..933aba4735 100644
384--- a/sysdeps/mips/mips64/libm-test-ulps
385+++ b/sysdeps/mips/mips64/libm-test-ulps
386@@ -1066,17 +1066,17 @@ double: 1
387 ldouble: 1
388
389 Function: "j0":
390-double: 2
391+double: 3
392 float: 9
393 ldouble: 2
394
395 Function: "j0_downward":
396-double: 5
397+double: 6
398 float: 9
399 ldouble: 9
400
401 Function: "j0_towardzero":
402-double: 6
403+double: 7
404 float: 9
405 ldouble: 9
406
407@@ -1146,6 +1146,7 @@ float: 6
408 ldouble: 8
409
410 Function: "log":
411+double: 1
412 float: 1
413 ldouble: 1
414
415
416commit e0910f1d3278f05439fb434ee528fc9be1b6bd5e
417Author: Stefan Liebler <stli@linux.ibm.com>
418Date: Thu Feb 22 15:03:27 2024 +0100
419
420 S390: Do not clobber r7 in clone [BZ #31402]
421
422 Starting with commit e57d8fc97b90127de4ed3e3a9cdf663667580935
423 "S390: Always use svc 0",
424 clone clobbers the call-saved register r7 in the error case, i.e. when
425 function or stack is NULL.
426
427 This patch restores the saved registers also in the error case.
428 Furthermore the existing test misc/tst-clone is extended to check
429 all error cases and that clone does not clobber registers in this
430 error case.
431
432 (cherry picked from commit 02782fd12849b6673cb5c2728cb750e8ec295aa3)
433
434diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
435index 4c882ef2ee..a7a863242c 100644
436--- a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
437+++ b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
438@@ -53,6 +53,7 @@ ENTRY(__clone)
439 br %r14
440 error:
441 lhi %r2,-EINVAL
442+ lm %r6,%r7,24(%r15) /* Load registers. */
443 j SYSCALL_ERROR_LABEL
444 PSEUDO_END (__clone)
445
446diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
447index 4eb104be71..c552a6b8de 100644
448--- a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
449+++ b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
450@@ -54,6 +54,7 @@ ENTRY(__clone)
451 br %r14
452 error:
453 lghi %r2,-EINVAL
454+ lmg %r6,%r7,48(%r15) /* Restore registers. */
455 jg SYSCALL_ERROR_LABEL
456 PSEUDO_END (__clone)
457
458diff --git a/sysdeps/unix/sysv/linux/tst-clone.c b/sysdeps/unix/sysv/linux/tst-clone.c
459index 470676ab2b..2bc7124983 100644
460--- a/sysdeps/unix/sysv/linux/tst-clone.c
461+++ b/sysdeps/unix/sysv/linux/tst-clone.c
462@@ -16,12 +16,16 @@
463 License along with the GNU C Library; if not, see
464 <https://www.gnu.org/licenses/>. */
465
466-/* BZ #2386 */
467+/* BZ #2386, BZ #31402 */
468 #include <errno.h>
469 #include <stdio.h>
470 #include <stdlib.h>
471 #include <unistd.h>
472 #include <sched.h>
473+#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */
474+#include <support/check.h>
475+
476+volatile unsigned v = 0xdeadbeef;
477
478 int child_fn(void *arg)
479 {
480@@ -30,22 +34,67 @@ int child_fn(void *arg)
481 }
482
483 static int
484-do_test (void)
485+__attribute__((noinline))
486+do_clone (int (*fn)(void *), void *stack)
487 {
488 int result;
489+ unsigned int a = v;
490+ unsigned int b = v;
491+ unsigned int c = v;
492+ unsigned int d = v;
493+ unsigned int e = v;
494+ unsigned int f = v;
495+ unsigned int g = v;
496+ unsigned int h = v;
497+ unsigned int i = v;
498+ unsigned int j = v;
499+ unsigned int k = v;
500+ unsigned int l = v;
501+ unsigned int m = v;
502+ unsigned int n = v;
503+ unsigned int o = v;
504+
505+ result = clone (fn, stack, 0, NULL);
506+
507+ /* Check that clone does not clobber call-saved registers. */
508+ TEST_VERIFY (a == v && b == v && c == v && d == v && e == v && f == v
509+ && g == v && h == v && i == v && j == v && k == v && l == v
510+ && m == v && n == v && o == v);
511+
512+ return result;
513+}
514+
515+static void
516+__attribute__((noinline))
517+do_test_single (int (*fn)(void *), void *stack)
518+{
519+ printf ("%s (fn=%p, stack=%p)\n", __FUNCTION__, fn, stack);
520+ errno = 0;
521+
522+ int result = do_clone (fn, stack);
523+
524+ TEST_COMPARE (errno, EINVAL);
525+ TEST_COMPARE (result, -1);
526+}
527
528- result = clone (child_fn, NULL, 0, NULL);
529+static int
530+do_test (void)
531+{
532+ char st[128 * 1024] __attribute__ ((aligned));
533+ void *stack = NULL;
534+#if _STACK_GROWS_DOWN
535+ stack = st + sizeof (st);
536+#elif _STACK_GROWS_UP
537+ stack = st;
538+#else
539+# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
540+#endif
541
542- if (errno != EINVAL || result != -1)
543- {
544- printf ("FAIL: clone()=%d (wanted -1) errno=%d (wanted %d)\n",
545- result, errno, EINVAL);
546- return 1;
547- }
548+ do_test_single (child_fn, NULL);
549+ do_test_single (NULL, stack);
550+ do_test_single (NULL, NULL);
551
552- puts ("All OK");
553 return 0;
554 }
555
556-#define TEST_FUNCTION do_test ()
557-#include "../test-skeleton.c"
558+#include <support/test-driver.c>
559
560commit 1b9c1a0047fb26a65a9b2a7b8cd977243f7d353c
561Author: Jakub Jelinek <jakub@redhat.com>
562Date: Wed Jan 31 19:17:27 2024 +0100
563
564 Use gcc __builtin_stdc_* builtins in stdbit.h if possible
565
566 The following patch uses the GCC 14 __builtin_stdc_* builtins in stdbit.h
567 for the type-generic macros, so that when compiled with GCC 14 or later,
568 it supports not just 8/16/32/64-bit unsigned integers, but also 128-bit
569 (if target supports them) and unsigned _BitInt (any supported precision).
570 In addition, the macros then don't expand their arguments multiple times
571 and can be evaluated in constant expressions.
572
573 The new testcase is gcc's gcc/testsuite/gcc.dg/builtin-stdc-bit-1.c
574 adjusted to test stdbit.h and the type-generic macros in there instead
575 of the builtins and adjusted to use glibc test framework rather than
576 gcc style tests with __builtin_abort ().
577
578 Signed-off-by: Jakub Jelinek <jakub@redhat.com>
579 Reviewed-by: Joseph Myers <josmyers@redhat.com>
580 (cherry picked from commit da89496337b97e6a2aaf1e81d55cf998f6db1070)
581
582diff --git a/manual/stdbit.texi b/manual/stdbit.texi
583index fe41c671d8..6c75ed9a20 100644
584--- a/manual/stdbit.texi
585+++ b/manual/stdbit.texi
586@@ -32,7 +32,13 @@ and @code{unsigned long long int}. In addition, there is a
587 corresponding type-generic macro (not listed below), named the same as
588 the functions but without any suffix such as @samp{_uc}. The
589 type-generic macro can only be used with an argument of an unsigned
590-integer type with a width of 8, 16, 32 or 64 bits.
591+integer type with a width of 8, 16, 32 or 64 bits, or when using
592+a compiler with support for
593+@uref{https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html,@code{__builtin_stdc_bit_ceil}},
594+etc.@:, built-in functions such as GCC 14.1 or later
595+any unsigned integer type those built-in functions support.
596+In GCC 14.1 that includes support for @code{unsigned __int128} and
597+@code{unsigned _BitInt(@var{n})} if supported by the target.
598
599 @deftypefun {unsigned int} stdc_leading_zeros_uc (unsigned char @var{x})
600 @deftypefunx {unsigned int} stdc_leading_zeros_us (unsigned short @var{x})
601diff --git a/stdlib/Makefile b/stdlib/Makefile
602index d587f054d1..9898cc5d8a 100644
603--- a/stdlib/Makefile
604+++ b/stdlib/Makefile
605@@ -308,6 +308,7 @@ tests := \
606 tst-setcontext10 \
607 tst-setcontext11 \
608 tst-stdbit-Wconversion \
609+ tst-stdbit-builtins \
610 tst-stdc_bit_ceil \
611 tst-stdc_bit_floor \
612 tst-stdc_bit_width \
613diff --git a/stdlib/stdbit.h b/stdlib/stdbit.h
614index f334eb174d..2801590c63 100644
615--- a/stdlib/stdbit.h
616+++ b/stdlib/stdbit.h
617@@ -64,9 +64,13 @@ extern unsigned int stdc_leading_zeros_ul (unsigned long int __x)
618 __extension__
619 extern unsigned int stdc_leading_zeros_ull (unsigned long long int __x)
620 __THROW __attribute_const__;
621-#define stdc_leading_zeros(x) \
622+#if __glibc_has_builtin (__builtin_stdc_leading_zeros)
623+# define stdc_leading_zeros(x) (__builtin_stdc_leading_zeros (x))
624+#else
625+# define stdc_leading_zeros(x) \
626 (stdc_leading_zeros_ull (x) \
627 - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x))))
628+#endif
629
630 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
631 static __always_inline unsigned int
632@@ -116,9 +120,13 @@ extern unsigned int stdc_leading_ones_ul (unsigned long int __x)
633 __extension__
634 extern unsigned int stdc_leading_ones_ull (unsigned long long int __x)
635 __THROW __attribute_const__;
636-#define stdc_leading_ones(x) \
637+#if __glibc_has_builtin (__builtin_stdc_leading_ones)
638+# define stdc_leading_ones(x) (__builtin_stdc_leading_ones (x))
639+#else
640+# define stdc_leading_ones(x) \
641 (stdc_leading_ones_ull ((unsigned long long int) (x) \
642 << 8 * (sizeof (0ULL) - sizeof (x))))
643+#endif
644
645 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
646 static __always_inline unsigned int
647@@ -168,11 +176,15 @@ extern unsigned int stdc_trailing_zeros_ul (unsigned long int __x)
648 __extension__
649 extern unsigned int stdc_trailing_zeros_ull (unsigned long long int __x)
650 __THROW __attribute_const__;
651-#define stdc_trailing_zeros(x) \
652+#if __glibc_has_builtin (__builtin_stdc_trailing_zeros)
653+# define stdc_trailing_zeros(x) (__builtin_stdc_trailing_zeros (x))
654+#else
655+# define stdc_trailing_zeros(x) \
656 (sizeof (x) == 8 ? stdc_trailing_zeros_ull (x) \
657 : sizeof (x) == 4 ? stdc_trailing_zeros_ui (x) \
658 : sizeof (x) == 2 ? stdc_trailing_zeros_us (__pacify_uint16 (x)) \
659 : stdc_trailing_zeros_uc (__pacify_uint8 (x)))
660+#endif
661
662 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
663 static __always_inline unsigned int
664@@ -222,7 +234,11 @@ extern unsigned int stdc_trailing_ones_ul (unsigned long int __x)
665 __extension__
666 extern unsigned int stdc_trailing_ones_ull (unsigned long long int __x)
667 __THROW __attribute_const__;
668-#define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x))
669+#if __glibc_has_builtin (__builtin_stdc_trailing_ones)
670+# define stdc_trailing_ones(x) (__builtin_stdc_trailing_ones (x))
671+#else
672+# define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x))
673+#endif
674
675 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
676 static __always_inline unsigned int
677@@ -272,11 +288,15 @@ extern unsigned int stdc_first_leading_zero_ul (unsigned long int __x)
678 __extension__
679 extern unsigned int stdc_first_leading_zero_ull (unsigned long long int __x)
680 __THROW __attribute_const__;
681-#define stdc_first_leading_zero(x) \
682+#if __glibc_has_builtin (__builtin_stdc_first_leading_zero)
683+# define stdc_first_leading_zero(x) (__builtin_stdc_first_leading_zero (x))
684+#else
685+# define stdc_first_leading_zero(x) \
686 (sizeof (x) == 8 ? stdc_first_leading_zero_ull (x) \
687 : sizeof (x) == 4 ? stdc_first_leading_zero_ui (x) \
688 : sizeof (x) == 2 ? stdc_first_leading_zero_us (__pacify_uint16 (x)) \
689 : stdc_first_leading_zero_uc (__pacify_uint8 (x)))
690+#endif
691
692 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
693 static __always_inline unsigned int
694@@ -326,11 +346,15 @@ extern unsigned int stdc_first_leading_one_ul (unsigned long int __x)
695 __extension__
696 extern unsigned int stdc_first_leading_one_ull (unsigned long long int __x)
697 __THROW __attribute_const__;
698-#define stdc_first_leading_one(x) \
699+#if __glibc_has_builtin (__builtin_stdc_first_leading_one)
700+# define stdc_first_leading_one(x) (__builtin_stdc_first_leading_one (x))
701+#else
702+# define stdc_first_leading_one(x) \
703 (sizeof (x) == 8 ? stdc_first_leading_one_ull (x) \
704 : sizeof (x) == 4 ? stdc_first_leading_one_ui (x) \
705 : sizeof (x) == 2 ? stdc_first_leading_one_us (__pacify_uint16 (x)) \
706 : stdc_first_leading_one_uc (__pacify_uint8 (x)))
707+#endif
708
709 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
710 static __always_inline unsigned int
711@@ -380,11 +404,15 @@ extern unsigned int stdc_first_trailing_zero_ul (unsigned long int __x)
712 __extension__
713 extern unsigned int stdc_first_trailing_zero_ull (unsigned long long int __x)
714 __THROW __attribute_const__;
715-#define stdc_first_trailing_zero(x) \
716+#if __glibc_has_builtin (__builtin_stdc_first_trailing_zero)
717+# define stdc_first_trailing_zero(x) (__builtin_stdc_first_trailing_zero (x))
718+#else
719+# define stdc_first_trailing_zero(x) \
720 (sizeof (x) == 8 ? stdc_first_trailing_zero_ull (x) \
721 : sizeof (x) == 4 ? stdc_first_trailing_zero_ui (x) \
722 : sizeof (x) == 2 ? stdc_first_trailing_zero_us (__pacify_uint16 (x)) \
723 : stdc_first_trailing_zero_uc (__pacify_uint8 (x)))
724+#endif
725
726 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
727 static __always_inline unsigned int
728@@ -434,11 +462,15 @@ extern unsigned int stdc_first_trailing_one_ul (unsigned long int __x)
729 __extension__
730 extern unsigned int stdc_first_trailing_one_ull (unsigned long long int __x)
731 __THROW __attribute_const__;
732-#define stdc_first_trailing_one(x) \
733+#if __glibc_has_builtin (__builtin_stdc_first_trailing_one)
734+# define stdc_first_trailing_one(x) (__builtin_stdc_first_trailing_one (x))
735+#else
736+# define stdc_first_trailing_one(x) \
737 (sizeof (x) == 8 ? stdc_first_trailing_one_ull (x) \
738 : sizeof (x) == 4 ? stdc_first_trailing_one_ui (x) \
739 : sizeof (x) == 2 ? stdc_first_trailing_one_us (__pacify_uint16 (x)) \
740 : stdc_first_trailing_one_uc (__pacify_uint8 (x)))
741+#endif
742
743 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
744 static __always_inline unsigned int
745@@ -488,9 +520,13 @@ extern unsigned int stdc_count_zeros_ul (unsigned long int __x)
746 __extension__
747 extern unsigned int stdc_count_zeros_ull (unsigned long long int __x)
748 __THROW __attribute_const__;
749-#define stdc_count_zeros(x) \
750+#if __glibc_has_builtin (__builtin_stdc_count_zeros)
751+# define stdc_count_zeros(x) (__builtin_stdc_count_zeros (x))
752+#else
753+# define stdc_count_zeros(x) \
754 (stdc_count_zeros_ull (x) \
755 - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x))))
756+#endif
757
758 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll)
759 static __always_inline unsigned int
760@@ -540,7 +576,11 @@ extern unsigned int stdc_count_ones_ul (unsigned long int __x)
761 __extension__
762 extern unsigned int stdc_count_ones_ull (unsigned long long int __x)
763 __THROW __attribute_const__;
764-#define stdc_count_ones(x) (stdc_count_ones_ull (x))
765+#if __glibc_has_builtin (__builtin_stdc_count_ones)
766+# define stdc_count_ones(x) (__builtin_stdc_count_ones (x))
767+#else
768+# define stdc_count_ones(x) (stdc_count_ones_ull (x))
769+#endif
770
771 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll)
772 static __always_inline unsigned int
773@@ -590,10 +630,14 @@ extern bool stdc_has_single_bit_ul (unsigned long int __x)
774 __extension__
775 extern bool stdc_has_single_bit_ull (unsigned long long int __x)
776 __THROW __attribute_const__;
777-#define stdc_has_single_bit(x) \
778+#if __glibc_has_builtin (__builtin_stdc_has_single_bit)
779+# define stdc_has_single_bit(x) (__builtin_stdc_has_single_bit (x))
780+#else
781+# define stdc_has_single_bit(x) \
782 ((bool) (sizeof (x) <= sizeof (unsigned int) \
783 ? stdc_has_single_bit_ui (x) \
784 : stdc_has_single_bit_ull (x)))
785+#endif
786
787 static __always_inline bool
788 __hsb64_inline (uint64_t __x)
789@@ -641,7 +685,11 @@ extern unsigned int stdc_bit_width_ul (unsigned long int __x)
790 __extension__
791 extern unsigned int stdc_bit_width_ull (unsigned long long int __x)
792 __THROW __attribute_const__;
793-#define stdc_bit_width(x) (stdc_bit_width_ull (x))
794+#if __glibc_has_builtin (__builtin_stdc_bit_width)
795+# define stdc_bit_width(x) (__builtin_stdc_bit_width (x))
796+#else
797+# define stdc_bit_width(x) (stdc_bit_width_ull (x))
798+#endif
799
800 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
801 static __always_inline unsigned int
802@@ -691,7 +739,11 @@ extern unsigned long int stdc_bit_floor_ul (unsigned long int __x)
803 __extension__
804 extern unsigned long long int stdc_bit_floor_ull (unsigned long long int __x)
805 __THROW __attribute_const__;
806-#define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x))
807+#if __glibc_has_builtin (__builtin_stdc_bit_floor)
808+# define stdc_bit_floor(x) (__builtin_stdc_bit_floor (x))
809+#else /* No builtin: use the _ull function, cast back to X's type. */
810+# define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x))
811+#endif /* __builtin_stdc_bit_floor */
812
813 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
814 static __always_inline uint64_t
815@@ -743,7 +795,11 @@ extern unsigned long int stdc_bit_ceil_ul (unsigned long int __x)
816 __extension__
817 extern unsigned long long int stdc_bit_ceil_ull (unsigned long long int __x)
818 __THROW __attribute_const__;
819-#define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x))
820+#if __glibc_has_builtin (__builtin_stdc_bit_ceil)
821+# define stdc_bit_ceil(x) (__builtin_stdc_bit_ceil (x))
822+#else /* No builtin: use the _ull function, cast back to X's type. */
823+# define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x))
824+#endif /* __builtin_stdc_bit_ceil */
825
826 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
827 static __always_inline uint64_t
828diff --git a/stdlib/tst-stdbit-builtins.c b/stdlib/tst-stdbit-builtins.c
829new file mode 100644
830index 0000000000..536841ca8a
831--- /dev/null
832+++ b/stdlib/tst-stdbit-builtins.c
833@@ -0,0 +1,778 @@
834+/* Test <stdbit.h> type-generic macros with compiler __builtin_stdc_* support.
835+ Copyright (C) 2024 Free Software Foundation, Inc.
836+ This file is part of the GNU C Library.
837+
838+ The GNU C Library is free software; you can redistribute it and/or
839+ modify it under the terms of the GNU Lesser General Public
840+ License as published by the Free Software Foundation; either
841+ version 2.1 of the License, or (at your option) any later version.
842+
843+ The GNU C Library is distributed in the hope that it will be useful,
844+ but WITHOUT ANY WARRANTY; without even the implied warranty of
845+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
846+ Lesser General Public License for more details.
847+
848+ You should have received a copy of the GNU Lesser General Public
849+ License along with the GNU C Library; if not, see
850+ <https://www.gnu.org/licenses/>. */
851+
852+#include <stdbit.h>
853+#include <limits.h>
854+#include <support/check.h>
855+
856+#if __glibc_has_builtin (__builtin_stdc_leading_zeros) \
857+ && __glibc_has_builtin (__builtin_stdc_leading_ones) \
858+ && __glibc_has_builtin (__builtin_stdc_trailing_zeros) \
859+ && __glibc_has_builtin (__builtin_stdc_trailing_ones) \
860+ && __glibc_has_builtin (__builtin_stdc_first_leading_zero) \
861+ && __glibc_has_builtin (__builtin_stdc_first_leading_one) \
862+ && __glibc_has_builtin (__builtin_stdc_first_trailing_zero) \
863+ && __glibc_has_builtin (__builtin_stdc_first_trailing_one) \
864+ && __glibc_has_builtin (__builtin_stdc_count_zeros) \
865+ && __glibc_has_builtin (__builtin_stdc_count_ones) \
866+ && __glibc_has_builtin (__builtin_stdc_has_single_bit) \
867+ && __glibc_has_builtin (__builtin_stdc_bit_width) \
868+ && __glibc_has_builtin (__builtin_stdc_bit_floor) \
869+ && __glibc_has_builtin (__builtin_stdc_bit_ceil)
870+
871+# if !defined (BITINT_MAXWIDTH) && defined (__BITINT_MAXWIDTH__)
872+# define BITINT_MAXWIDTH __BITINT_MAXWIDTH__ /* Alias the __ spelling. */
873+# endif /* !BITINT_MAXWIDTH && __BITINT_MAXWIDTH__ */
874+
875+typedef unsigned char uc; /* Short aliases for the unsigned types. */
876+typedef unsigned short us;
877+typedef unsigned int ui;
878+typedef unsigned long int ul;
879+typedef unsigned long long int ull;
880+
881+# define expr_has_type(e, t) _Generic (e, default : 0, t : 1) /* 1 if _Generic matches E to type T, else 0. */
882+
883+static int
884+do_test (void)
885+{
886+ TEST_COMPARE (stdc_leading_zeros ((uc) 0), CHAR_BIT);
887+ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((uc) 0), ui), 1);
888+ TEST_COMPARE (stdc_leading_zeros ((us) 0), sizeof (short) * CHAR_BIT);
889+ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((us) 0), ui), 1);
890+ TEST_COMPARE (stdc_leading_zeros (0U), sizeof (int) * CHAR_BIT);
891+ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0U), ui), 1);
892+ TEST_COMPARE (stdc_leading_zeros (0UL), sizeof (long int) * CHAR_BIT);
893+ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0UL), ui), 1);
894+ TEST_COMPARE (stdc_leading_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
895+ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0ULL), ui), 1);
896+ TEST_COMPARE (stdc_leading_zeros ((uc) ~0U), 0);
897+ TEST_COMPARE (stdc_leading_zeros ((us) ~0U), 0);
898+ TEST_COMPARE (stdc_leading_zeros (~0U), 0);
899+ TEST_COMPARE (stdc_leading_zeros (~0UL), 0);
900+ TEST_COMPARE (stdc_leading_zeros (~0ULL), 0);
901+ TEST_COMPARE (stdc_leading_zeros ((uc) 3), CHAR_BIT - 2);
902+ TEST_COMPARE (stdc_leading_zeros ((us) 9), sizeof (short) * CHAR_BIT - 4);
903+ TEST_COMPARE (stdc_leading_zeros (34U), sizeof (int) * CHAR_BIT - 6);
904+ TEST_COMPARE (stdc_leading_zeros (130UL), sizeof (long int) * CHAR_BIT - 8);
905+ TEST_COMPARE (stdc_leading_zeros (512ULL),
906+ sizeof (long long int) * CHAR_BIT - 10);
907+ TEST_COMPARE (stdc_leading_ones ((uc) 0), 0);
908+ TEST_COMPARE (expr_has_type (stdc_leading_ones ((uc) 0), ui), 1);
909+ TEST_COMPARE (stdc_leading_ones ((us) 0), 0);
910+ TEST_COMPARE (expr_has_type (stdc_leading_ones ((us) 0), ui), 1);
911+ TEST_COMPARE (stdc_leading_ones (0U), 0);
912+ TEST_COMPARE (expr_has_type (stdc_leading_ones (0U), ui), 1);
913+ TEST_COMPARE (stdc_leading_ones (0UL), 0);
914+ TEST_COMPARE (expr_has_type (stdc_leading_ones (0UL), ui), 1);
915+ TEST_COMPARE (stdc_leading_ones (0ULL), 0);
916+ TEST_COMPARE (expr_has_type (stdc_leading_ones (0ULL), ui), 1);
917+ TEST_COMPARE (stdc_leading_ones ((uc) ~0U), CHAR_BIT);
918+ TEST_COMPARE (stdc_leading_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
919+ TEST_COMPARE (stdc_leading_ones (~0U), sizeof (int) * CHAR_BIT);
920+ TEST_COMPARE (stdc_leading_ones (~0UL), sizeof (long int) * CHAR_BIT);
921+ TEST_COMPARE (stdc_leading_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
922+ TEST_COMPARE (stdc_leading_ones ((uc) ~3), CHAR_BIT - 2);
923+ TEST_COMPARE (stdc_leading_ones ((us) ~9), sizeof (short) * CHAR_BIT - 4);
924+ TEST_COMPARE (stdc_leading_ones (~34U), sizeof (int) * CHAR_BIT - 6);
925+ TEST_COMPARE (stdc_leading_ones (~130UL), sizeof (long int) * CHAR_BIT - 8);
926+ TEST_COMPARE (stdc_leading_ones (~512ULL),
927+ sizeof (long long int) * CHAR_BIT - 10);
928+ TEST_COMPARE (stdc_trailing_zeros ((uc) 0), CHAR_BIT);
929+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((uc) 0), ui), 1);
930+ TEST_COMPARE (stdc_trailing_zeros ((us) 0), sizeof (short) * CHAR_BIT);
931+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((us) 0), ui), 1);
932+ TEST_COMPARE (stdc_trailing_zeros (0U), sizeof (int) * CHAR_BIT);
933+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0U), ui), 1);
934+ TEST_COMPARE (stdc_trailing_zeros (0UL), sizeof (long int) * CHAR_BIT);
935+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0UL), ui), 1);
936+ TEST_COMPARE (stdc_trailing_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
937+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0ULL), ui), 1);
938+ TEST_COMPARE (stdc_trailing_zeros ((uc) ~0U), 0);
939+ TEST_COMPARE (stdc_trailing_zeros ((us) ~0U), 0);
940+ TEST_COMPARE (stdc_trailing_zeros (~0U), 0);
941+ TEST_COMPARE (stdc_trailing_zeros (~0UL), 0);
942+ TEST_COMPARE (stdc_trailing_zeros (~0ULL), 0);
943+ TEST_COMPARE (stdc_trailing_zeros ((uc) 2), 1);
944+ TEST_COMPARE (stdc_trailing_zeros ((us) 24), 3);
945+ TEST_COMPARE (stdc_trailing_zeros (32U), 5);
946+ TEST_COMPARE (stdc_trailing_zeros (128UL), 7);
947+ TEST_COMPARE (stdc_trailing_zeros (512ULL), 9);
948+ TEST_COMPARE (stdc_trailing_ones ((uc) 0), 0);
949+ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((uc) 0), ui), 1);
950+ TEST_COMPARE (stdc_trailing_ones ((us) 0), 0);
951+ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((us) 0), ui), 1);
952+ TEST_COMPARE (stdc_trailing_ones (0U), 0);
953+ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0U), ui), 1);
954+ TEST_COMPARE (stdc_trailing_ones (0UL), 0);
955+ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0UL), ui), 1);
956+ TEST_COMPARE (stdc_trailing_ones (0ULL), 0);
957+ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0ULL), ui), 1);
958+ TEST_COMPARE (stdc_trailing_ones ((uc) ~0U), CHAR_BIT);
959+ TEST_COMPARE (stdc_trailing_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
960+ TEST_COMPARE (stdc_trailing_ones (~0U), sizeof (int) * CHAR_BIT);
961+ TEST_COMPARE (stdc_trailing_ones (~0UL), sizeof (long int) * CHAR_BIT);
962+ TEST_COMPARE (stdc_trailing_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
963+ TEST_COMPARE (stdc_trailing_ones ((uc) 5), 1);
964+ TEST_COMPARE (stdc_trailing_ones ((us) 15), 4);
965+ TEST_COMPARE (stdc_trailing_ones (127U), 7);
966+ TEST_COMPARE (stdc_trailing_ones (511UL), 9);
967+ TEST_COMPARE (stdc_trailing_ones (~0ULL >> 2),
968+ sizeof (long long int) * CHAR_BIT - 2);
969+ TEST_COMPARE (stdc_first_leading_zero ((uc) 0), 1);
970+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((uc) 0), ui), 1);
971+ TEST_COMPARE (stdc_first_leading_zero ((us) 0), 1);
972+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((us) 0), ui), 1);
973+ TEST_COMPARE (stdc_first_leading_zero (0U), 1);
974+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0U), ui), 1);
975+ TEST_COMPARE (stdc_first_leading_zero (0UL), 1);
976+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0UL), ui), 1);
977+ TEST_COMPARE (stdc_first_leading_zero (0ULL), 1);
978+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0ULL), ui), 1);
979+ TEST_COMPARE (stdc_first_leading_zero ((uc) ~0U), 0);
980+ TEST_COMPARE (stdc_first_leading_zero ((us) ~0U), 0);
981+ TEST_COMPARE (stdc_first_leading_zero (~0U), 0);
982+ TEST_COMPARE (stdc_first_leading_zero (~0UL), 0);
983+ TEST_COMPARE (stdc_first_leading_zero (~0ULL), 0);
984+ TEST_COMPARE (stdc_first_leading_zero ((uc) ~3U), CHAR_BIT - 1);
985+ TEST_COMPARE (stdc_first_leading_zero ((us) ~15U),
986+ sizeof (short) * CHAR_BIT - 3);
987+ TEST_COMPARE (stdc_first_leading_zero (~63U), sizeof (int) * CHAR_BIT - 5);
988+ TEST_COMPARE (stdc_first_leading_zero (~255UL),
989+ sizeof (long int) * CHAR_BIT - 7);
990+ TEST_COMPARE (stdc_first_leading_zero (~1023ULL),
991+ sizeof (long long int) * CHAR_BIT - 9);
992+ TEST_COMPARE (stdc_first_leading_one ((uc) 0), 0);
993+ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((uc) 0), ui), 1);
994+ TEST_COMPARE (stdc_first_leading_one ((us) 0), 0);
995+ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((us) 0), ui), 1);
996+ TEST_COMPARE (stdc_first_leading_one (0U), 0);
997+ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0U), ui), 1);
998+ TEST_COMPARE (stdc_first_leading_one (0UL), 0);
999+ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0UL), ui), 1);
1000+ TEST_COMPARE (stdc_first_leading_one (0ULL), 0);
1001+ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0ULL), ui), 1);
1002+ TEST_COMPARE (stdc_first_leading_one ((uc) ~0U), 1);
1003+ TEST_COMPARE (stdc_first_leading_one ((us) ~0U), 1);
1004+ TEST_COMPARE (stdc_first_leading_one (~0U), 1);
1005+ TEST_COMPARE (stdc_first_leading_one (~0UL), 1);
1006+ TEST_COMPARE (stdc_first_leading_one (~0ULL), 1);
1007+ TEST_COMPARE (stdc_first_leading_one ((uc) 3), CHAR_BIT - 1);
1008+ TEST_COMPARE (stdc_first_leading_one ((us) 9),
1009+ sizeof (short) * CHAR_BIT - 3);
1010+ TEST_COMPARE (stdc_first_leading_one (34U), sizeof (int) * CHAR_BIT - 5);
1011+ TEST_COMPARE (stdc_first_leading_one (130UL),
1012+ sizeof (long int) * CHAR_BIT - 7);
1013+ TEST_COMPARE (stdc_first_leading_one (512ULL),
1014+ sizeof (long long int) * CHAR_BIT - 9);
1015+ TEST_COMPARE (stdc_first_trailing_zero ((uc) 0), 1);
1016+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((uc) 0), ui), 1);
1017+ TEST_COMPARE (stdc_first_trailing_zero ((us) 0), 1);
1018+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((us) 0), ui), 1);
1019+ TEST_COMPARE (stdc_first_trailing_zero (0U), 1);
1020+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0U), ui), 1);
1021+ TEST_COMPARE (stdc_first_trailing_zero (0UL), 1);
1022+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0UL), ui), 1);
1023+ TEST_COMPARE (stdc_first_trailing_zero (0ULL), 1);
1024+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0ULL), ui), 1);
1025+ TEST_COMPARE (stdc_first_trailing_zero ((uc) ~0U), 0);
1026+ TEST_COMPARE (stdc_first_trailing_zero ((us) ~0U), 0);
1027+ TEST_COMPARE (stdc_first_trailing_zero (~0U), 0);
1028+ TEST_COMPARE (stdc_first_trailing_zero (~0UL), 0);
1029+ TEST_COMPARE (stdc_first_trailing_zero (~0ULL), 0);
1030+ TEST_COMPARE (stdc_first_trailing_zero ((uc) 2), 1);
1031+ TEST_COMPARE (stdc_first_trailing_zero ((us) 15), 5);
1032+ TEST_COMPARE (stdc_first_trailing_zero (63U), 7);
1033+ TEST_COMPARE (stdc_first_trailing_zero (128UL), 1);
1034+ TEST_COMPARE (stdc_first_trailing_zero (511ULL), 10);
1035+ TEST_COMPARE (stdc_first_trailing_one ((uc) 0), 0);
1036+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((uc) 0), ui), 1);
1037+ TEST_COMPARE (stdc_first_trailing_one ((us) 0), 0);
1038+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((us) 0), ui), 1);
1039+ TEST_COMPARE (stdc_first_trailing_one (0U), 0);
1040+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0U), ui), 1);
1041+ TEST_COMPARE (stdc_first_trailing_one (0UL), 0);
1042+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0UL), ui), 1);
1043+ TEST_COMPARE (stdc_first_trailing_one (0ULL), 0);
1044+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0ULL), ui), 1);
1045+ TEST_COMPARE (stdc_first_trailing_one ((uc) ~0U), 1);
1046+ TEST_COMPARE (stdc_first_trailing_one ((us) ~0U), 1);
1047+ TEST_COMPARE (stdc_first_trailing_one (~0U), 1);
1048+ TEST_COMPARE (stdc_first_trailing_one (~0UL), 1);
1049+ TEST_COMPARE (stdc_first_trailing_one (~0ULL), 1);
1050+ TEST_COMPARE (stdc_first_trailing_one ((uc) 4), 3);
1051+ TEST_COMPARE (stdc_first_trailing_one ((us) 96), 6);
1052+ TEST_COMPARE (stdc_first_trailing_one (127U), 1);
1053+ TEST_COMPARE (stdc_first_trailing_one (511UL), 1);
1054+ TEST_COMPARE (stdc_first_trailing_one (~0ULL << 12), 13);
1055+ TEST_COMPARE (stdc_count_zeros ((uc) 0), CHAR_BIT);
1056+ TEST_COMPARE (expr_has_type (stdc_count_zeros ((uc) 0), ui), 1);
1057+ TEST_COMPARE (stdc_count_zeros ((us) 0), sizeof (short) * CHAR_BIT);
1058+ TEST_COMPARE (expr_has_type (stdc_count_zeros ((us) 0), ui), 1);
1059+ TEST_COMPARE (stdc_count_zeros (0U), sizeof (int) * CHAR_BIT);
1060+ TEST_COMPARE (expr_has_type (stdc_count_zeros (0U), ui), 1);
1061+ TEST_COMPARE (stdc_count_zeros (0UL), sizeof (long int) * CHAR_BIT);
1062+ TEST_COMPARE (expr_has_type (stdc_count_zeros (0UL), ui), 1);
1063+ TEST_COMPARE (stdc_count_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
1064+ TEST_COMPARE (expr_has_type (stdc_count_zeros (0ULL), ui), 1);
1065+ TEST_COMPARE (stdc_count_zeros ((uc) ~0U), 0);
1066+ TEST_COMPARE (stdc_count_zeros ((us) ~0U), 0);
1067+ TEST_COMPARE (stdc_count_zeros (~0U), 0);
1068+ TEST_COMPARE (stdc_count_zeros (~0UL), 0);
1069+ TEST_COMPARE (stdc_count_zeros (~0ULL), 0);
1070+ TEST_COMPARE (stdc_count_zeros ((uc) 1U), CHAR_BIT - 1);
1071+ TEST_COMPARE (stdc_count_zeros ((us) 42), sizeof (short) * CHAR_BIT - 3);
1072+ TEST_COMPARE (stdc_count_zeros (291U), sizeof (int) * CHAR_BIT - 4);
1073+ TEST_COMPARE (stdc_count_zeros (~1315UL), 5);
1074+ TEST_COMPARE (stdc_count_zeros (3363ULL),
1075+ sizeof (long long int) * CHAR_BIT - 6);
1076+ TEST_COMPARE (stdc_count_ones ((uc) 0), 0);
1077+ TEST_COMPARE (expr_has_type (stdc_count_ones ((uc) 0), ui), 1);
1078+ TEST_COMPARE (stdc_count_ones ((us) 0), 0);
1079+ TEST_COMPARE (expr_has_type (stdc_count_ones ((us) 0), ui), 1);
1080+ TEST_COMPARE (stdc_count_ones (0U), 0);
1081+ TEST_COMPARE (expr_has_type (stdc_count_ones (0U), ui), 1);
1082+ TEST_COMPARE (stdc_count_ones (0UL), 0);
1083+ TEST_COMPARE (expr_has_type (stdc_count_ones (0UL), ui), 1);
1084+ TEST_COMPARE (stdc_count_ones (0ULL), 0);
1085+ TEST_COMPARE (expr_has_type (stdc_count_ones (0ULL), ui), 1);
1086+ TEST_COMPARE (stdc_count_ones ((uc) ~0U), CHAR_BIT);
1087+ TEST_COMPARE (stdc_count_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
1088+ TEST_COMPARE (stdc_count_ones (~0U), sizeof (int) * CHAR_BIT);
1089+ TEST_COMPARE (stdc_count_ones (~0UL), sizeof (long int) * CHAR_BIT);
1090+ TEST_COMPARE (stdc_count_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
1091+ TEST_COMPARE (stdc_count_ones ((uc) ~1U), CHAR_BIT - 1);
1092+ TEST_COMPARE (stdc_count_ones ((us) ~42), sizeof (short) * CHAR_BIT - 3);
1093+ TEST_COMPARE (stdc_count_ones (~291U), sizeof (int) * CHAR_BIT - 4);
1094+ TEST_COMPARE (stdc_count_ones (1315UL), 5);
1095+ TEST_COMPARE (stdc_count_ones (~3363ULL),
1096+ sizeof (long long int) * CHAR_BIT - 6);
1097+ TEST_COMPARE (stdc_has_single_bit ((uc) 0), 0);
1098+ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((uc) 0), _Bool), 1);
1099+ TEST_COMPARE (stdc_has_single_bit ((us) 0), 0);
1100+ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((us) 0), _Bool), 1);
1101+ TEST_COMPARE (stdc_has_single_bit (0U), 0);
1102+ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0U), _Bool), 1);
1103+ TEST_COMPARE (stdc_has_single_bit (0UL), 0);
1104+ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0UL), _Bool), 1);
1105+ TEST_COMPARE (stdc_has_single_bit (0ULL), 0);
1106+ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0ULL), _Bool), 1);
1107+ TEST_COMPARE (stdc_has_single_bit ((uc) 2), 1);
1108+ TEST_COMPARE (stdc_has_single_bit ((us) 8), 1);
1109+ TEST_COMPARE (stdc_has_single_bit (32U), 1);
1110+ TEST_COMPARE (stdc_has_single_bit (128UL), 1);
1111+ TEST_COMPARE (stdc_has_single_bit (512ULL), 1);
1112+ TEST_COMPARE (stdc_has_single_bit ((uc) 7), 0);
1113+ TEST_COMPARE (stdc_has_single_bit ((us) 96), 0);
1114+ TEST_COMPARE (stdc_has_single_bit (513U), 0);
1115+ TEST_COMPARE (stdc_has_single_bit (1022UL), 0);
1116+ TEST_COMPARE (stdc_has_single_bit (12ULL), 0);
1117+ TEST_COMPARE (stdc_bit_width ((uc) 0), 0);
1118+ TEST_COMPARE (expr_has_type (stdc_bit_width ((uc) 0), ui), 1);
1119+ TEST_COMPARE (stdc_bit_width ((us) 0), 0);
1120+ TEST_COMPARE (expr_has_type (stdc_bit_width ((us) 0), ui), 1);
1121+ TEST_COMPARE (stdc_bit_width (0U), 0);
1122+ TEST_COMPARE (expr_has_type (stdc_bit_width (0U), ui), 1);
1123+ TEST_COMPARE (stdc_bit_width (0UL), 0);
1124+ TEST_COMPARE (expr_has_type (stdc_bit_width (0UL), ui), 1);
1125+ TEST_COMPARE (stdc_bit_width (0ULL), 0);
1126+ TEST_COMPARE (expr_has_type (stdc_bit_width (0ULL), ui), 1);
1127+ TEST_COMPARE (stdc_bit_width ((uc) ~0U), CHAR_BIT);
1128+ TEST_COMPARE (stdc_bit_width ((us) ~0U), sizeof (short) * CHAR_BIT);
1129+ TEST_COMPARE (stdc_bit_width (~0U), sizeof (int) * CHAR_BIT);
1130+ TEST_COMPARE (stdc_bit_width (~0UL), sizeof (long int) * CHAR_BIT);
1131+ TEST_COMPARE (stdc_bit_width (~0ULL), sizeof (long long int) * CHAR_BIT);
1132+ TEST_COMPARE (stdc_bit_width ((uc) ((uc) ~0U >> 1)), CHAR_BIT - 1);
1133+ TEST_COMPARE (stdc_bit_width ((uc) 6), 3);
1134+ TEST_COMPARE (stdc_bit_width ((us) 12U), 4);
1135+ TEST_COMPARE (stdc_bit_width ((us) ((us) ~0U >> 5)),
1136+ sizeof (short) * CHAR_BIT - 5);
1137+ TEST_COMPARE (stdc_bit_width (137U), 8);
1138+ TEST_COMPARE (stdc_bit_width (269U), 9);
1139+ TEST_COMPARE (stdc_bit_width (39UL), 6);
1140+ TEST_COMPARE (stdc_bit_width (~0UL >> 2), sizeof (long int) * CHAR_BIT - 2);
1141+ TEST_COMPARE (stdc_bit_width (1023ULL), 10);
1142+ TEST_COMPARE (stdc_bit_width (1024ULL), 11);
1143+ TEST_COMPARE (stdc_bit_floor ((uc) 0), 0);
1144+ TEST_COMPARE (expr_has_type (stdc_bit_floor ((uc) 0), uc), 1);
1145+ TEST_COMPARE (stdc_bit_floor ((us) 0), 0);
1146+ TEST_COMPARE (expr_has_type (stdc_bit_floor ((us) 0), us), 1);
1147+ TEST_COMPARE (stdc_bit_floor (0U), 0U);
1148+ TEST_COMPARE (expr_has_type (stdc_bit_floor (0U), ui), 1);
1149+ TEST_COMPARE (stdc_bit_floor (0UL), 0UL);
1150+ TEST_COMPARE (expr_has_type (stdc_bit_floor (0UL), ul), 1);
1151+ TEST_COMPARE (stdc_bit_floor (0ULL), 0ULL);
1152+ TEST_COMPARE (expr_has_type (stdc_bit_floor (0ULL), ull), 1);
1153+ TEST_COMPARE (stdc_bit_floor ((uc) ~0U), (1U << (CHAR_BIT - 1)));
1154+ TEST_COMPARE (stdc_bit_floor ((us) ~0U),
1155+ (1U << (sizeof (short) * CHAR_BIT - 1)));
1156+ TEST_COMPARE (stdc_bit_floor (~0U), (1U << (sizeof (int) * CHAR_BIT - 1)));
1157+ TEST_COMPARE (stdc_bit_floor (~0UL),
1158+ (1UL << (sizeof (long int) * CHAR_BIT - 1)));
1159+ TEST_COMPARE (stdc_bit_floor (~0ULL),
1160+ (1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
1161+ TEST_COMPARE (stdc_bit_floor ((uc) 4), 4);
1162+ TEST_COMPARE (stdc_bit_floor ((uc) 7), 4);
1163+ TEST_COMPARE (stdc_bit_floor ((us) 8U), 8);
1164+ TEST_COMPARE (stdc_bit_floor ((us) 31U), 16);
1165+ TEST_COMPARE (stdc_bit_floor (137U), 128U);
1166+ TEST_COMPARE (stdc_bit_floor (269U), 256U);
1167+ TEST_COMPARE (stdc_bit_floor (511UL), 256UL);
1168+ TEST_COMPARE (stdc_bit_floor (512UL), 512UL);
1169+ TEST_COMPARE (stdc_bit_floor (513UL), 512ULL);
1170+ TEST_COMPARE (stdc_bit_floor (1024ULL), 1024ULL);
1171+ TEST_COMPARE (stdc_bit_ceil ((uc) 0), 1);
1172+ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((uc) 0), uc), 1);
1173+ TEST_COMPARE (stdc_bit_ceil ((us) 0), 1);
1174+ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((us) 0), us), 1);
1175+ TEST_COMPARE (stdc_bit_ceil (0U), 1U);
1176+ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0U), ui), 1);
1177+ TEST_COMPARE (stdc_bit_ceil (0UL), 1UL);
1178+ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0UL), ul), 1);
1179+ TEST_COMPARE (stdc_bit_ceil (0ULL), 1ULL);
1180+ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0ULL), ull), 1);
1181+ TEST_COMPARE (stdc_bit_ceil ((uc) ~0U), 0);
1182+ TEST_COMPARE (stdc_bit_ceil ((us) ~0U), 0);
1183+ TEST_COMPARE (stdc_bit_ceil (~0U), 0U);
1184+ TEST_COMPARE (stdc_bit_ceil (~0UL), 0UL);
1185+ TEST_COMPARE (stdc_bit_ceil (~0ULL), 0ULL);
1186+ TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1)));
1187+ TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1)));
1188+ TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)),
1189+ (1U << (sizeof (short) * CHAR_BIT - 1)));
1190+ TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)),
1191+ (1U << (sizeof (short) * CHAR_BIT - 1)));
1192+ TEST_COMPARE (stdc_bit_ceil (~0U >> 1),
1193+ (1U << (sizeof (int) * CHAR_BIT - 1)));
1194+ TEST_COMPARE (stdc_bit_ceil (1U << (sizeof (int) * CHAR_BIT - 1)),
1195+ (1U << (sizeof (int) * CHAR_BIT - 1)));
1196+ TEST_COMPARE (stdc_bit_ceil (~0UL >> 1),
1197+ (1UL << (sizeof (long int) * CHAR_BIT - 1)));
1198+ TEST_COMPARE (stdc_bit_ceil (~0UL >> 1),
1199+ (1UL << (sizeof (long int) * CHAR_BIT - 1)));
1200+ TEST_COMPARE (stdc_bit_ceil (1ULL
1201+ << (sizeof (long long int) * CHAR_BIT - 1)),
1202+ (1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
1203+ TEST_COMPARE (stdc_bit_ceil (~0ULL >> 1),
1204+ (1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
1205+ TEST_COMPARE (stdc_bit_ceil ((uc) 1), 1);
1206+ TEST_COMPARE (stdc_bit_ceil ((uc) 2), 2);
1207+ TEST_COMPARE (stdc_bit_ceil ((us) 3U), 4);
1208+ TEST_COMPARE (stdc_bit_ceil ((us) 4U), 4);
1209+ TEST_COMPARE (stdc_bit_ceil (5U), 8U);
1210+ TEST_COMPARE (stdc_bit_ceil (269U), 512U);
1211+ TEST_COMPARE (stdc_bit_ceil (511UL), 512UL);
1212+ TEST_COMPARE (stdc_bit_ceil (512UL), 512UL);
1213+ TEST_COMPARE (stdc_bit_ceil (513ULL), 1024ULL);
1214+ TEST_COMPARE (stdc_bit_ceil (1025ULL), 2048ULL);
1215+# ifdef __SIZEOF_INT128__
1216+ TEST_COMPARE (stdc_leading_zeros ((unsigned __int128) 0),
1217+ sizeof (__int128) * CHAR_BIT);
1218+ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned __int128) 0), ui),
1219+ 1);
1220+ TEST_COMPARE (stdc_leading_zeros (~(unsigned __int128) 0), 0);
1221+ TEST_COMPARE (stdc_leading_ones ((unsigned __int128) 0), 0);
1222+ TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned __int128) 0), ui),
1223+ 1);
1224+ TEST_COMPARE (stdc_leading_ones (~(unsigned __int128) 0),
1225+ sizeof (__int128) * CHAR_BIT);
1226+ TEST_COMPARE (stdc_trailing_zeros ((unsigned __int128) 0),
1227+ sizeof (__int128) * CHAR_BIT);
1228+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned __int128) 0),
1229+ ui), 1);
1230+ TEST_COMPARE (stdc_trailing_zeros (~(unsigned __int128) 0), 0);
1231+ TEST_COMPARE (stdc_trailing_ones ((unsigned __int128) 0), 0);
1232+ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned __int128) 0), ui),
1233+ 1);
1234+ TEST_COMPARE (stdc_trailing_ones (~(unsigned __int128) 0),
1235+ sizeof (__int128) * CHAR_BIT);
1236+ TEST_COMPARE (stdc_first_leading_zero ((unsigned __int128) 0), 1);
1237+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned __int128) 0),
1238+ ui), 1);
1239+ TEST_COMPARE (stdc_first_leading_zero (~(unsigned __int128) 0), 0);
1240+ TEST_COMPARE (stdc_first_leading_one ((unsigned __int128) 0), 0);
1241+ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned __int128) 0),
1242+ ui), 1);
1243+ TEST_COMPARE (stdc_first_leading_one (~(unsigned __int128) 0), 1);
1244+ TEST_COMPARE (stdc_first_trailing_zero ((unsigned __int128) 0), 1);
1245+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned __int128)
1246+ 0), ui), 1);
1247+ TEST_COMPARE (stdc_first_trailing_zero (~(unsigned __int128) 0), 0);
1248+ TEST_COMPARE (stdc_first_trailing_one ((unsigned __int128) 0), 0);
1249+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned __int128) 0),
1250+ ui), 1);
1251+ TEST_COMPARE (stdc_first_trailing_one (~(unsigned __int128) 0), 1);
1252+ TEST_COMPARE (stdc_count_zeros ((unsigned __int128) 0),
1253+ sizeof (__int128) * CHAR_BIT);
1254+ TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned __int128) 0), ui),
1255+ 1);
1256+ TEST_COMPARE (stdc_count_zeros (~(unsigned __int128) 0), 0);
1257+ TEST_COMPARE (stdc_count_ones ((unsigned __int128) 0), 0);
1258+ TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned __int128) 0), ui),
1259+ 1);
1260+ TEST_COMPARE (stdc_count_ones (~(unsigned __int128) 0),
1261+ sizeof (__int128) * CHAR_BIT);
1262+ TEST_COMPARE (stdc_has_single_bit ((unsigned __int128) 0), 0);
1263+ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned __int128) 0),
1264+ _Bool), 1);
1265+ TEST_COMPARE (stdc_has_single_bit (~(unsigned __int128) 0), 0);
1266+ TEST_COMPARE (stdc_bit_width ((unsigned __int128) 0), 0);
1267+ TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned __int128) 0), ui), 1);
1268+ TEST_COMPARE (stdc_bit_width (~(unsigned __int128) 0),
1269+ sizeof (__int128) * CHAR_BIT);
1270+ TEST_COMPARE (stdc_bit_floor ((unsigned __int128) 0) != 0, 0);
1271+ TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned __int128) 0),
1272+ unsigned __int128), 1);
1273+ TEST_COMPARE (stdc_bit_floor (~(unsigned __int128) 0)
1274+ != ((unsigned __int128) 1) << (sizeof (__int128)
1275+ * CHAR_BIT - 1), 0);
1276+ TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 0) != 1, 0);
1277+ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned __int128) 0),
1278+ unsigned __int128), 1);
1279+ TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 1) != 1, 0);
1280+ TEST_COMPARE (stdc_bit_ceil ((~(unsigned __int128) 0) >> 1)
1281+ != ((unsigned __int128) 1) << (sizeof (__int128)
1282+ * CHAR_BIT - 1), 0);
1283+ TEST_COMPARE (stdc_bit_ceil (~(unsigned __int128) 0) != 0, 0);
1284+# endif
1285+ uc a = 0;
1286+ TEST_COMPARE (stdc_bit_width (a++), 0);
1287+ TEST_COMPARE (a, 1);
1288+ ull b = 0;
1289+ TEST_COMPARE (stdc_bit_width (b++), 0);
1290+ TEST_COMPARE (b, 1);
1291+ TEST_COMPARE (stdc_bit_floor (a++), 1);
1292+ TEST_COMPARE (a, 2);
1293+ TEST_COMPARE (stdc_bit_floor (b++), 1);
1294+ TEST_COMPARE (b, 2);
1295+ TEST_COMPARE (stdc_bit_ceil (a++), 2);
1296+ TEST_COMPARE (a, 3);
1297+ TEST_COMPARE (stdc_bit_ceil (b++), 2);
1298+ TEST_COMPARE (b, 3);
1299+ TEST_COMPARE (stdc_leading_zeros (a++), CHAR_BIT - 2);
1300+ TEST_COMPARE (a, 4);
1301+ TEST_COMPARE (stdc_leading_zeros (b++),
1302+ sizeof (long long int) * CHAR_BIT - 2);
1303+ TEST_COMPARE (b, 4);
1304+ TEST_COMPARE (stdc_leading_ones (a++), 0);
1305+ TEST_COMPARE (a, 5);
1306+ TEST_COMPARE (stdc_leading_ones (b++), 0);
1307+ TEST_COMPARE (b, 5);
1308+ TEST_COMPARE (stdc_trailing_zeros (a++), 0);
1309+ TEST_COMPARE (a, 6);
1310+ TEST_COMPARE (stdc_trailing_zeros (b++), 0);
1311+ TEST_COMPARE (b, 6);
1312+ TEST_COMPARE (stdc_trailing_ones (a++), 0);
1313+ TEST_COMPARE (a, 7);
1314+ TEST_COMPARE (stdc_trailing_ones (b++), 0);
1315+ TEST_COMPARE (b, 7);
1316+ TEST_COMPARE (stdc_first_leading_zero (a++), 1);
1317+ TEST_COMPARE (a, 8);
1318+ TEST_COMPARE (stdc_first_leading_zero (b++), 1);
1319+ TEST_COMPARE (b, 8);
1320+ TEST_COMPARE (stdc_first_leading_one (a++), CHAR_BIT - 3);
1321+ TEST_COMPARE (a, 9);
1322+ TEST_COMPARE (stdc_first_leading_one (b++),
1323+ sizeof (long long int) * CHAR_BIT - 3);
1324+ TEST_COMPARE (b, 9);
1325+ TEST_COMPARE (stdc_first_trailing_zero (a++), 2);
1326+ TEST_COMPARE (a, 10);
1327+ TEST_COMPARE (stdc_first_trailing_zero (b++), 2);
1328+ TEST_COMPARE (b, 10);
1329+ TEST_COMPARE (stdc_first_trailing_one (a++), 2);
1330+ TEST_COMPARE (a, 11);
1331+ TEST_COMPARE (stdc_first_trailing_one (b++), 2);
1332+ TEST_COMPARE (b, 11);
1333+ TEST_COMPARE (stdc_count_zeros (a++), CHAR_BIT - 3);
1334+ TEST_COMPARE (a, 12);
1335+ TEST_COMPARE (stdc_count_zeros (b++),
1336+ sizeof (long long int) * CHAR_BIT - 3);
1337+ TEST_COMPARE (b, 12);
1338+ TEST_COMPARE (stdc_count_ones (a++), 2);
1339+ TEST_COMPARE (a, 13);
1340+ TEST_COMPARE (stdc_count_ones (b++), 2);
1341+ TEST_COMPARE (b, 13);
1342+ TEST_COMPARE (stdc_has_single_bit (a++), 0);
1343+ TEST_COMPARE (a, 14);
1344+ TEST_COMPARE (stdc_has_single_bit (b++), 0);
1345+ TEST_COMPARE (b, 14);
1346+# ifdef BITINT_MAXWIDTH
1347+# if BITINT_MAXWIDTH >= 64
1348+ TEST_COMPARE (stdc_leading_zeros (0uwb), 1);
1349+ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0uwb), ui), 1);
1350+ TEST_COMPARE (stdc_leading_zeros (1uwb), 0);
1351+ TEST_COMPARE (expr_has_type (stdc_leading_zeros (1uwb), ui), 1);
1352+ TEST_COMPARE (stdc_leading_ones (0uwb), 0);
1353+ TEST_COMPARE (expr_has_type (stdc_leading_ones (0uwb), ui), 1);
1354+ TEST_COMPARE (stdc_leading_ones (1uwb), 1);
1355+ TEST_COMPARE (expr_has_type (stdc_leading_ones (1uwb), ui), 1);
1356+ TEST_COMPARE (stdc_trailing_zeros (0uwb), 1);
1357+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0uwb), ui), 1);
1358+ TEST_COMPARE (stdc_trailing_zeros (1uwb), 0);
1359+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (1uwb), ui), 1);
1360+ TEST_COMPARE (stdc_trailing_ones (0uwb), 0);
1361+ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0uwb), ui), 1);
1362+ TEST_COMPARE (stdc_trailing_ones (1uwb), 1);
1363+ TEST_COMPARE (expr_has_type (stdc_trailing_ones (1uwb), ui), 1);
1364+ TEST_COMPARE (stdc_first_leading_zero (0uwb), 1);
1365+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0uwb), ui), 1);
1366+ TEST_COMPARE (stdc_first_leading_zero (1uwb), 0);
1367+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (1uwb), ui), 1);
1368+ TEST_COMPARE (stdc_first_leading_one (0uwb), 0);
1369+ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0uwb), ui), 1);
1370+ TEST_COMPARE (stdc_first_leading_one (1uwb), 1);
1371+ TEST_COMPARE (expr_has_type (stdc_first_leading_one (1uwb), ui), 1);
1372+ TEST_COMPARE (stdc_first_trailing_zero (0uwb), 1);
1373+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0uwb), ui), 1);
1374+ TEST_COMPARE (stdc_first_trailing_zero (1uwb), 0);
1375+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (1uwb), ui), 1);
1376+ TEST_COMPARE (stdc_first_trailing_one (0uwb), 0);
1377+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0uwb), ui), 1);
1378+ TEST_COMPARE (stdc_first_trailing_one (1uwb), 1);
1379+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (1uwb), ui), 1);
1380+ TEST_COMPARE (stdc_count_zeros (0uwb), 1);
1381+ TEST_COMPARE (expr_has_type (stdc_count_zeros (0uwb), ui), 1);
1382+ TEST_COMPARE (stdc_count_zeros (1uwb), 0);
1383+ TEST_COMPARE (expr_has_type (stdc_count_zeros (1uwb), ui), 1);
1384+ TEST_COMPARE (stdc_count_ones (0uwb), 0);
1385+ TEST_COMPARE (expr_has_type (stdc_count_ones (0uwb), ui), 1);
1386+ TEST_COMPARE (stdc_count_ones (1uwb), 1);
1387+ TEST_COMPARE (expr_has_type (stdc_count_ones (1uwb), ui), 1);
1388+ TEST_COMPARE (stdc_has_single_bit (0uwb), 0);
1389+ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0uwb), _Bool), 1);
1390+ TEST_COMPARE (stdc_has_single_bit (1uwb), 1);
1391+ TEST_COMPARE (expr_has_type (stdc_has_single_bit (1uwb), _Bool), 1);
1392+ TEST_COMPARE (stdc_bit_width (0uwb), 0);
1393+ TEST_COMPARE (expr_has_type (stdc_bit_width (0uwb), ui), 1);
1394+ TEST_COMPARE (stdc_bit_width (1uwb), 1);
1395+ TEST_COMPARE (expr_has_type (stdc_bit_width (1uwb), ui), 1);
1396+ TEST_COMPARE (stdc_bit_floor (0uwb), 0);
1397+ TEST_COMPARE (expr_has_type (stdc_bit_floor (0uwb), unsigned _BitInt(1)), 1);
1398+ TEST_COMPARE (stdc_bit_floor (1uwb), 1);
1399+ TEST_COMPARE (expr_has_type (stdc_bit_floor (1uwb), unsigned _BitInt(1)), 1);
1400+ TEST_COMPARE (stdc_bit_ceil (0uwb), 1);
1401+ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0uwb), unsigned _BitInt(1)), 1);
1402+ TEST_COMPARE (stdc_bit_ceil (1uwb), 1);
1403+ TEST_COMPARE (expr_has_type (stdc_bit_ceil (1uwb), unsigned _BitInt(1)), 1);
1404+ unsigned _BitInt(1) c = 0;
1405+ TEST_COMPARE (stdc_bit_floor (c++), 0);
1406+ TEST_COMPARE (c, 1);
1407+ TEST_COMPARE (stdc_bit_floor (c++), 1);
1408+ TEST_COMPARE (c, 0);
1409+ TEST_COMPARE (stdc_bit_ceil (c++), 1);
1410+ TEST_COMPARE (c, 1);
1411+ TEST_COMPARE (stdc_bit_ceil (c++), 1);
1412+ TEST_COMPARE (c, 0);
1413+# endif
1414+# if BITINT_MAXWIDTH >= 512
1415+ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 0), 512);
1416+ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(512)) 0),
1417+ ui), 1);
1418+ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 0), 373);
1419+ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(373)) 0),
1420+ ui), 1);
1421+ TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(512)) 0), 0);
1422+ TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(373)) 0), 0);
1423+ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 275), 512 - 9);
1424+ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 512), 373 - 10);
1425+ TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(512)) 0), 0);
1426+ TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(512)) 0),
1427+ ui), 1);
1428+ TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(373)) 0), 0);
1429+ TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(373)) 0),
1430+ ui), 1);
1431+ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 0), 512);
1432+ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 0), 373);
1433+ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 275), 512 - 9);
1434+ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 512), 373 - 10);
1435+ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), 512);
1436+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(512)) 0),
1437+ ui), 1);
1438+ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), 373);
1439+ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(373)) 0),
1440+ ui), 1);
1441+ TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(512)) 0), 0);
1442+ TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(373)) 0), 0);
1443+ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 256), 8);
1444+ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 512), 9);
1445+ TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 0), 0);
1446+ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(512)) 0),
1447+ ui), 1);
1448+ TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(373)) 0), 0);
1449+ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(373)) 0),
1450+ ui), 1);
1451+ TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(512)) 0), 512);
1452+ TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(373)) 0), 373);
1453+ TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 255), 8);
1454+ TEST_COMPARE (stdc_trailing_ones ((~(unsigned _BitInt(373)) 0) >> 2),
1455+ 373 - 2);
1456+ TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(512)) 0), 1);
1457+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(512))
1458+ 0), ui), 1);
1459+ TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(373)) 0), 1);
1460+ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(373))
1461+ 0), ui), 1);
1462+ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 0), 0);
1463+ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 0), 0);
1464+ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 511),
1465+ 512 - 8);
1466+ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 1023),
1467+ 373 - 9);
1468+ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 0), 0);
1469+ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(512))
1470+ 0), ui), 1);
1471+ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 0), 0);
1472+ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(373))
1473+ 0), ui), 1);
1474+ TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(512)) 0), 1);
1475+ TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(373)) 0), 1);
1476+ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 275), 512 - 8);
1477+ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 512), 373 - 9);
1478+ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 0), 1);
1479+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned
1480+ _BitInt(512)) 0),
1481+ ui), 1);
1482+ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 0), 1);
1483+ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned
1484+ _BitInt(373)) 0),
1485+ ui), 1);
1486+ TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(512)) 0), 0);
1487+ TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(373)) 0), 0);
1488+ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 255), 9);
1489+ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 511), 10);
1490+ TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(512)) 0), 0);
1491+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(512))
1492+ 0), ui), 1);
1493+ TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(373)) 0), 0);
1494+ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(373))
1495+ 0), ui), 1);
1496+ TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(512)) 0), 1);
1497+ TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(373)) 0), 1);
1498+ TEST_COMPARE (stdc_first_trailing_one (((unsigned _BitInt(512)) 255) << 175),
1499+ 176);
1500+ TEST_COMPARE (stdc_first_trailing_one ((~(unsigned _BitInt(373)) 0) << 311),
1501+ 312);
1502+ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 0), 512);
1503+ TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(512)) 0),
1504+ ui), 1);
1505+ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 0), 373);
1506+ TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(373)) 0),
1507+ ui), 1);
1508+ TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(512)) 0), 0);
1509+ TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(373)) 0), 0);
1510+ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 1315), 512 - 5);
1511+ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 3363), 373 - 6);
1512+ TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(512)) 0), 0);
1513+ TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(512)) 0),
1514+ ui), 1);
1515+ TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(373)) 0), 0);
1516+ TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(373)) 0),
1517+ ui), 1);
1518+ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 0), 512);
1519+ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 0), 373);
1520+ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 1315), 512 - 5);
1521+ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 3363), 373 - 6);
1522+ TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(512)) 0), 0);
1523+ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(512)) 0),
1524+ _Bool), 1);
1525+ TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(373)) 0), 0);
1526+ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(373)) 0),
1527+ _Bool), 1);
1528+ TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(512)) 0), 0);
1529+ TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(373)) 0), 0);
1530+ TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(512)) 1022) << 279),
1531+ 0);
1532+ TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(373)) 12) << 305), 0);
1533+ TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(512)) 0), 0);
1534+ TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(512)) 0),
1535+ ui), 1);
1536+ TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(373)) 0), 0);
1537+ TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(373)) 0),
1538+ ui), 1);
1539+ TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(512)) 0), 512);
1540+ TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(373)) 0), 373);
1541+ TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(512)) 1023) << 405),
1542+ 405 + 10);
1543+ TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(373)) 1024) << 242),
1544+ 242 + 11);
1545+ TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(512)) 0) != 0, 0);
1546+ TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(512)) 0),
1547+ unsigned _BitInt(512)), 1);
1548+ TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(373)) 0) != 0, 0);
1549+ TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(373)) 0),
1550+ unsigned _BitInt(373)), 1);
1551+ TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(512)) 0)
1552+ != ((unsigned _BitInt(512)) 1) << (512 - 1), 0);
1553+ TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(373)) 0)
1554+ != ((unsigned _BitInt(373)) 1) << (373 - 1), 0);
1555+ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(512)) 511) << 405)
1556+ != (((unsigned _BitInt(512)) 256) << 405), 0);
1557+ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(373)) 512) << 242)
1558+ != (((unsigned _BitInt(512)) 512) << 242), 0);
1559+ TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(512)) 0) != 1, 0);
1560+ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(512)) 0),
1561+ unsigned _BitInt(512)), 1);
1562+ TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(373)) 0) != 1, 0);
1563+ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(373)) 0),
1564+ unsigned _BitInt(373)), 1);
1565+ TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(512)) 0) != 0, 0);
1566+ TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(373)) 0) != 0, 0);
1567+ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 1) << (512 - 1))
1568+ != ((unsigned _BitInt(512)) 1) << (512 - 1), 0);
1569+ TEST_COMPARE (stdc_bit_ceil ((~(unsigned _BitInt(373)) 0) >> 1)
1570+ != ((unsigned _BitInt(373)) 1) << (373 - 1), 0);
1571+ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 512) << 405)
1572+ != (((unsigned _BitInt(512)) 512) << 405), 0);
1573+ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(373)) 513) << 242)
1574+ != (((unsigned _BitInt(512)) 1024) << 242), 0);
1575+ TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0,
1576+ 0);
1577+ TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0)
1578+ != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH
1579+ - 1), 0);
1580+ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 511)
1581+ << 405)
1582+ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 256) << 405), 0);
1583+ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 512)
1584+ << 405)
1585+ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0);
1586+ TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 1, 0);
1587+ TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0,
1588+ 0);
1589+ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 1)
1590+ << (BITINT_MAXWIDTH - 1))
1591+ != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH
1592+ - 1), 0);
1593+ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 512)
1594+ << 405)
1595+ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0);
1596+ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 513)
1597+ << 405)
1598+ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 1024) << 405), 0);
1599+# endif
1600+# endif
1601+ return 0;
1602+}
1603+#else
1604+static int
1605+do_test (void)
1606+{
1607+ return 0;
1608+}
1609+#endif
1610+
1611+#include <support/test-driver.c>
1612
1613commit 71fcdba577884627c3ee4e43beb915da752efb1f
1614Author: Florian Weimer <fweimer@redhat.com>
1615Date: Fri Mar 15 19:08:24 2024 +0100
1616
1617 linux: Use rseq area unconditionally in sched_getcpu (bug 31479)
1618
1619 Originally, nptl/descr.h included <sys/rseq.h>, but we removed that
1620 in commit 2c6b4b272e6b4d07303af25709051c3e96288f2d ("nptl:
1621 Unconditionally use a 32-byte rseq area"). After that, it was
1622 not ensured that the RSEQ_SIG macro was defined during sched_getcpu.c
1623 compilation that provided a definition. This commit always checks
1624 the rseq area for CPU number information before using the other
1625 approaches.
1626
1627 This adds an unnecessary (but well-predictable) branch on
1628 architectures which do not define RSEQ_SIG, but its cost is small
1629 compared to the system call. Most architectures that have vDSO
1630 acceleration for getcpu also have rseq support.
1631
1632 Fixes: 2c6b4b272e6b4d07303af25709051c3e96288f2d
1633 Fixes: 1d350aa06091211863e41169729cee1bca39f72f
1634 Reviewed-by: Arjun Shankar <arjun@redhat.com>
1635 (cherry picked from commit 7a76f218677d149d8b7875b336722108239f7ee9)
1636
1637diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c
1638index dfb884568d..72a3360550 100644
1639--- a/sysdeps/unix/sysv/linux/sched_getcpu.c
1640+++ b/sysdeps/unix/sysv/linux/sched_getcpu.c
1641@@ -33,17 +33,9 @@ vsyscall_sched_getcpu (void)
1642 return r == -1 ? r : cpu;
1643 }
1644
1645-#ifdef RSEQ_SIG
1646 int
1647 sched_getcpu (void)
1648 {
1649 int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
1650 return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu ();
1651 }
1652-#else /* RSEQ_SIG */
1653-int
1654-sched_getcpu (void)
1655-{
1656- return vsyscall_sched_getcpu ();
1657-}
1658-#endif /* RSEQ_SIG */
1659
1660commit ee7f4c54e19738c2c27d3846e1e9b3595c89221f
1661Author: Manjunath Matti <mmatti@linux.ibm.com>
1662Date: Tue Mar 19 15:29:48 2024 -0500
1663
1664 powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
1665
1666 This patch adds a new feature for powerpc. In order to get faster
1667 access to the HWCAP3/HWCAP4 masks, similar to HWCAP/HWCAP2 (i.e. for
1668 implementing __builtin_cpu_supports() in GCC) without the overhead of
1669 reading them from the auxiliary vector, we now reserve space for them
1670 in the TCB.
1671
1672 Suggested-by: Peter Bergner <bergner@linux.ibm.com>
1673 Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
1674 (cherry picked from commit 3ab9b88e2ac91062b6d493fe32bd101a55006c6a)
1675
1676diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c
1677index 7345ebc4e5..aaf67b87e8 100644
1678--- a/elf/dl-diagnostics.c
1679+++ b/elf/dl-diagnostics.c
1680@@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ)
1681 _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap));
1682 _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT);
1683 _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2));
1684+ _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3));
1685+ _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4));
1686 _dl_diagnostics_print_labeled_string
1687 ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs);
1688 _dl_diagnostics_print_labeled_value
1689diff --git a/elf/dl-support.c b/elf/dl-support.c
1690index 2f502c8b0d..451932dd03 100644
1691--- a/elf/dl-support.c
1692+++ b/elf/dl-support.c
1693@@ -158,6 +158,8 @@ const ElfW(Phdr) *_dl_phdr;
1694 size_t _dl_phnum;
1695 uint64_t _dl_hwcap;
1696 uint64_t _dl_hwcap2;
1697+uint64_t _dl_hwcap3;
1698+uint64_t _dl_hwcap4;
1699
1700 enum dso_sort_algorithm _dl_dso_sort_algo;
1701
1702diff --git a/elf/elf.h b/elf/elf.h
1703index 455731663c..1c394c64cd 100644
1704--- a/elf/elf.h
1705+++ b/elf/elf.h
1706@@ -1234,6 +1234,10 @@ typedef struct
1707 #define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size. */
1708 #define AT_RSEQ_ALIGN 28 /* rseq allocation alignment. */
1709
1710+/* More machine-dependent hints about processor capabilities. */
1711+#define AT_HWCAP3 29 /* extension of AT_HWCAP. */
1712+#define AT_HWCAP4 30 /* extension of AT_HWCAP. */
1713+
1714 #define AT_EXECFN 31 /* Filename of executable. */
1715
1716 /* Pointer to the global system page used for system calls and other
1717diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
1718index 117c901ccc..50f58a60e3 100644
1719--- a/sysdeps/generic/ldsodefs.h
1720+++ b/sysdeps/generic/ldsodefs.h
1721@@ -646,6 +646,8 @@ struct rtld_global_ro
1722 /* Mask for more hardware capabilities that are available on some
1723 platforms. */
1724 EXTERN uint64_t _dl_hwcap2;
1725+ EXTERN uint64_t _dl_hwcap3;
1726+ EXTERN uint64_t _dl_hwcap4;
1727
1728 EXTERN enum dso_sort_algorithm _dl_dso_sort_algo;
1729
1730diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
1731index a76bb6e5b0..8cf00aa7e3 100644
1732--- a/sysdeps/powerpc/dl-procinfo.c
1733+++ b/sysdeps/powerpc/dl-procinfo.c
1734@@ -38,6 +38,10 @@
1735 needed.
1736 */
1737
1738+/* The total number of available bits (including those prior to
1739+ _DL_HWCAP_FIRST). Some of these bits might not be used. */
1740+#define _DL_HWCAP_COUNT 128
1741+
1742 #ifndef PROCINFO_CLASS
1743 # define PROCINFO_CLASS
1744 #endif
1745@@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features
1746 #if !defined PROCINFO_DECL && defined SHARED
1747 ._dl_powerpc_cap_flags
1748 #else
1749-PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
1750+PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15]
1751 #endif
1752 #ifndef PROCINFO_DECL
1753 = {
1754diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
1755index 68f4241095..f8cb343877 100644
1756--- a/sysdeps/powerpc/dl-procinfo.h
1757+++ b/sysdeps/powerpc/dl-procinfo.h
1758@@ -22,16 +22,17 @@
1759 #include <ldsodefs.h>
1760 #include <sysdep.h> /* This defines the PPC_FEATURE[2]_* macros. */
1761
1762-/* The total number of available bits (including those prior to
1763- _DL_HWCAP_FIRST). Some of these bits might not be used. */
1764-#define _DL_HWCAP_COUNT 64
1765+/* Feature masks are all 32-bits in size. */
1766+#define _DL_HWCAP_SIZE 32
1767
1768-/* Features started at bit 31 and decremented as new features were added. */
1769-#define _DL_HWCAP_LAST 31
1770+/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings. */
1771+#define _DL_HWCAP2_OFFSET _DL_HWCAP_SIZE
1772
1773-/* AT_HWCAP2 features started at bit 31 and decremented as new features were
1774- added. HWCAP2 feature bits start at bit 0. */
1775-#define _DL_HWCAP2_LAST 31
1776+/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings. */
1777+#define _DL_HWCAP3_OFFSET (_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE)
1778+
1779+/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings. */
1780+#define _DL_HWCAP4_OFFSET (_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE)
1781
1782 /* These bits influence library search. */
1783 #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \
1784@@ -187,21 +188,42 @@ _dl_procinfo (unsigned int type, unsigned long int word)
1785 case AT_HWCAP:
1786 _dl_printf ("AT_HWCAP: ");
1787
1788- for (int i = 0; i <= _DL_HWCAP_LAST; ++i)
1789+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
1790 if (word & (1 << i))
1791 _dl_printf (" %s", _dl_hwcap_string (i));
1792 break;
1793 case AT_HWCAP2:
1794 {
1795- unsigned int offset = _DL_HWCAP_LAST + 1;
1796
1797 _dl_printf ("AT_HWCAP2: ");
1798
1799- /* We have to go through them all because the kernel added the
1800- AT_HWCAP2 features starting with the high bits. */
1801- for (int i = 0; i <= _DL_HWCAP2_LAST; ++i)
1802- if (word & (1 << i))
1803- _dl_printf (" %s", _dl_hwcap_string (offset + i));
1804+ /* We have to go through them all because the kernel added the
1805+ AT_HWCAP2 features starting with the high bits. */
1806+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
1807+ if (word & (1 << i))
1808+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i));
1809+ break;
1810+ }
1811+ case AT_HWCAP3:
1812+ {
1813+ _dl_printf ("AT_HWCAP3: ");
1814+
1815+ /* We have to go through them all because the kernel added the
1816+ AT_HWCAP3 features starting with the high bits. */
1817+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
1818+ if (word & (1 << i))
1819+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i));
1820+ break;
1821+ }
1822+ case AT_HWCAP4:
1823+ {
1824+ _dl_printf ("AT_HWCAP4: ");
1825+
1826+ /* We have to go through all _DL_HWCAP_SIZE bits (0..31) because the
1827+ kernel added the AT_HWCAP4 features starting with the high bits. */
1828+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
1829+ if (word & (1 << i))
1830+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i));
1831 break;
1832 }
1833 case AT_L1I_CACHEGEOMETRY:
1834diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c
1835index 76344f285a..f6fede15a7 100644
1836--- a/sysdeps/powerpc/hwcapinfo.c
1837+++ b/sysdeps/powerpc/hwcapinfo.c
1838@@ -31,7 +31,7 @@ void
1839 __tcb_parse_hwcap_and_convert_at_platform (void)
1840 {
1841
1842- uint64_t h1, h2;
1843+ uint64_t h1, h2, h3, h4;
1844
1845 /* Read AT_PLATFORM string from auxv and convert it to a number. */
1846 __tcb.at_platform = _dl_string_platform (GLRO (dl_platform));
1847@@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
1848 /* Read HWCAP and HWCAP2 from auxv. */
1849 h1 = GLRO (dl_hwcap);
1850 h2 = GLRO (dl_hwcap2);
1851+ h3 = GLRO (dl_hwcap3);
1852+ h4 = GLRO (dl_hwcap4);
1853
1854 /* hwcap contains only the latest supported ISA, the code checks which is
1855 and fills the previous supported ones. */
1856@@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
1857 else if (h1 & PPC_FEATURE_POWER5)
1858 h1 |= PPC_FEATURE_POWER4;
1859
1860- uint64_t array_hwcaps[] = { h1, h2 };
1861+ uint64_t array_hwcaps[] = { h1, h2, h3, h4 };
1862 init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps);
1863
1864 /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that
1865 we can read both in a single load later. */
1866 __tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff);
1867- __tcb.hwcap_extn = 0x0;
1868+
1869+ /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that
1870+ we can read both in a single load later. */
1871+ __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff);
1872
1873 }
1874 #if IS_IN (rtld)
1875diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
1876index e3d758b163..ea2a58ecb1 100644
1877--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h
1878+++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
1879@@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
1880 GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM];
1881 GLRO(dl_hwcap) = auxv_values[AT_HWCAP];
1882 GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2];
1883+ GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3];
1884+ GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4];
1885 GLRO(dl_clktck) = auxv_values[AT_CLKTCK];
1886 GLRO(dl_fpu_control) = auxv_values[AT_FPUCW];
1887 _dl_random = (void *) auxv_values[AT_RANDOM];
1888diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
1889index ad3692d738..e1b14e9eb3 100644
1890--- a/sysdeps/unix/sysv/linux/dl-sysdep.c
1891+++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
1892@@ -197,6 +197,8 @@ _dl_show_auxv (void)
1893 [AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex },
1894 [AT_RANDOM - 2] = { "RANDOM: 0x", hex },
1895 [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex },
1896+ [AT_HWCAP3 - 2] = { "HWCAP3: 0x", hex },
1897+ [AT_HWCAP4 - 2] = { "HWCAP4: 0x", hex },
1898 [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec },
1899 [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec },
1900 [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex },
1901diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
1902index 8e8a5ec2ea..a947d62db6 100644
1903--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
1904+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
1905@@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[])
1906 which are set by __tcb_parse_hwcap_and_convert_at_platform. */
1907 cpu_features->hwcap = hwcaps[0];
1908 cpu_features->hwcap2 = hwcaps[1];
1909+ cpu_features->hwcap3 = hwcaps[2];
1910+ cpu_features->hwcap4 = hwcaps[3];
1911 /* Default is to use aligned memory access on optimized function unless
1912 tunables is enable, since for this case user can explicit disable
1913 unaligned optimizations. */
1914diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
1915index 1294f0b601..e9eb6a13c8 100644
1916--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
1917+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
1918@@ -26,6 +26,8 @@ struct cpu_features
1919 bool use_cached_memopt;
1920 unsigned long int hwcap;
1921 unsigned long int hwcap2;
1922+ unsigned long int hwcap3;
1923+ unsigned long int hwcap4;
1924 };
1925
1926 static const char hwcap_names[] = {
1927diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
1928index a4705daf1c..6a00cd88cd 100644
1929--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
1930+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
1931@@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv,
1932 case AT_HWCAP2:
1933 _dl_hwcap2 = (unsigned long int) av->a_un.a_val;
1934 break;
1935+ case AT_HWCAP3:
1936+ _dl_hwcap3 = (unsigned long int) av->a_un.a_val;
1937+ break;
1938+ case AT_HWCAP4:
1939+ _dl_hwcap4 = (unsigned long int) av->a_un.a_val;
1940+ break;
1941 case AT_PLATFORM:
1942 _dl_platform = (void *) av->a_un.a_val;
1943 break;
1944
1945commit aad45c8ac30aa1072e54903ce6aead22702f244a
1946Author: Amrita H S <amritahs@linux.ibm.com>
1947Date: Tue Mar 19 19:08:47 2024 -0500
1948
1949 powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
1950
1951 The following three changes have been added to provide initial Power11 support.
1952 1. Add the directories to hold Power11 files.
1953 2. Add support to select Power11 libraries based on AT_PLATFORM.
1954 3. Let submachine=power11 be set automatically.
1955
1956 Reviewed-by: Florian Weimer <fweimer@redhat.com>
1957 Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
1958 (cherry picked from commit 1ea051145612f199d8716ecdf78b084b00b5a727)
1959
1960diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
1961index f8cb343877..b36697ba44 100644
1962--- a/sysdeps/powerpc/dl-procinfo.h
1963+++ b/sysdeps/powerpc/dl-procinfo.h
1964@@ -38,7 +38,7 @@
1965 #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \
1966 + PPC_FEATURE_HAS_DFP)
1967
1968-#define _DL_PLATFORMS_COUNT 16
1969+#define _DL_PLATFORMS_COUNT 17
1970
1971 #define _DL_FIRST_PLATFORM 32
1972 /* Mask to filter out platforms. */
1973@@ -62,6 +62,7 @@
1974 #define PPC_PLATFORM_POWER8 13
1975 #define PPC_PLATFORM_POWER9 14
1976 #define PPC_PLATFORM_POWER10 15
1977+#define PPC_PLATFORM_POWER11 16
1978
1979 static inline const char *
1980 __attribute__ ((unused))
1981@@ -89,6 +90,11 @@ _dl_string_platform (const char *str)
1982 ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10;
1983 str++;
1984 }
1985+ else if (str[1] == '1')
1986+ {
1987+ ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11;
1988+ str++;
1989+ }
1990 else
1991 return -1;
1992 break;
1993diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies
1994new file mode 100644
1995index 0000000000..051cbe0f79
1996--- /dev/null
1997+++ b/sysdeps/powerpc/powerpc32/power11/Implies
1998@@ -0,0 +1,2 @@
1999+powerpc/powerpc32/power10/fpu
2000+powerpc/powerpc32/power10
2001diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
2002new file mode 100644
2003index 0000000000..58edb2861d
2004--- /dev/null
2005+++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
2006@@ -0,0 +1 @@
2007+powerpc/powerpc32/power10/fpu/multiarch
2008diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
2009new file mode 100644
2010index 0000000000..c70f0428ba
2011--- /dev/null
2012+++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
2013@@ -0,0 +1 @@
2014+powerpc/powerpc32/power10/multiarch
2015diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies
2016new file mode 100644
2017index 0000000000..de481d1c13
2018--- /dev/null
2019+++ b/sysdeps/powerpc/powerpc64/be/power11/Implies
2020@@ -0,0 +1,2 @@
2021+powerpc/powerpc64/be/power10/fpu
2022+powerpc/powerpc64/be/power10
2023diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
2024new file mode 100644
2025index 0000000000..dff0e13064
2026--- /dev/null
2027+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
2028@@ -0,0 +1 @@
2029+powerpc/powerpc64/be/power10/fpu
2030diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
2031new file mode 100644
2032index 0000000000..c3f259e009
2033--- /dev/null
2034+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
2035@@ -0,0 +1 @@
2036+powerpc/powerpc64/be/power10/fpu/multiarch
2037diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
2038new file mode 100644
2039index 0000000000..9491a394c9
2040--- /dev/null
2041+++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
2042@@ -0,0 +1 @@
2043+powerpc/powerpc64/be/power10/multiarch
2044diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies
2045new file mode 100644
2046index 0000000000..e18182dcc1
2047--- /dev/null
2048+++ b/sysdeps/powerpc/powerpc64/le/power11/Implies
2049@@ -0,0 +1,2 @@
2050+powerpc/powerpc64/le/power10/fpu
2051+powerpc/powerpc64/le/power10
2052diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
2053new file mode 100644
2054index 0000000000..e41bd55684
2055--- /dev/null
2056+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
2057@@ -0,0 +1 @@
2058+powerpc/powerpc64/le/power10/fpu
2059diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
2060new file mode 100644
2061index 0000000000..c838d50931
2062--- /dev/null
2063+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
2064@@ -0,0 +1 @@
2065+powerpc/powerpc64/le/power10/fpu/multiarch
2066diff --git a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
2067new file mode 100644
2068index 0000000000..687248c3c2
2069--- /dev/null
2070+++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
2071@@ -0,0 +1 @@
2072+powerpc/powerpc64/le/power10/multiarch
2073diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
2074index 77465d9133..65d3e69303 100644
2075--- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
2076+++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
2077@@ -36,9 +36,11 @@ compute_level (void)
2078 return 9;
2079 if (strcmp (platform, "power10") == 0)
2080 return 10;
2081+ if (strcmp (platform, "power11") == 0)
2082+ return 11;
2083 printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
2084- /* Assume that the new platform supports POWER10. */
2085- return 10;
2086+ /* Assume that the new platform supports POWER11. */
2087+ return 11;
2088 }
2089
2090 static int
2091diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure
2092index 4de94089a3..9e5a07ab6d 100644
2093--- a/sysdeps/powerpc/preconfigure
2094+++ b/sysdeps/powerpc/preconfigure
2095@@ -58,7 +58,7 @@ fi
2096
2097 ;;
2098
2099- a2|970|power[4-9]|power5x|power6+|power10)
2100+ a2|970|power[4-9]|power5x|power6+|power10|power11)
2101 submachine=${archcpu}
2102 if test ${libc_cv_cc_submachine+y}
2103 then :
2104diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac
2105index 6c63bd8257..14b6dafd4a 100644
2106--- a/sysdeps/powerpc/preconfigure.ac
2107+++ b/sysdeps/powerpc/preconfigure.ac
2108@@ -46,7 +46,7 @@ case "${machine}:${submachine}" in
2109 AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
2110 ;;
2111
2112- a2|970|power[[4-9]]|power5x|power6+|power10)
2113+ a2|970|power[[4-9]]|power5x|power6+|power10|power11)
2114 submachine=${archcpu}
2115 AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
2116 ;;
2117
2118commit 983f34a1252de3ca6f2305c211d86530ea42010e
2119Author: caiyinyu <caiyinyu@loongson.cn>
2120Date: Mon Mar 11 16:07:48 2024 +0800
2121
2122 LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
2123
2124diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c
2125index 9f05485236..7c0395fbb5 100644
2126--- a/sysdeps/loongarch/fpu/e_scalbf.c
2127+++ b/sysdeps/loongarch/fpu/e_scalbf.c
2128@@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn)
2129
2130 return x;
2131 }
2132-libm_alias_finite (__ieee754_scalb, __scalb)
2133+libm_alias_finite (__ieee754_scalbf, __scalbf)
2134
2135commit 7fc8242bf87828c935ac5df5cafb9dc7ab635fd9
2136Author: H.J. Lu <hjl.tools@gmail.com>
2137Date: Fri Feb 16 07:17:10 2024 -0800
2138
2139 x86-64: Save APX registers in ld.so trampoline
2140
2141 Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
2142 ld.so trampoline. This fixes BZ #31371.
2143
2144 Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
2145 be used by i386 _dl_tlsdesc_dynamic.
2146 Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
2147
2148 (cherry picked from commit dfb05f8e704edac70db38c4c8ee700769d91a413)
2149
2150diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
2151index 85d0a8c943..837fd28734 100644
2152--- a/sysdeps/x86/sysdep.h
2153+++ b/sysdeps/x86/sysdep.h
2154@@ -21,14 +21,54 @@
2155
2156 #include <sysdeps/generic/sysdep.h>
2157
2158+/* The extended state feature IDs in the state component bitmap. */
2159+#define X86_XSTATE_X87_ID 0
2160+#define X86_XSTATE_SSE_ID 1
2161+#define X86_XSTATE_AVX_ID 2
2162+#define X86_XSTATE_BNDREGS_ID 3
2163+#define X86_XSTATE_BNDCFG_ID 4
2164+#define X86_XSTATE_K_ID 5
2165+#define X86_XSTATE_ZMM_H_ID 6
2166+#define X86_XSTATE_ZMM_ID 7
2167+#define X86_XSTATE_PKRU_ID 9
2168+#define X86_XSTATE_TILECFG_ID 17
2169+#define X86_XSTATE_TILEDATA_ID 18
2170+#define X86_XSTATE_APX_F_ID 19
2171+
2172+#ifdef __x86_64__
2173 /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
2174 space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
2175- aligned to 16 bytes for fxsave and 64 bytes for xsave. */
2176-#define STATE_SAVE_OFFSET (8 * 7 + 8)
2177-
2178-/* Save SSE, AVX, AVX512, mask and bound registers. */
2179-#define STATE_SAVE_MASK \
2180- ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
2181+ aligned to 16 bytes for fxsave and 64 bytes for xsave.
2182+
2183+ NB: It is non-zero because of the 128-byte red-zone. Some registers
2184+ are saved on stack without adjusting stack pointer first. When we
2185+ update stack pointer to allocate more space, we need to take the
2186+ red-zone into account. */
2187+# define STATE_SAVE_OFFSET (8 * 7 + 8)
2188+
2189+/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
2190+ registers are mutually exclusive. */
2191+# define STATE_SAVE_MASK \
2192+ ((1 << X86_XSTATE_SSE_ID) \
2193+ | (1 << X86_XSTATE_AVX_ID) \
2194+ | (1 << X86_XSTATE_BNDREGS_ID) \
2195+ | (1 << X86_XSTATE_K_ID) \
2196+ | (1 << X86_XSTATE_ZMM_H_ID) \
2197+ | (1 << X86_XSTATE_ZMM_ID) \
2198+ | (1 << X86_XSTATE_APX_F_ID))
2199+#else
2200+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
2201+ doesn't have red-zone, use 0 here. */
2202+# define STATE_SAVE_OFFSET 0
2203+
2204+/* Save SSE, AVX, AVX512, mask and bound registers. */
2205+# define STATE_SAVE_MASK \
2206+ ((1 << X86_XSTATE_SSE_ID) \
2207+ | (1 << X86_XSTATE_AVX_ID) \
2208+ | (1 << X86_XSTATE_BNDREGS_ID) \
2209+ | (1 << X86_XSTATE_K_ID) \
2210+ | (1 << X86_XSTATE_ZMM_H_ID))
2211+#endif
2212
2213 /* Constants for bits in __x86_string_control: */
2214
2215
2216commit a364304718725a31ab141936322855c76c73e35e
2217Author: H.J. Lu <hjl.tools@gmail.com>
2218Date: Mon Feb 26 06:37:03 2024 -0800
2219
2220 x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers
2221
2222 Compiler generates the following instruction sequence for GNU2 dynamic
2223 TLS access:
2224
2225 leaq tls_var@TLSDESC(%rip), %rax
2226 call *tls_var@TLSCALL(%rax)
2227
2228 or
2229
2230 leal tls_var@TLSDESC(%ebx), %eax
2231 call *tls_var@TLSCALL(%eax)
2232
2233 CALL instruction is transparent to compiler which assumes all registers,
2234 except for EFLAGS and RAX/EAX, are unchanged after CALL. When
2235 _dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow
2236 path. __tls_get_addr is a normal function which doesn't preserve any
2237 caller-saved registers. _dl_tlsdesc_dynamic saved and restored integer
2238 caller-saved registers, but didn't preserve any other caller-saved
2239 registers. Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE,
2240 XSAVE and XSAVEC to save and restore all caller-saved registers. This
2241 fixes BZ #31372.
2242
2243 Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic)
2244 to optimize elf_machine_runtime_setup.
2245 Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
2246
2247 (cherry picked from commit 0aac205a814a8511e98d02b91a8dc908f1c53cde)
2248
2249diff --git a/elf/Makefile b/elf/Makefile
2250index 5d78b659ce..c5c37a9147 100644
2251--- a/elf/Makefile
2252+++ b/elf/Makefile
2253@@ -424,6 +424,7 @@ tests += \
2254 tst-glibc-hwcaps-prepend \
2255 tst-global1 \
2256 tst-global2 \
2257+ tst-gnu2-tls2 \
2258 tst-initfinilazyfail \
2259 tst-initorder \
2260 tst-initorder2 \
2261@@ -846,6 +847,9 @@ modules-names += \
2262 tst-filterobj-flt \
2263 tst-finilazyfailmod \
2264 tst-globalmod2 \
2265+ tst-gnu2-tls2mod0 \
2266+ tst-gnu2-tls2mod1 \
2267+ tst-gnu2-tls2mod2 \
2268 tst-initlazyfailmod \
2269 tst-initorder2a \
2270 tst-initorder2b \
2271@@ -3044,8 +3048,22 @@ $(objpfx)tst-tlsgap.out: \
2272 $(objpfx)tst-tlsgap-mod0.so \
2273 $(objpfx)tst-tlsgap-mod1.so \
2274 $(objpfx)tst-tlsgap-mod2.so
2275+
2276+$(objpfx)tst-gnu2-tls2: $(shared-thread-library)
2277+$(objpfx)tst-gnu2-tls2.out: \
2278+ $(objpfx)tst-gnu2-tls2mod0.so \
2279+ $(objpfx)tst-gnu2-tls2mod1.so \
2280+ $(objpfx)tst-gnu2-tls2mod2.so
2281+
2282 ifeq (yes,$(have-mtls-dialect-gnu2))
2283+# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved
2284+# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372
2285+test-xfail-tst-gnu2-tls2 = yes
2286+
2287 CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
2288 CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
2289 CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
2290+CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2
2291+CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2
2292+CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2
2293 endif
2294diff --git a/elf/tst-gnu2-tls2.c b/elf/tst-gnu2-tls2.c
2295new file mode 100644
2296index 0000000000..7ac04d7f33
2297--- /dev/null
2298+++ b/elf/tst-gnu2-tls2.c
2299@@ -0,0 +1,122 @@
2300+/* Test TLSDESC relocation.
2301+ Copyright (C) 2024 Free Software Foundation, Inc.
2302+ This file is part of the GNU C Library.
2303+
2304+ The GNU C Library is free software; you can redistribute it and/or
2305+ modify it under the terms of the GNU Lesser General Public
2306+ License as published by the Free Software Foundation; either
2307+ version 2.1 of the License, or (at your option) any later version.
2308+
2309+ The GNU C Library is distributed in the hope that it will be useful,
2310+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2311+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
2312+ Lesser General Public License for more details.
2313+
2314+ You should have received a copy of the GNU Lesser General Public
2315+ License along with the GNU C Library; if not, see
2316+ <https://www.gnu.org/licenses/>. */
2317+
2318+#include <stdio.h>
2319+#include <stdlib.h>
2320+#include <string.h>
2321+#include <dlfcn.h>
2322+#include <pthread.h>
2323+#include <support/xdlfcn.h>
2324+#include <support/xthread.h>
2325+#include <support/check.h>
2326+#include <support/test-driver.h>
2327+#include "tst-gnu2-tls2.h"
2328+
2329+#ifndef IS_SUPPORTED
2330+# define IS_SUPPORTED() true
2331+#endif
2332+
2333+/* An architecture can define it to clobber caller-saved registers in
2334+ malloc below to verify that the implicit TLSDESC call won't change
2335+ caller-saved registers. */
2336+#ifndef PREPARE_MALLOC
2337+# define PREPARE_MALLOC()
2338+#endif
2339+
2340+extern void * __libc_malloc (size_t);
2341+
2342+size_t malloc_counter = 0;
2343+
2344+void *
2345+malloc (size_t n)
2346+{
2347+ PREPARE_MALLOC ();
2348+ malloc_counter++;
2349+ return __libc_malloc (n);
2350+}
2351+
2352+static void *mod[3];
2353+#ifndef MOD
2354+# define MOD(i) "tst-gnu2-tls2mod" #i ".so"
2355+#endif
2356+static const char *modname[3] = { MOD(0), MOD(1), MOD(2) };
2357+#undef MOD
2358+
2359+static void
2360+open_mod (int i)
2361+{
2362+ mod[i] = xdlopen (modname[i], RTLD_LAZY);
2363+ printf ("open %s\n", modname[i]);
2364+}
2365+
2366+static void
2367+close_mod (int i)
2368+{
2369+ xdlclose (mod[i]);
2370+ mod[i] = NULL;
2371+ printf ("close %s\n", modname[i]);
2372+}
2373+
2374+static void
2375+access_mod (int i, const char *sym)
2376+{
2377+ struct tls var = { -1, -1, -1, -1 };
2378+ struct tls *(*f) (struct tls *) = xdlsym (mod[i], sym);
2379+ /* Check that our malloc is called. */
2380+ malloc_counter = 0;
2381+ struct tls *p = f (&var);
2382+ TEST_VERIFY (malloc_counter != 0);
2383+ printf ("access %s: %s() = %p\n", modname[i], sym, p);
2384+ TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0);
2385+ ++(p->a);
2386+}
2387+
2388+static void *
2389+start (void *arg)
2390+{
2391+ /* The DTV generation is at the last dlopen of mod0 and the
2392+ entry for mod1 is NULL. */
2393+
2394+ open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS. */
2395+
2396+ /* Force the slow path in GNU2 TLS descriptor call. */
2397+ access_mod (1, "apply_tls");
2398+
2399+ return arg;
2400+}
2401+
2402+static int
2403+do_test (void)
2404+{
2405+ if (!IS_SUPPORTED ())
2406+ return EXIT_UNSUPPORTED;
2407+
2408+ open_mod (0);
2409+ open_mod (1);
2410+ open_mod (2);
2411+ close_mod (0);
2412+ close_mod (1); /* Create modid gap at mod1. */
2413+ open_mod (0); /* Reuse modid of mod0, bump generation count. */
2414+
2415+ /* Create a thread where DTV of mod1 is NULL. */
2416+ pthread_t t = xpthread_create (NULL, start, NULL);
2417+ xpthread_join (t);
2418+ return 0;
2419+}
2420+
2421+#include <support/test-driver.c>
2422diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
2423new file mode 100644
2424index 0000000000..77964a57a3
2425--- /dev/null
2426+++ b/elf/tst-gnu2-tls2.h
2427@@ -0,0 +1,36 @@
2428+/* Test TLSDESC relocation.
2429+ Copyright (C) 2024 Free Software Foundation, Inc.
2430+ This file is part of the GNU C Library.
2431+
2432+ The GNU C Library is free software; you can redistribute it and/or
2433+ modify it under the terms of the GNU Lesser General Public
2434+ License as published by the Free Software Foundation; either
2435+ version 2.1 of the License, or (at your option) any later version.
2436+
2437+ The GNU C Library is distributed in the hope that it will be useful,
2438+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2439+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
2440+ Lesser General Public License for more details.
2441+
2442+ You should have received a copy of the GNU Lesser General Public
2443+ License along with the GNU C Library; if not, see
2444+ <https://www.gnu.org/licenses/>. */
2445+
2446+#include <stdint.h>
2447+
2448+struct tls
2449+{
2450+ int64_t a, b, c, d;
2451+};
2452+
2453+extern struct tls *apply_tls (struct tls *);
2454+
2455+/* An architecture can define them to verify that caller-saved
2456+ registers aren't clobbered by the implicit TLSDESC call. */
2457+#ifndef BEFORE_TLSDESC_CALL
2458+# define BEFORE_TLSDESC_CALL()
2459+#endif
2460+
2461+#ifndef AFTER_TLSDESC_CALL
2462+# define AFTER_TLSDESC_CALL()
2463+#endif
2464diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
2465new file mode 100644
2466index 0000000000..45556a0e17
2467--- /dev/null
2468+++ b/elf/tst-gnu2-tls2mod0.c
2469@@ -0,0 +1,31 @@
2470+/* DSO used by tst-gnu2-tls2.
2471+ Copyright (C) 2024 Free Software Foundation, Inc.
2472+ This file is part of the GNU C Library.
2473+
2474+ The GNU C Library is free software; you can redistribute it and/or
2475+ modify it under the terms of the GNU Lesser General Public
2476+ License as published by the Free Software Foundation; either
2477+ version 2.1 of the License, or (at your option) any later version.
2478+
2479+ The GNU C Library is distributed in the hope that it will be useful,
2480+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2481+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
2482+ Lesser General Public License for more details.
2483+
2484+ You should have received a copy of the GNU Lesser General Public
2485+ License along with the GNU C Library; if not, see
2486+ <https://www.gnu.org/licenses/>. */
2487+
2488+#include "tst-gnu2-tls2.h"
2489+
2490+__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
2491+
2492+struct tls *
2493+apply_tls (struct tls *p)
2494+{
2495+ BEFORE_TLSDESC_CALL ();
2496+ tls_var0 = *p;
2497+ struct tls *ret = &tls_var0;
2498+ AFTER_TLSDESC_CALL ();
2499+ return ret;
2500+}
2501diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
2502new file mode 100644
2503index 0000000000..e10b9dbc0a
2504--- /dev/null
2505+++ b/elf/tst-gnu2-tls2mod1.c
2506@@ -0,0 +1,31 @@
2507+/* DSO used by tst-gnu2-tls2.
2508+ Copyright (C) 2024 Free Software Foundation, Inc.
2509+ This file is part of the GNU C Library.
2510+
2511+ The GNU C Library is free software; you can redistribute it and/or
2512+ modify it under the terms of the GNU Lesser General Public
2513+ License as published by the Free Software Foundation; either
2514+ version 2.1 of the License, or (at your option) any later version.
2515+
2516+ The GNU C Library is distributed in the hope that it will be useful,
2517+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2518+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
2519+ Lesser General Public License for more details.
2520+
2521+ You should have received a copy of the GNU Lesser General Public
2522+ License along with the GNU C Library; if not, see
2523+ <https://www.gnu.org/licenses/>. */
2524+
2525+#include "tst-gnu2-tls2.h"
2526+
2527+__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
2528+
2529+struct tls *
2530+apply_tls (struct tls *p)
2531+{
2532+ BEFORE_TLSDESC_CALL ();
2533+ tls_var1[1] = *p;
2534+ struct tls *ret = &tls_var1[1];
2535+ AFTER_TLSDESC_CALL ();
2536+ return ret;
2537+}
2538diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
2539new file mode 100644
2540index 0000000000..141af51e55
2541--- /dev/null
2542+++ b/elf/tst-gnu2-tls2mod2.c
2543@@ -0,0 +1,31 @@
2544+/* DSO used by tst-gnu2-tls2.
2545+ Copyright (C) 2024 Free Software Foundation, Inc.
2546+ This file is part of the GNU C Library.
2547+
2548+ The GNU C Library is free software; you can redistribute it and/or
2549+ modify it under the terms of the GNU Lesser General Public
2550+ License as published by the Free Software Foundation; either
2551+ version 2.1 of the License, or (at your option) any later version.
2552+
2553+ The GNU C Library is distributed in the hope that it will be useful,
2554+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2555+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
2556+ Lesser General Public License for more details.
2557+
2558+ You should have received a copy of the GNU Lesser General Public
2559+ License along with the GNU C Library; if not, see
2560+ <https://www.gnu.org/licenses/>. */
2561+
2562+#include "tst-gnu2-tls2.h"
2563+
2564+__thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
2565+
2566+struct tls *
2567+apply_tls (struct tls *p)
2568+{
2569+ BEFORE_TLSDESC_CALL ();
2570+ tls_var2 = *p;
2571+ struct tls *ret = &tls_var2;
2572+ AFTER_TLSDESC_CALL ();
2573+ return ret;
2574+}
2575diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
2576index fc1ef96587..50d74fe6e9 100644
2577--- a/sysdeps/i386/dl-machine.h
2578+++ b/sysdeps/i386/dl-machine.h
2579@@ -347,7 +347,7 @@ and creates an unsatisfiable circular dependency.\n",
2580 {
2581 td->arg = _dl_make_tlsdesc_dynamic
2582 (sym_map, sym->st_value + (ElfW(Word))td->arg);
2583- td->entry = _dl_tlsdesc_dynamic;
2584+ td->entry = GLRO(dl_x86_tlsdesc_dynamic);
2585 }
2586 else
2587 # endif
2588diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h
2589new file mode 100644
2590index 0000000000..3627028577
2591--- /dev/null
2592+++ b/sysdeps/i386/dl-tlsdesc-dynamic.h
2593@@ -0,0 +1,190 @@
2594+/* Thread-local storage handling in the ELF dynamic linker. i386 version.
2595+ Copyright (C) 2004-2024 Free Software Foundation, Inc.
2596+ This file is part of the GNU C Library.
2597+
2598+ The GNU C Library is free software; you can redistribute it and/or
2599+ modify it under the terms of the GNU Lesser General Public
2600+ License as published by the Free Software Foundation; either
2601+ version 2.1 of the License, or (at your option) any later version.
2602+
2603+ The GNU C Library is distributed in the hope that it will be useful,
2604+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2605+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
2606+ Lesser General Public License for more details.
2607+
2608+ You should have received a copy of the GNU Lesser General Public
2609+ License along with the GNU C Library; if not, see
2610+ <https://www.gnu.org/licenses/>. */
2611+
2612+#undef REGISTER_SAVE_AREA
2613+
2614+#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
2615+# error STATE_SAVE_ALIGNMENT must be multiple of 16
2616+#endif
2617+
2618+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
2619+# ifdef USE_FNSAVE
2620+# error USE_FNSAVE shouldn't be defined
2621+# endif
2622+# ifdef USE_FXSAVE
2623+/* Use fxsave to save all registers. */
2624+# define REGISTER_SAVE_AREA 512
2625+# endif
2626+#else
2627+# ifdef USE_FNSAVE
2628+/* Use fnsave to save x87 FPU stack registers. */
2629+# define REGISTER_SAVE_AREA 108
2630+# else
2631+# ifndef USE_FXSAVE
2632+# error USE_FXSAVE must be defined
2633+# endif
2634+/* Use fxsave to save all registers. Add 12 bytes to align the stack
2635+ to 16 bytes. */
2636+# define REGISTER_SAVE_AREA (512 + 12)
2637+# endif
2638+#endif
2639+
2640+ .hidden _dl_tlsdesc_dynamic
2641+ .global _dl_tlsdesc_dynamic
2642+ .type _dl_tlsdesc_dynamic,@function
2643+
2644+ /* This function is used for symbols that need dynamic TLS.
2645+
2646+ %eax points to the TLS descriptor, such that 0(%eax) points to
2647+ _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
2648+ tlsdesc_dynamic_arg object. It must return in %eax the offset
2649+ between the thread pointer and the object denoted by the
2650+ argument, without clobbering any registers.
2651+
2652+ The assembly code that follows is a rendition of the following
2653+ C code, hand-optimized a little bit.
2654+
2655+ptrdiff_t
2656+__attribute__ ((__regparm__ (1)))
2657+_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
2658+{
2659+ struct tlsdesc_dynamic_arg *td = tdp->arg;
2660+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
2661+ if (__builtin_expect (td->gen_count <= dtv[0].counter
2662+ && (dtv[td->tlsinfo.ti_module].pointer.val
2663+ != TLS_DTV_UNALLOCATED),
2664+ 1))
2665+ return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
2666+ - __thread_pointer;
2667+
2668+ return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
2669+}
2670+*/
2671+ cfi_startproc
2672+ .align 16
2673+_dl_tlsdesc_dynamic:
2674+ /* Like all TLS resolvers, preserve call-clobbered registers.
2675+ We need two scratch regs anyway. */
2676+ subl $32, %esp
2677+ cfi_adjust_cfa_offset (32)
2678+ movl %ecx, 20(%esp)
2679+ movl %edx, 24(%esp)
2680+ movl TLSDESC_ARG(%eax), %eax
2681+ movl %gs:DTV_OFFSET, %edx
2682+ movl TLSDESC_GEN_COUNT(%eax), %ecx
2683+ cmpl (%edx), %ecx
2684+ ja 2f
2685+ movl TLSDESC_MODID(%eax), %ecx
2686+ movl (%edx,%ecx,8), %edx
2687+ cmpl $-1, %edx
2688+ je 2f
2689+ movl TLSDESC_MODOFF(%eax), %eax
2690+ addl %edx, %eax
2691+1:
2692+ movl 20(%esp), %ecx
2693+ subl %gs:0, %eax
2694+ movl 24(%esp), %edx
2695+ addl $32, %esp
2696+ cfi_adjust_cfa_offset (-32)
2697+ ret
2698+ .p2align 4,,7
2699+2:
2700+ cfi_adjust_cfa_offset (32)
2701+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
2702+ movl %ebx, -28(%esp)
2703+ movl %esp, %ebx
2704+ cfi_def_cfa_register(%ebx)
2705+ and $-STATE_SAVE_ALIGNMENT, %esp
2706+#endif
2707+#ifdef REGISTER_SAVE_AREA
2708+ subl $REGISTER_SAVE_AREA, %esp
2709+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
2710+ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
2711+# endif
2712+#else
2713+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
2714+# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
2715+# endif
2716+ /* Allocate stack space of the required size to save the state. */
2717+ LOAD_PIC_REG (cx)
2718+ subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
2719+#endif
2720+#ifdef USE_FNSAVE
2721+ fnsave (%esp)
2722+#elif defined USE_FXSAVE
2723+ fxsave (%esp)
2724+#else
2725+ /* Save the argument for ___tls_get_addr in EAX. */
2726+ movl %eax, %ecx
2727+ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
2728+ xorl %edx, %edx
2729+ /* Clear the XSAVE Header. */
2730+# ifdef USE_XSAVE
2731+ movl %edx, (512)(%esp)
2732+ movl %edx, (512 + 4 * 1)(%esp)
2733+ movl %edx, (512 + 4 * 2)(%esp)
2734+ movl %edx, (512 + 4 * 3)(%esp)
2735+# endif
2736+ movl %edx, (512 + 4 * 4)(%esp)
2737+ movl %edx, (512 + 4 * 5)(%esp)
2738+ movl %edx, (512 + 4 * 6)(%esp)
2739+ movl %edx, (512 + 4 * 7)(%esp)
2740+ movl %edx, (512 + 4 * 8)(%esp)
2741+ movl %edx, (512 + 4 * 9)(%esp)
2742+ movl %edx, (512 + 4 * 10)(%esp)
2743+ movl %edx, (512 + 4 * 11)(%esp)
2744+ movl %edx, (512 + 4 * 12)(%esp)
2745+ movl %edx, (512 + 4 * 13)(%esp)
2746+ movl %edx, (512 + 4 * 14)(%esp)
2747+ movl %edx, (512 + 4 * 15)(%esp)
2748+# ifdef USE_XSAVE
2749+ xsave (%esp)
2750+# else
2751+ xsavec (%esp)
2752+# endif
2753+ /* Restore the argument for ___tls_get_addr in EAX. */
2754+ movl %ecx, %eax
2755+#endif
2756+ call HIDDEN_JUMPTARGET (___tls_get_addr)
2757+ /* Get register content back. */
2758+#ifdef USE_FNSAVE
2759+ frstor (%esp)
2760+#elif defined USE_FXSAVE
2761+ fxrstor (%esp)
2762+#else
2763+ /* Save and restore ___tls_get_addr return value stored in EAX. */
2764+ movl %eax, %ecx
2765+ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
2766+ xorl %edx, %edx
2767+ xrstor (%esp)
2768+ movl %ecx, %eax
2769+#endif
2770+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
2771+ mov %ebx, %esp
2772+ cfi_def_cfa_register(%esp)
2773+ movl -28(%esp), %ebx
2774+ cfi_restore(%ebx)
2775+#else
2776+ addl $REGISTER_SAVE_AREA, %esp
2777+ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
2778+#endif
2779+ jmp 1b
2780+ cfi_endproc
2781+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
2782+
2783+#undef STATE_SAVE_ALIGNMENT
2784diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
2785index 90d93caa0c..f002feee56 100644
2786--- a/sysdeps/i386/dl-tlsdesc.S
2787+++ b/sysdeps/i386/dl-tlsdesc.S
2788@@ -18,8 +18,27 @@
2789
2790 #include <sysdep.h>
2791 #include <tls.h>
2792+#include <cpu-features-offsets.h>
2793+#include <features-offsets.h>
2794 #include "tlsdesc.h"
2795
2796+#ifndef DL_STACK_ALIGNMENT
2797+/* Due to GCC bug:
2798+
2799+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
2800+
2801+ __tls_get_addr may be called with 4-byte stack alignment. Although
2802+ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
2803+ that stack will be always aligned at 16 bytes. */
2804+# define DL_STACK_ALIGNMENT 4
2805+#endif
2806+
2807+/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
2808+ stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */
2809+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
2810+ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
2811+ || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
2812+
2813 .text
2814
2815 /* This function is used to compute the TP offset for symbols in
2816@@ -65,69 +84,35 @@ _dl_tlsdesc_undefweak:
2817 .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
2818
2819 #ifdef SHARED
2820- .hidden _dl_tlsdesc_dynamic
2821- .global _dl_tlsdesc_dynamic
2822- .type _dl_tlsdesc_dynamic,@function
2823-
2824- /* This function is used for symbols that need dynamic TLS.
2825-
2826- %eax points to the TLS descriptor, such that 0(%eax) points to
2827- _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
2828- tlsdesc_dynamic_arg object. It must return in %eax the offset
2829- between the thread pointer and the object denoted by the
2830- argument, without clobbering any registers.
2831-
2832- The assembly code that follows is a rendition of the following
2833- C code, hand-optimized a little bit.
2834-
2835-ptrdiff_t
2836-__attribute__ ((__regparm__ (1)))
2837-_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
2838-{
2839- struct tlsdesc_dynamic_arg *td = tdp->arg;
2840- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
2841- if (__builtin_expect (td->gen_count <= dtv[0].counter
2842- && (dtv[td->tlsinfo.ti_module].pointer.val
2843- != TLS_DTV_UNALLOCATED),
2844- 1))
2845- return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
2846- - __thread_pointer;
2847-
2848- return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
2849-}
2850-*/
2851- cfi_startproc
2852- .align 16
2853-_dl_tlsdesc_dynamic:
2854- /* Like all TLS resolvers, preserve call-clobbered registers.
2855- We need two scratch regs anyway. */
2856- subl $28, %esp
2857- cfi_adjust_cfa_offset (28)
2858- movl %ecx, 20(%esp)
2859- movl %edx, 24(%esp)
2860- movl TLSDESC_ARG(%eax), %eax
2861- movl %gs:DTV_OFFSET, %edx
2862- movl TLSDESC_GEN_COUNT(%eax), %ecx
2863- cmpl (%edx), %ecx
2864- ja .Lslow
2865- movl TLSDESC_MODID(%eax), %ecx
2866- movl (%edx,%ecx,8), %edx
2867- cmpl $-1, %edx
2868- je .Lslow
2869- movl TLSDESC_MODOFF(%eax), %eax
2870- addl %edx, %eax
2871-.Lret:
2872- movl 20(%esp), %ecx
2873- subl %gs:0, %eax
2874- movl 24(%esp), %edx
2875- addl $28, %esp
2876- cfi_adjust_cfa_offset (-28)
2877- ret
2878- .p2align 4,,7
2879-.Lslow:
2880- cfi_adjust_cfa_offset (28)
2881- call HIDDEN_JUMPTARGET (___tls_get_addr)
2882- jmp .Lret
2883- cfi_endproc
2884- .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
2885+# define USE_FNSAVE
2886+# define MINIMUM_ALIGNMENT 4
2887+# define STATE_SAVE_ALIGNMENT 4
2888+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fnsave
2889+# include "dl-tlsdesc-dynamic.h"
2890+# undef _dl_tlsdesc_dynamic
2891+# undef MINIMUM_ALIGNMENT
2892+# undef USE_FNSAVE
2893+
2894+# define MINIMUM_ALIGNMENT 16
2895+
2896+# define USE_FXSAVE
2897+# define STATE_SAVE_ALIGNMENT 16
2898+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
2899+# include "dl-tlsdesc-dynamic.h"
2900+# undef _dl_tlsdesc_dynamic
2901+# undef USE_FXSAVE
2902+
2903+# define USE_XSAVE
2904+# define STATE_SAVE_ALIGNMENT 64
2905+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave
2906+# include "dl-tlsdesc-dynamic.h"
2907+# undef _dl_tlsdesc_dynamic
2908+# undef USE_XSAVE
2909+
2910+# define USE_XSAVEC
2911+# define STATE_SAVE_ALIGNMENT 64
2912+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec
2913+# include "dl-tlsdesc-dynamic.h"
2914+# undef _dl_tlsdesc_dynamic
2915+# undef USE_XSAVEC
2916 #endif /* SHARED */
2917diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
2918index 4d50b327b5..992aabe43e 100644
2919--- a/sysdeps/x86/Makefile
2920+++ b/sysdeps/x86/Makefile
2921@@ -1,5 +1,5 @@
2922 ifeq ($(subdir),csu)
2923-gen-as-const-headers += cpu-features-offsets.sym
2924+gen-as-const-headers += cpu-features-offsets.sym features-offsets.sym
2925 endif
2926
2927 ifeq ($(subdir),elf)
2928@@ -86,6 +86,11 @@ endif
2929 tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512F
2930 tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
2931 tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
2932+
2933+CFLAGS-tst-gnu2-tls2.c += -msse
2934+CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
2935+CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
2936+CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
2937 endif
2938
2939 ifeq ($(subdir),math)
2940diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
2941index 25e6622a79..835113b42f 100644
2942--- a/sysdeps/x86/cpu-features.c
2943+++ b/sysdeps/x86/cpu-features.c
2944@@ -27,8 +27,13 @@
2945 extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
2946 attribute_hidden;
2947
2948-#if defined SHARED && defined __x86_64__
2949-# include <dl-plt-rewrite.h>
2950+#if defined SHARED
2951+extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
2952+extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
2953+extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;
2954+
2955+# ifdef __x86_64__
2956+# include <dl-plt-rewrite.h>
2957
2958 static void
2959 TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
2960@@ -47,6 +52,15 @@ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
2961 : plt_rewrite_jmp);
2962 }
2963 }
2964+# else
2965+extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
2966+# endif
2967+#endif
2968+
2969+#ifdef __x86_64__
2970+extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
2971+extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
2972+extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
2973 #endif
2974
2975 #ifdef __LP64__
2976@@ -1130,6 +1144,44 @@ no_cpuid:
2977 TUNABLE_CALLBACK (set_x86_shstk));
2978 #endif
2979
2980+ if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
2981+ {
2982+ if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
2983+ {
2984+#ifdef __x86_64__
2985+ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
2986+#endif
2987+#ifdef SHARED
2988+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
2989+#endif
2990+ }
2991+ else
2992+ {
2993+#ifdef __x86_64__
2994+ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
2995+#endif
2996+#ifdef SHARED
2997+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
2998+#endif
2999+ }
3000+ }
3001+ else
3002+ {
3003+#ifdef __x86_64__
3004+ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
3005+# ifdef SHARED
3006+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
3007+# endif
3008+#else
3009+# ifdef SHARED
3010+ if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
3011+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
3012+ else
3013+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
3014+# endif
3015+#endif
3016+ }
3017+
3018 #ifdef SHARED
3019 # ifdef __x86_64__
3020 TUNABLE_GET (plt_rewrite, tunable_val_t *,
3021diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c
3022index ee957b4d70..5920d4b320 100644
3023--- a/sysdeps/x86/dl-procinfo.c
3024+++ b/sysdeps/x86/dl-procinfo.c
3025@@ -86,3 +86,19 @@ PROCINFO_CLASS const char _dl_x86_platforms[4][9]
3026 #else
3027 ,
3028 #endif
3029+
3030+#if defined SHARED && !IS_IN (ldconfig)
3031+# if !defined PROCINFO_DECL
3032+ ._dl_x86_tlsdesc_dynamic
3033+# else
3034+PROCINFO_CLASS void * _dl_x86_tlsdesc_dynamic
3035+# endif
3036+# ifndef PROCINFO_DECL
3037+= NULL
3038+# endif
3039+# ifdef PROCINFO_DECL
3040+;
3041+# else
3042+,
3043+# endif
3044+#endif
3045diff --git a/sysdeps/x86_64/features-offsets.sym b/sysdeps/x86/features-offsets.sym
3046similarity index 89%
3047rename from sysdeps/x86_64/features-offsets.sym
3048rename to sysdeps/x86/features-offsets.sym
3049index 9e4be3393a..77e990c705 100644
3050--- a/sysdeps/x86_64/features-offsets.sym
3051+++ b/sysdeps/x86/features-offsets.sym
3052@@ -3,4 +3,6 @@
3053 #include <ldsodefs.h>
3054
3055 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features)
3056+#ifdef __x86_64__
3057 RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1)
3058+#endif
3059diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
3060index 837fd28734..485cad9c02 100644
3061--- a/sysdeps/x86/sysdep.h
3062+++ b/sysdeps/x86/sysdep.h
3063@@ -70,6 +70,12 @@
3064 | (1 << X86_XSTATE_ZMM_H_ID))
3065 #endif
3066
3067+/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
3068+ Compiler assumes that all registers, including x87 FPU stack registers,
3069+ are unchanged after CALL, except for EFLAGS and RAX/EAX. */
3070+#define TLSDESC_CALL_STATE_SAVE_MASK \
3071+ (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
3072+
3073 /* Constants for bits in __x86_string_control: */
3074
3075 /* Avoid short distance REP MOVSB. */
3076diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
3077new file mode 100644
3078index 0000000000..de900a423b
3079--- /dev/null
3080+++ b/sysdeps/x86/tst-gnu2-tls2.c
3081@@ -0,0 +1,20 @@
3082+#ifndef __x86_64__
3083+#include <sys/platform/x86.h>
3084+
3085+#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
3086+#endif
3087+
3088+/* Clear XMM0...XMM7 */
3089+#define PREPARE_MALLOC() \
3090+{ \
3091+ asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \
3092+ asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \
3093+ asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \
3094+ asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \
3095+ asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \
3096+ asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \
3097+ asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \
3098+ asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \
3099+}
3100+
3101+#include <elf/tst-gnu2-tls2.c>
3102diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
3103index 90f4ecfd26..e8babc9a4e 100644
3104--- a/sysdeps/x86_64/Makefile
3105+++ b/sysdeps/x86_64/Makefile
3106@@ -10,7 +10,7 @@ LDFLAGS-rtld += -Wl,-z,nomark-plt
3107 endif
3108
3109 ifeq ($(subdir),csu)
3110-gen-as-const-headers += features-offsets.sym link-defines.sym
3111+gen-as-const-headers += link-defines.sym
3112 endif
3113
3114 ifeq ($(subdir),gmon)
3115diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
3116index 6d605d0d32..ff5d45f7cb 100644
3117--- a/sysdeps/x86_64/dl-machine.h
3118+++ b/sysdeps/x86_64/dl-machine.h
3119@@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
3120 int lazy, int profile)
3121 {
3122 Elf64_Addr *got;
3123- extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
3124- extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
3125- extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
3126 extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
3127 extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
3128 extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
3129@@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
3130 /* Identify this shared object. */
3131 *(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
3132
3133- const struct cpu_features* cpu_features = __get_cpu_features ();
3134-
3135 #ifdef SHARED
3136 /* The got[2] entry contains the address of a function which gets
3137 called to get the address of a so far unresolved function and
3138@@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
3139 end in this function. */
3140 if (__glibc_unlikely (profile))
3141 {
3142+ const struct cpu_features* cpu_features = __get_cpu_features ();
3143 if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
3144 *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
3145 else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
3146@@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
3147 /* This function will get called to fix up the GOT entry
3148 indicated by the offset on the stack, and then jump to
3149 the resolved address. */
3150- if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
3151- || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
3152- *(ElfW(Addr) *) (got + 2)
3153- = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
3154- ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
3155- : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
3156- else
3157- *(ElfW(Addr) *) (got + 2)
3158- = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
3159+ *(ElfW(Addr) *) (got + 2)
3160+ = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve);
3161 }
3162 }
3163
3164@@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n",
3165 {
3166 td->arg = _dl_make_tlsdesc_dynamic
3167 (sym_map, sym->st_value + reloc->r_addend);
3168- td->entry = _dl_tlsdesc_dynamic;
3169+ td->entry = GLRO(dl_x86_tlsdesc_dynamic);
3170 }
3171 else
3172 # endif
3173diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
3174index 4d1d790fbb..06637a8154 100644
3175--- a/sysdeps/x86_64/dl-procinfo.c
3176+++ b/sysdeps/x86_64/dl-procinfo.c
3177@@ -41,5 +41,21 @@
3178
3179 #include <sysdeps/x86/dl-procinfo.c>
3180
3181+#if !IS_IN (ldconfig)
3182+# if !defined PROCINFO_DECL && defined SHARED
3183+ ._dl_x86_64_runtime_resolve
3184+# else
3185+PROCINFO_CLASS void * _dl_x86_64_runtime_resolve
3186+# endif
3187+# ifndef PROCINFO_DECL
3188+= NULL
3189+# endif
3190+# if !defined SHARED || defined PROCINFO_DECL
3191+;
3192+# else
3193+,
3194+# endif
3195+#endif
3196+
3197 #undef PROCINFO_DECL
3198 #undef PROCINFO_CLASS
3199diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
3200new file mode 100644
3201index 0000000000..0c2e8d5320
3202--- /dev/null
3203+++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
3204@@ -0,0 +1,166 @@
3205+/* Thread-local storage handling in the ELF dynamic linker. x86_64 version.
3206+ Copyright (C) 2004-2024 Free Software Foundation, Inc.
3207+ This file is part of the GNU C Library.
3208+
3209+ The GNU C Library is free software; you can redistribute it and/or
3210+ modify it under the terms of the GNU Lesser General Public
3211+ License as published by the Free Software Foundation; either
3212+ version 2.1 of the License, or (at your option) any later version.
3213+
3214+ The GNU C Library is distributed in the hope that it will be useful,
3215+ but WITHOUT ANY WARRANTY; without even the implied warranty of
3216+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3217+ Lesser General Public License for more details.
3218+
3219+ You should have received a copy of the GNU Lesser General Public
3220+ License along with the GNU C Library; if not, see
3221+ <https://www.gnu.org/licenses/>. */
3222+
3223+#ifndef SECTION
3224+# define SECTION(p) p
3225+#endif
3226+
3227+#undef REGISTER_SAVE_AREA
3228+#undef LOCAL_STORAGE_AREA
3229+#undef BASE
3230+
3231+#include "dl-trampoline-state.h"
3232+
3233+ .section SECTION(.text),"ax",@progbits
3234+
3235+ .hidden _dl_tlsdesc_dynamic
3236+ .global _dl_tlsdesc_dynamic
3237+ .type _dl_tlsdesc_dynamic,@function
3238+
3239+ /* %rax points to the TLS descriptor, such that 0(%rax) points to
3240+ _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
3241+ tlsdesc_dynamic_arg object. It must return in %rax the offset
3242+ between the thread pointer and the object denoted by the
3243+ argument, without clobbering any registers.
3244+
3245+ The assembly code that follows is a rendition of the following
3246+ C code, hand-optimized a little bit.
3247+
3248+ptrdiff_t
3249+_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
3250+{
3251+ struct tlsdesc_dynamic_arg *td = tdp->arg;
3252+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
3253+ if (__builtin_expect (td->gen_count <= dtv[0].counter
3254+ && (dtv[td->tlsinfo.ti_module].pointer.val
3255+ != TLS_DTV_UNALLOCATED),
3256+ 1))
3257+ return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
3258+ - __thread_pointer;
3259+
3260+ return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
3261+}
3262+*/
3263+ cfi_startproc
3264+ .align 16
3265+_dl_tlsdesc_dynamic:
3266+ _CET_ENDBR
3267+ /* Preserve call-clobbered registers that we modify.
3268+ We need two scratch regs anyway. */
3269+ movq %rsi, -16(%rsp)
3270+ mov %fs:DTV_OFFSET, %RSI_LP
3271+ movq %rdi, -8(%rsp)
3272+ movq TLSDESC_ARG(%rax), %rdi
3273+ movq (%rsi), %rax
3274+ cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
3275+ ja 2f
3276+ movq TLSDESC_MODID(%rdi), %rax
3277+ salq $4, %rax
3278+ movq (%rax,%rsi), %rax
3279+ cmpq $-1, %rax
3280+ je 2f
3281+ addq TLSDESC_MODOFF(%rdi), %rax
3282+1:
3283+ movq -16(%rsp), %rsi
3284+ sub %fs:0, %RAX_LP
3285+ movq -8(%rsp), %rdi
3286+ ret
3287+2:
3288+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
3289+ movq %rbx, -24(%rsp)
3290+ mov %RSP_LP, %RBX_LP
3291+ cfi_def_cfa_register(%rbx)
3292+ and $-STATE_SAVE_ALIGNMENT, %RSP_LP
3293+#endif
3294+#ifdef REGISTER_SAVE_AREA
3295+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
3296+ /* STATE_SAVE_OFFSET has space for 8 integer registers. But we
3297+ need space for RCX, RDX, RSI, RDI, R8, R9, R10 and R11, plus
3298+ RBX above. */
3299+ sub $(REGISTER_SAVE_AREA + STATE_SAVE_ALIGNMENT), %RSP_LP
3300+# else
3301+ sub $REGISTER_SAVE_AREA, %RSP_LP
3302+ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
3303+# endif
3304+#else
3305+ /* Allocate stack space of the required size to save the state. */
3306+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
3307+#endif
3308+ /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
3309+ r10 and r11. */
3310+ movq %rcx, REGISTER_SAVE_RCX(%rsp)
3311+ movq %rdx, REGISTER_SAVE_RDX(%rsp)
3312+ movq %r8, REGISTER_SAVE_R8(%rsp)
3313+ movq %r9, REGISTER_SAVE_R9(%rsp)
3314+ movq %r10, REGISTER_SAVE_R10(%rsp)
3315+ movq %r11, REGISTER_SAVE_R11(%rsp)
3316+#ifdef USE_FXSAVE
3317+ fxsave STATE_SAVE_OFFSET(%rsp)
3318+#else
3319+ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
3320+ xorl %edx, %edx
3321+ /* Clear the XSAVE Header. */
3322+# ifdef USE_XSAVE
3323+ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
3324+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
3325+# endif
3326+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
3327+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
3328+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
3329+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
3330+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
3331+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
3332+# ifdef USE_XSAVE
3333+ xsave STATE_SAVE_OFFSET(%rsp)
3334+# else
3335+ xsavec STATE_SAVE_OFFSET(%rsp)
3336+# endif
3337+#endif
3338+ /* %rdi already points to the tlsinfo data structure. */
3339+ call HIDDEN_JUMPTARGET (__tls_get_addr)
3340+ # Get register content back.
3341+#ifdef USE_FXSAVE
3342+ fxrstor STATE_SAVE_OFFSET(%rsp)
3343+#else
3344+ /* Save and restore __tls_get_addr return value stored in RAX. */
3345+ mov %RAX_LP, %RCX_LP
3346+ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
3347+ xorl %edx, %edx
3348+ xrstor STATE_SAVE_OFFSET(%rsp)
3349+ mov %RCX_LP, %RAX_LP
3350+#endif
3351+ movq REGISTER_SAVE_R11(%rsp), %r11
3352+ movq REGISTER_SAVE_R10(%rsp), %r10
3353+ movq REGISTER_SAVE_R9(%rsp), %r9
3354+ movq REGISTER_SAVE_R8(%rsp), %r8
3355+ movq REGISTER_SAVE_RDX(%rsp), %rdx
3356+ movq REGISTER_SAVE_RCX(%rsp), %rcx
3357+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
3358+ mov %RBX_LP, %RSP_LP
3359+ cfi_def_cfa_register(%rsp)
3360+ movq -24(%rsp), %rbx
3361+ cfi_restore(%rbx)
3362+#else
3363+ add $REGISTER_SAVE_AREA, %RSP_LP
3364+ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
3365+#endif
3366+ jmp 1b
3367+ cfi_endproc
3368+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
3369+
3370+#undef STATE_SAVE_ALIGNMENT
3371diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
3372index f748af2ece..ea69f5223a 100644
3373--- a/sysdeps/x86_64/dl-tlsdesc.S
3374+++ b/sysdeps/x86_64/dl-tlsdesc.S
3375@@ -18,7 +18,19 @@
3376
3377 #include <sysdep.h>
3378 #include <tls.h>
3379+#include <cpu-features-offsets.h>
3380+#include <features-offsets.h>
3381 #include "tlsdesc.h"
3382+#include "dl-trampoline-save.h"
3383+
3384+/* Area on stack to save and restore registers used for parameter
3385+ passing when calling _dl_tlsdesc_dynamic. */
3386+#define REGISTER_SAVE_RCX 0
3387+#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
3388+#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDX + 8)
3389+#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
3390+#define REGISTER_SAVE_R10 (REGISTER_SAVE_R9 + 8)
3391+#define REGISTER_SAVE_R11 (REGISTER_SAVE_R10 + 8)
3392
3393 .text
3394
3395@@ -67,80 +79,24 @@ _dl_tlsdesc_undefweak:
3396 .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
3397
3398 #ifdef SHARED
3399- .hidden _dl_tlsdesc_dynamic
3400- .global _dl_tlsdesc_dynamic
3401- .type _dl_tlsdesc_dynamic,@function
3402-
3403- /* %rax points to the TLS descriptor, such that 0(%rax) points to
3404- _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
3405- tlsdesc_dynamic_arg object. It must return in %rax the offset
3406- between the thread pointer and the object denoted by the
3407- argument, without clobbering any registers.
3408-
3409- The assembly code that follows is a rendition of the following
3410- C code, hand-optimized a little bit.
3411-
3412-ptrdiff_t
3413-_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
3414-{
3415- struct tlsdesc_dynamic_arg *td = tdp->arg;
3416- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
3417- if (__builtin_expect (td->gen_count <= dtv[0].counter
3418- && (dtv[td->tlsinfo.ti_module].pointer.val
3419- != TLS_DTV_UNALLOCATED),
3420- 1))
3421- return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
3422- - __thread_pointer;
3423-
3424- return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
3425-}
3426-*/
3427- cfi_startproc
3428- .align 16
3429-_dl_tlsdesc_dynamic:
3430- _CET_ENDBR
3431- /* Preserve call-clobbered registers that we modify.
3432- We need two scratch regs anyway. */
3433- movq %rsi, -16(%rsp)
3434- mov %fs:DTV_OFFSET, %RSI_LP
3435- movq %rdi, -8(%rsp)
3436- movq TLSDESC_ARG(%rax), %rdi
3437- movq (%rsi), %rax
3438- cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
3439- ja .Lslow
3440- movq TLSDESC_MODID(%rdi), %rax
3441- salq $4, %rax
3442- movq (%rax,%rsi), %rax
3443- cmpq $-1, %rax
3444- je .Lslow
3445- addq TLSDESC_MODOFF(%rdi), %rax
3446-.Lret:
3447- movq -16(%rsp), %rsi
3448- sub %fs:0, %RAX_LP
3449- movq -8(%rsp), %rdi
3450- ret
3451-.Lslow:
3452- /* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
3453- r10 and r11. Also, align the stack, that's off by 8 bytes. */
3454- subq $72, %rsp
3455- cfi_adjust_cfa_offset (72)
3456- movq %rdx, 8(%rsp)
3457- movq %rcx, 16(%rsp)
3458- movq %r8, 24(%rsp)
3459- movq %r9, 32(%rsp)
3460- movq %r10, 40(%rsp)
3461- movq %r11, 48(%rsp)
3462- /* %rdi already points to the tlsinfo data structure. */
3463- call HIDDEN_JUMPTARGET (__tls_get_addr)
3464- movq 8(%rsp), %rdx
3465- movq 16(%rsp), %rcx
3466- movq 24(%rsp), %r8
3467- movq 32(%rsp), %r9
3468- movq 40(%rsp), %r10
3469- movq 48(%rsp), %r11
3470- addq $72, %rsp
3471- cfi_adjust_cfa_offset (-72)
3472- jmp .Lret
3473- cfi_endproc
3474- .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
3475+# define USE_FXSAVE
3476+# define STATE_SAVE_ALIGNMENT 16
3477+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
3478+# include "dl-tlsdesc-dynamic.h"
3479+# undef _dl_tlsdesc_dynamic
3480+# undef USE_FXSAVE
3481+
3482+# define USE_XSAVE
3483+# define STATE_SAVE_ALIGNMENT 64
3484+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave
3485+# include "dl-tlsdesc-dynamic.h"
3486+# undef _dl_tlsdesc_dynamic
3487+# undef USE_XSAVE
3488+
3489+# define USE_XSAVEC
3490+# define STATE_SAVE_ALIGNMENT 64
3491+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec
3492+# include "dl-tlsdesc-dynamic.h"
3493+# undef _dl_tlsdesc_dynamic
3494+# undef USE_XSAVEC
3495 #endif /* SHARED */
3496diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h
3497new file mode 100644
3498index 0000000000..84eac4a8ac
3499--- /dev/null
3500+++ b/sysdeps/x86_64/dl-trampoline-save.h
3501@@ -0,0 +1,34 @@
3502+/* x86-64 PLT trampoline register save macros.
3503+ Copyright (C) 2024 Free Software Foundation, Inc.
3504+ This file is part of the GNU C Library.
3505+
3506+ The GNU C Library is free software; you can redistribute it and/or
3507+ modify it under the terms of the GNU Lesser General Public
3508+ License as published by the Free Software Foundation; either
3509+ version 2.1 of the License, or (at your option) any later version.
3510+
3511+ The GNU C Library is distributed in the hope that it will be useful,
3512+ but WITHOUT ANY WARRANTY; without even the implied warranty of
3513+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3514+ Lesser General Public License for more details.
3515+
3516+ You should have received a copy of the GNU Lesser General Public
3517+ License along with the GNU C Library; if not, see
3518+ <https://www.gnu.org/licenses/>. */
3519+
3520+#ifndef DL_STACK_ALIGNMENT
3521+/* Due to GCC bug:
3522+
3523+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
3524+
3525+ __tls_get_addr may be called with 8-byte stack alignment. Although
3526+ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
3527+ that the stack will always be aligned to 16 bytes. */
3528+# define DL_STACK_ALIGNMENT 8
3529+#endif
3530+
3531+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
3532+ stack to 16 bytes before calling _dl_fixup. */
3533+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
3534+ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
3535+ || 16 > DL_STACK_ALIGNMENT)
3536diff --git a/sysdeps/x86_64/dl-trampoline-state.h b/sysdeps/x86_64/dl-trampoline-state.h
3537new file mode 100644
3538index 0000000000..575f120797
3539--- /dev/null
3540+++ b/sysdeps/x86_64/dl-trampoline-state.h
3541@@ -0,0 +1,51 @@
3542+/* x86-64 PLT dl-trampoline state macros.
3543+ Copyright (C) 2024 Free Software Foundation, Inc.
3544+ This file is part of the GNU C Library.
3545+
3546+ The GNU C Library is free software; you can redistribute it and/or
3547+ modify it under the terms of the GNU Lesser General Public
3548+ License as published by the Free Software Foundation; either
3549+ version 2.1 of the License, or (at your option) any later version.
3550+
3551+ The GNU C Library is distributed in the hope that it will be useful,
3552+ but WITHOUT ANY WARRANTY; without even the implied warranty of
3553+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3554+ Lesser General Public License for more details.
3555+
3556+ You should have received a copy of the GNU Lesser General Public
3557+ License along with the GNU C Library; if not, see
3558+ <https://www.gnu.org/licenses/>. */
3559+
3560+#if (STATE_SAVE_ALIGNMENT % 16) != 0
3561+# error STATE_SAVE_ALIGNMENT must be multiple of 16
3562+#endif
3563+
3564+#if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
3565+# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
3566+#endif
3567+
3568+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
3569+/* Local stack area before jumping to function address: RBX. */
3570+# define LOCAL_STORAGE_AREA 8
3571+# define BASE rbx
3572+# ifdef USE_FXSAVE
3573+/* Use fxsave to save XMM registers. */
3574+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
3575+# if (REGISTER_SAVE_AREA % 16) != 0
3576+# error REGISTER_SAVE_AREA must be multiple of 16
3577+# endif
3578+# endif
3579+#else
3580+# ifndef USE_FXSAVE
3581+# error USE_FXSAVE must be defined
3582+# endif
3583+/* Use fxsave to save XMM registers. */
3584+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
3585+/* Local stack area before jumping to function address: All saved
3586+ registers. */
3587+# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
3588+# define BASE rsp
3589+# if (REGISTER_SAVE_AREA % 16) != 8
3590+# error REGISTER_SAVE_AREA must be odd multiple of 8
3591+# endif
3592+#endif
3593diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
3594index b2e7e0f69b..87c5137837 100644
3595--- a/sysdeps/x86_64/dl-trampoline.S
3596+++ b/sysdeps/x86_64/dl-trampoline.S
3597@@ -22,25 +22,7 @@
3598 #include <features-offsets.h>
3599 #include <link-defines.h>
3600 #include <isa-level.h>
3601-
3602-#ifndef DL_STACK_ALIGNMENT
3603-/* Due to GCC bug:
3604-
3605- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
3606-
3607- __tls_get_addr may be called with 8-byte stack alignment. Although
3608- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
3609- that stack will be always aligned at 16 bytes. We use unaligned
3610- 16-byte move to load and store SSE registers, which has no penalty
3611- on modern processors if stack is 16-byte aligned. */
3612-# define DL_STACK_ALIGNMENT 8
3613-#endif
3614-
3615-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
3616- stack to 16 bytes before calling _dl_fixup. */
3617-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
3618- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
3619- || 16 > DL_STACK_ALIGNMENT)
3620+#include "dl-trampoline-save.h"
3621
3622 /* Area on stack to save and restore registers used for parameter
3623 passing when calling _dl_fixup. */
3624diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
3625index f55c6ea040..d9ccfb40d4 100644
3626--- a/sysdeps/x86_64/dl-trampoline.h
3627+++ b/sysdeps/x86_64/dl-trampoline.h
3628@@ -27,39 +27,7 @@
3629 # undef LOCAL_STORAGE_AREA
3630 # undef BASE
3631
3632-# if (STATE_SAVE_ALIGNMENT % 16) != 0
3633-# error STATE_SAVE_ALIGNMENT must be multiple of 16
3634-# endif
3635-
3636-# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
3637-# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
3638-# endif
3639-
3640-# if DL_RUNTIME_RESOLVE_REALIGN_STACK
3641-/* Local stack area before jumping to function address: RBX. */
3642-# define LOCAL_STORAGE_AREA 8
3643-# define BASE rbx
3644-# ifdef USE_FXSAVE
3645-/* Use fxsave to save XMM registers. */
3646-# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
3647-# if (REGISTER_SAVE_AREA % 16) != 0
3648-# error REGISTER_SAVE_AREA must be multiple of 16
3649-# endif
3650-# endif
3651-# else
3652-# ifndef USE_FXSAVE
3653-# error USE_FXSAVE must be defined
3654-# endif
3655-/* Use fxsave to save XMM registers. */
3656-# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
3657-/* Local stack area before jumping to function address: All saved
3658- registers. */
3659-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
3660-# define BASE rsp
3661-# if (REGISTER_SAVE_AREA % 16) != 8
3662-# error REGISTER_SAVE_AREA must be odd multiple of 8
3663-# endif
3664-# endif
3665+# include "dl-trampoline-state.h"
3666
3667 .globl _dl_runtime_resolve
3668 .hidden _dl_runtime_resolve
3669
3670commit 853e915fdd6ae6c5f1a7a68d2594ec8dbfef1286
3671Author: H.J. Lu <hjl.tools@gmail.com>
3672Date: Wed Feb 28 12:08:03 2024 -0800
3673
3674 x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
3675
3676 _dl_tlsdesc_dynamic should also preserve AMX registers which are
3677 caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
3678 to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size
3679 and save it in xsave_state_full_size which is only used by
3680 _dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes
3681 the AMX part of BZ #31372. Tested on AMX processor.
3682
3683 AMX test is enabled only for compilers with the fix for
3684
3685 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
3686
3687 GCC 14 and GCC 11/12/13 branches have the bug fix.
3688 Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
3689
3690 (cherry picked from commit 9b7091415af47082664717210ac49d51551456ab)
3691
3692diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
3693index 4223feb95f..9a1e7aa646 100644
3694--- a/sysdeps/unix/sysv/linux/x86_64/Makefile
3695+++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
3696@@ -63,6 +63,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
3697 $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
3698 cp $< $@
3699 endif
3700+
3701+ifeq (yes,$(have-mamx-tile))
3702+tests += \
3703+ tst-gnu2-tls2-amx \
3704+# tests
3705+
3706+modules-names += \
3707+ tst-gnu2-tls2-amx-mod0 \
3708+ tst-gnu2-tls2-amx-mod1 \
3709+ tst-gnu2-tls2-amx-mod2 \
3710+# modules-names
3711+
3712+$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
3713+$(objpfx)tst-gnu2-tls2-amx.out: \
3714+ $(objpfx)tst-gnu2-tls2-amx-mod0.so \
3715+ $(objpfx)tst-gnu2-tls2-amx-mod1.so \
3716+ $(objpfx)tst-gnu2-tls2-amx-mod2.so
3717+$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
3718+$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
3719+$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
3720+
3721+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
3722+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
3723+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
3724+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
3725+endif
3726+
3727 endif # $(subdir) == elf
3728
3729 ifneq ($(enable-cet),no)
3730diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
3731index 2f511321ad..ef4631bf4b 100644
3732--- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
3733+++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
3734@@ -20,3 +20,8 @@
3735 # define ARCH_SHSTK_SHSTK 0x1
3736 # define ARCH_SHSTK_WRSS 0x2
3737 #endif
3738+
3739+#ifndef ARCH_GET_XCOMP_PERM
3740+# define ARCH_GET_XCOMP_PERM 0x1022
3741+# define ARCH_REQ_XCOMP_PERM 0x1023
3742+#endif
3743diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
3744new file mode 100644
3745index 0000000000..2e0c7b91b7
3746--- /dev/null
3747+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
3748@@ -0,0 +1,2 @@
3749+#include "tst-gnu2-tls2-amx.h"
3750+#include <tst-gnu2-tls2mod0.c>
3751diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
3752new file mode 100644
3753index 0000000000..b8a8ccf1c1
3754--- /dev/null
3755+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
3756@@ -0,0 +1,2 @@
3757+#include "tst-gnu2-tls2-amx.h"
3758+#include <tst-gnu2-tls2mod1.c>
3759diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
3760new file mode 100644
3761index 0000000000..cdf4a8f363
3762--- /dev/null
3763+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
3764@@ -0,0 +1,2 @@
3765+#include "tst-gnu2-tls2-amx.h"
3766+#include <tst-gnu2-tls2mod2.c>
3767diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
3768new file mode 100644
3769index 0000000000..ae4dd82556
3770--- /dev/null
3771+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
3772@@ -0,0 +1,83 @@
3773+/* Test TLSDESC relocation with AMX.
3774+ Copyright (C) 2024 Free Software Foundation, Inc.
3775+ This file is part of the GNU C Library.
3776+
3777+ The GNU C Library is free software; you can redistribute it and/or
3778+ modify it under the terms of the GNU Lesser General Public
3779+ License as published by the Free Software Foundation; either
3780+ version 2.1 of the License, or (at your option) any later version.
3781+
3782+ The GNU C Library is distributed in the hope that it will be useful,
3783+ but WITHOUT ANY WARRANTY; without even the implied warranty of
3784+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3785+ Lesser General Public License for more details.
3786+
3787+ You should have received a copy of the GNU Lesser General Public
3788+ License along with the GNU C Library; if not, see
3789+ <http://www.gnu.org/licenses/>. */
3790+
3791+#include <stdbool.h>
3792+#include <asm/prctl.h>
3793+#include <support/check.h>
3794+#include "tst-gnu2-tls2-amx.h"
3795+
3796+extern int arch_prctl (int, ...);
3797+
3798+#define X86_XSTATE_TILECFG_ID 17
3799+#define X86_XSTATE_TILEDATA_ID 18
3800+
3801+/* Initialize tile config. */
3802+__attribute__ ((noinline, noclone))
3803+static void
3804+init_tile_config (__tilecfg *tileinfo)
3805+{
3806+ int i;
3807+ tileinfo->palette_id = 1;
3808+ tileinfo->start_row = 0;
3809+
3810+ tileinfo->colsb[0] = MAX_ROWS;
3811+ tileinfo->rows[0] = MAX_ROWS;
3812+
3813+ for (i = 1; i < 4; ++i)
3814+ {
3815+ tileinfo->colsb[i] = MAX_COLS;
3816+ tileinfo->rows[i] = MAX_ROWS;
3817+ }
3818+
3819+ _tile_loadconfig (tileinfo);
3820+}
3821+
3822+static bool
3823+enable_amx (void)
3824+{
3825+ uint64_t bitmask;
3826+ if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
3827+ return false;
3828+
3829+ if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
3830+ return false;
3831+
3832+ if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
3833+ return false;
3834+
3835+ /* Load tile configuration. */
3836+ __tilecfg tile_data = { 0 };
3837+ init_tile_config (&tile_data);
3838+
3839+ return true;
3840+}
3841+
3842+/* Zero tile register 2. Used as PREPARE_MALLOC below to clobber a
3843+ caller-saved register in malloc and verify that the implicit TLSDESC
3844+ call won't change caller-saved registers. */
3845+static void
3846+clear_tile_register (void)
3847+{
3848+ _tile_zero (2);
3849+}
3850+
3851+#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
3852+#define IS_SUPPORTED() enable_amx ()
3853+#define PREPARE_MALLOC() clear_tile_register ()
3854+
3855+#include <elf/tst-gnu2-tls2.c>
3856diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
3857new file mode 100644
3858index 0000000000..1845a3caba
3859--- /dev/null
3860+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
3861@@ -0,0 +1,63 @@
3862+/* Test TLSDESC relocation with AMX.
3863+ Copyright (C) 2024 Free Software Foundation, Inc.
3864+ This file is part of the GNU C Library.
3865+
3866+ The GNU C Library is free software; you can redistribute it and/or
3867+ modify it under the terms of the GNU Lesser General Public
3868+ License as published by the Free Software Foundation; either
3869+ version 2.1 of the License, or (at your option) any later version.
3870+
3871+ The GNU C Library is distributed in the hope that it will be useful,
3872+ but WITHOUT ANY WARRANTY; without even the implied warranty of
3873+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3874+ Lesser General Public License for more details.
3875+
3876+ You should have received a copy of the GNU Lesser General Public
3877+ License along with the GNU C Library; if not, see
3878+ <http://www.gnu.org/licenses/>. */
3879+
3880+#include <stdint.h>
3881+#include <string.h>
3882+#include <x86intrin.h>
3883+#include <support/check.h>
3884+
3885+#define MAX_ROWS 16
3886+#define MAX_COLS 64
3887+#define MAX 1024
3888+#define STRIDE 64
3889+
3890+typedef struct __tile_config
3891+{
3892+ uint8_t palette_id;
3893+ uint8_t start_row;
3894+ uint8_t reserved_0[14];
3895+ uint16_t colsb[16];
3896+ uint8_t rows[16];
3897+} __tilecfg __attribute__ ((aligned (64)));
3898+
3899+/* Initialize int8_t buffer */
3900+static inline void
3901+init_buffer (int8_t *buf, int8_t value)
3902+{
3903+ int rows, colsb, i, j;
3904+ rows = MAX_ROWS;
3905+ colsb = MAX_COLS;
3906+
3907+ for (i = 0; i < rows; i++)
3908+ for (j = 0; j < colsb; j++)
3909+ buf[i * colsb + j] = value;
3910+}
3911+
3912+#define BEFORE_TLSDESC_CALL() \
3913+ int8_t src[MAX]; \
3914+ int8_t res[MAX]; \
3915+ /* Initialize src with data */ \
3916+ init_buffer (src, 2); \
3917+ /* Load tile rows from memory. */ \
3918+ _tile_loadd (2, src, STRIDE);
3919+
3920+#define AFTER_TLSDESC_CALL() \
3921+ /* Store the tile data to memory. */ \
3922+ _tile_stored (2, res, STRIDE); \
3923+ _tile_release (); \
3924+ TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
3925diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
3926index 6a8fd29813..21fc88d651 100644
3927--- a/sysdeps/x86/cpu-features-offsets.sym
3928+++ b/sysdeps/x86/cpu-features-offsets.sym
3929@@ -3,3 +3,4 @@
3930 #include <ldsodefs.h>
3931
3932 XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
3933+XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
3934diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
3935index 835113b42f..d71e8d3d2e 100644
3936--- a/sysdeps/x86/cpu-features.c
3937+++ b/sysdeps/x86/cpu-features.c
3938@@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features)
3939 __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
3940 if (ebx != 0)
3941 {
3942+ /* NB: On AMX capable processors, ebx always includes AMX
3943+ states. */
3944 unsigned int xsave_state_full_size
3945 = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
3946
3947@@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features)
3948 {
3949 unsigned int xstate_comp_offsets[32];
3950 unsigned int xstate_comp_sizes[32];
3951+#ifdef __x86_64__
3952+ unsigned int xstate_amx_comp_offsets[32];
3953+ unsigned int xstate_amx_comp_sizes[32];
3954+ unsigned int amx_ecx;
3955+#endif
3956 unsigned int i;
3957
3958 xstate_comp_offsets[0] = 0;
3959@@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features)
3960 xstate_comp_offsets[2] = 576;
3961 xstate_comp_sizes[0] = 160;
3962 xstate_comp_sizes[1] = 256;
3963+#ifdef __x86_64__
3964+ xstate_amx_comp_offsets[0] = 0;
3965+ xstate_amx_comp_offsets[1] = 160;
3966+ xstate_amx_comp_offsets[2] = 576;
3967+ xstate_amx_comp_sizes[0] = 160;
3968+ xstate_amx_comp_sizes[1] = 256;
3969+#endif
3970
3971 for (i = 2; i < 32; i++)
3972 {
3973- if ((STATE_SAVE_MASK & (1 << i)) != 0)
3974+ if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
3975 {
3976 __cpuid_count (0xd, i, eax, ebx, ecx, edx);
3977- xstate_comp_sizes[i] = eax;
3978+#ifdef __x86_64__
3979+ /* Include this in xsave_state_full_size. */
3980+ amx_ecx = ecx;
3981+ xstate_amx_comp_sizes[i] = eax;
3982+ if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
3983+ {
3984+ /* Exclude this from xsave_state_size. */
3985+ ecx = 0;
3986+ xstate_comp_sizes[i] = 0;
3987+ }
3988+ else
3989+#endif
3990+ xstate_comp_sizes[i] = eax;
3991 }
3992 else
3993 {
3994+#ifdef __x86_64__
3995+ amx_ecx = 0;
3996+ xstate_amx_comp_sizes[i] = 0;
3997+#endif
3998 ecx = 0;
3999 xstate_comp_sizes[i] = 0;
4000 }
4001@@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features)
4002 if ((ecx & (1 << 1)) != 0)
4003 xstate_comp_offsets[i]
4004 = ALIGN_UP (xstate_comp_offsets[i], 64);
4005+#ifdef __x86_64__
4006+ xstate_amx_comp_offsets[i]
4007+ = (xstate_amx_comp_offsets[i - 1]
4008+ + xstate_amx_comp_sizes[i - 1]);
4009+ if ((amx_ecx & (1 << 1)) != 0)
4010+ xstate_amx_comp_offsets[i]
4011+ = ALIGN_UP (xstate_amx_comp_offsets[i],
4012+ 64);
4013+#endif
4014 }
4015 }
4016
4017@@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features)
4018 = xstate_comp_offsets[31] + xstate_comp_sizes[31];
4019 if (size)
4020 {
4021+#ifdef __x86_64__
4022+ unsigned int amx_size
4023+ = (xstate_amx_comp_offsets[31]
4024+ + xstate_amx_comp_sizes[31]);
4025+ amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
4026+ 64);
4027+ /* Set xsave_state_full_size to the compact AMX
4028+ state size for XSAVEC. NB: xsave_state_full_size
4029+ is only used in _dl_tlsdesc_dynamic_xsave and
4030+ _dl_tlsdesc_dynamic_xsavec. */
4031+ cpu_features->xsave_state_full_size = amx_size;
4032+#endif
4033 cpu_features->xsave_state_size
4034 = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
4035 CPU_FEATURE_SET (cpu_features, XSAVEC);
4036diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
4037index b9bf3115b6..cd7bd27cf3 100644
4038--- a/sysdeps/x86/include/cpu-features.h
4039+++ b/sysdeps/x86/include/cpu-features.h
4040@@ -934,6 +934,8 @@ struct cpu_features
4041 /* The full state size for XSAVE when XSAVEC is disabled by
4042
4043 GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
4044+
4045+ and the AMX state size when XSAVEC is available.
4046 */
4047 unsigned int xsave_state_full_size;
4048 /* Data cache size for use in memory and string routines, typically
4049diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
4050index 485cad9c02..db8e576e91 100644
4051--- a/sysdeps/x86/sysdep.h
4052+++ b/sysdeps/x86/sysdep.h
4053@@ -56,6 +56,14 @@
4054 | (1 << X86_XSTATE_ZMM_H_ID) \
4055 | (1 << X86_XSTATE_ZMM_ID) \
4056 | (1 << X86_XSTATE_APX_F_ID))
4057+
4058+/* AMX state mask. */
4059+# define AMX_STATE_SAVE_MASK \
4060+ ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
4061+
4062+/* States to be included in xsave_state_full_size. */
4063+# define FULL_STATE_SAVE_MASK \
4064+ (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
4065 #else
4066 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
4067 doesn't have red-zone, use 0 here. */
4068@@ -68,13 +76,17 @@
4069 | (1 << X86_XSTATE_BNDREGS_ID) \
4070 | (1 << X86_XSTATE_K_ID) \
4071 | (1 << X86_XSTATE_ZMM_H_ID))
4072+
4073+/* States to be included in xsave_state_size. */
4074+# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
4075 #endif
4076
4077 /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
4078- Compiler assumes that all registers, including x87 FPU stack registers,
4079- are unchanged after CALL, except for EFLAGS and RAX/EAX. */
4080+ Compiler assumes that all registers, including AMX and x87 FPU
4081+ stack registers, are unchanged after CALL, except for EFLAGS and
4082+ RAX/EAX. */
4083 #define TLSDESC_CALL_STATE_SAVE_MASK \
4084- (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
4085+ (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
4086
4087 /* Constants for bits in __x86_string_control: */
4088
4089diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
4090index 418cc4a9b8..04a534fa12 100755
4091--- a/sysdeps/x86_64/configure
4092+++ b/sysdeps/x86_64/configure
4093@@ -134,6 +134,34 @@ fi
4094 config_vars="$config_vars
4095 enable-cet = $enable_cet"
4096
4097+# Check if -mamx-tile works properly.
4098+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
4099+printf %s "checking whether -mamx-tile works properly... " >&6; }
4100+if test ${libc_cv_x86_have_amx_tile+y}
4101+then :
4102+ printf %s "(cached) " >&6
4103+else $as_nop
4104+ cat > conftest.c <<EOF
4105+#include <x86intrin.h>
4106+EOF
4107+ libc_cv_x86_have_amx_tile=no
4108+ if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
4109+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
4110+ (eval $ac_try) 2>&5
4111+ ac_status=$?
4112+ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
4113+ test $ac_status = 0; }; }; then
4114+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
4115+ libc_cv_x86_have_amx_tile=yes
4116+ fi
4117+ fi
4118+ rm -rf conftest*
4119+fi
4120+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
4121+printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
4122+config_vars="$config_vars
4123+have-mamx-tile = $libc_cv_x86_have_amx_tile"
4124+
4125 test -n "$critic_missing" && as_fn_error $? "
4126 *** $critic_missing" "$LINENO" 5
4127
4128diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
4129index d1f803c02e..c714c47351 100644
4130--- a/sysdeps/x86_64/configure.ac
4131+++ b/sysdeps/x86_64/configure.ac
4132@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
4133 fi
4134 LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
4135
4136+# Check if -mamx-tile works properly.
4137+AC_CACHE_CHECK(whether -mamx-tile works properly,
4138+ libc_cv_x86_have_amx_tile, [dnl
4139+cat > conftest.c <<EOF
4140+#include <x86intrin.h>
4141+EOF
4142+ libc_cv_x86_have_amx_tile=no
4143+ if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
4144+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
4145+ libc_cv_x86_have_amx_tile=yes
4146+ fi
4147+ fi
4148+ rm -rf conftest*])
4149+LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
4150+
4151 test -n "$critic_missing" && AC_MSG_ERROR([
4152 *** $critic_missing])
4153diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
4154index 0c2e8d5320..9f02cfc3eb 100644
4155--- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
4156+++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
4157@@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
4158 # endif
4159 #else
4160 /* Allocate stack space of the required size to save the state. */
4161- sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
4162+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
4163 #endif
4164 /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
4165 r10 and r11. */
4166
4167commit 354cabcb2634abe16da7a2ba5e648aac1204b58e
4168Author: H.J. Lu <hjl.tools@gmail.com>
4169Date: Mon Mar 18 06:40:16 2024 -0700
4170
4171 x86-64: Allocate state buffer space for RDI, RSI and RBX
4172
4173 _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
4174 After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11. Define
4175 TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
4176 to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
4177 STATE_SAVE_OFFSET(%rsp).
4178
4179 +==================+<- stack frame start aligned at 8 or 16 bytes
4180 | |<- RDI saved in the red zone
4181 | |<- RSI saved in the red zone
4182 | |<- RBX saved in the red zone
4183 | |<- paddings for stack realignment of 64 bytes
4184 |------------------|<- xsave buffer end aligned at 64 bytes
4185 | |<-
4186 | |<-
4187 | |<-
4188 |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
4189 | |<- 8-byte padding for 64-byte alignment
4190 | |<- 8-byte padding for 64-byte alignment
4191 | |<- R11
4192 | |<- R10
4193 | |<- R9
4194 | |<- R8
4195 | |<- RDX
4196 | |<- RCX
4197 +==================+<- RSP aligned at 64 bytes
4198
4199 Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size
4200 for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI
4201 and RBX are saved onto stack without adjusting stack pointer first, using
4202 the red-zone. This fixes BZ #31501.
4203 Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
4204
4205 (cherry picked from commit 717ebfa85c8240d32d0d19d86a484c31c55c9617)
4206
4207diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
4208index d71e8d3d2e..6fe1b728c6 100644
4209--- a/sysdeps/x86/cpu-features.c
4210+++ b/sysdeps/x86/cpu-features.c
4211@@ -310,7 +310,7 @@ update_active (struct cpu_features *cpu_features)
4212 /* NB: On AMX capable processors, ebx always includes AMX
4213 states. */
4214 unsigned int xsave_state_full_size
4215- = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
4216+ = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
4217
4218 cpu_features->xsave_state_size
4219 = xsave_state_full_size;
4220@@ -400,8 +400,10 @@ update_active (struct cpu_features *cpu_features)
4221 unsigned int amx_size
4222 = (xstate_amx_comp_offsets[31]
4223 + xstate_amx_comp_sizes[31]);
4224- amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
4225- 64);
4226+ amx_size
4227+ = ALIGN_UP ((amx_size
4228+ + TLSDESC_CALL_REGISTER_SAVE_AREA),
4229+ 64);
4230 /* Set xsave_state_full_size to the compact AMX
4231 state size for XSAVEC. NB: xsave_state_full_size
4232 is only used in _dl_tlsdesc_dynamic_xsave and
4233@@ -409,7 +411,8 @@ update_active (struct cpu_features *cpu_features)
4234 cpu_features->xsave_state_full_size = amx_size;
4235 #endif
4236 cpu_features->xsave_state_size
4237- = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
4238+ = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
4239+ 64);
4240 CPU_FEATURE_SET (cpu_features, XSAVEC);
4241 }
4242 }
4243diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
4244index db8e576e91..7359149e17 100644
4245--- a/sysdeps/x86/sysdep.h
4246+++ b/sysdeps/x86/sysdep.h
4247@@ -38,14 +38,59 @@
4248 #ifdef __x86_64__
4249 /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
4250 space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
4251- aligned to 16 bytes for fxsave and 64 bytes for xsave.
4252-
4253- NB: Is is non-zero because of the 128-byte red-zone. Some registers
4254- are saved on stack without adjusting stack pointer first. When we
4255- update stack pointer to allocate more space, we need to take the
4256- red-zone into account. */
4257+ aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero
4258+ because MOV, instead of PUSH, is used to save registers onto stack.
4259+
4260+ +==================+<- stack frame start aligned at 8 or 16 bytes
4261+ | |<- paddings for stack realignment of 64 bytes
4262+ |------------------|<- xsave buffer end aligned at 64 bytes
4263+ | |<-
4264+ | |<-
4265+ | |<-
4266+ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
4267+ | |<- 8-byte padding for 64-byte alignment
4268+ | |<- R9
4269+ | |<- R8
4270+ | |<- RDI
4271+ | |<- RSI
4272+ | |<- RDX
4273+ | |<- RCX
4274+ | |<- RAX
4275+ +==================+<- RSP aligned at 64 bytes
4276+
4277+ */
4278 # define STATE_SAVE_OFFSET (8 * 7 + 8)
4279
4280+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
4281+ stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
4282+ R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved
4283+ RDI, RSI and RBX values on stack by xsave.
4284+
4285+ +==================+<- stack frame start aligned at 8 or 16 bytes
4286+ | |<- RDI saved in the red zone
4287+ | |<- RSI saved in the red zone
4288+ | |<- RBX saved in the red zone
4289+ | |<- paddings for stack realignment of 64 bytes
4290+ |------------------|<- xsave buffer end aligned at 64 bytes
4291+ | |<-
4292+ | |<-
4293+ | |<-
4294+ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
4295+ | |<- 8-byte padding for 64-byte alignment
4296+ | |<- 8-byte padding for 64-byte alignment
4297+ | |<- R11
4298+ | |<- R10
4299+ | |<- R9
4300+ | |<- R8
4301+ | |<- RDX
4302+ | |<- RCX
4303+ +==================+<- RSP aligned at 64 bytes
4304+
4305+ Define the total register save area size for all integer registers by
4306+ adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
4307+ stack without adjusting stack pointer first, using the red-zone. */
4308+# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
4309+
4310 /* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
4311 registers are mutually exclusive. */
4312 # define STATE_SAVE_MASK \
4313@@ -66,8 +111,9 @@
4314 (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
4315 #else
4316 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
4317- doesn't have red-zone, use 0 here. */
4318+ uses PUSH to save registers onto stack, use 0 here. */
4319 # define STATE_SAVE_OFFSET 0
4320+# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
4321
4322 /* Save SSE, AVX, AXV512, mask and bound registers. */
4323 # define STATE_SAVE_MASK \
4324diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
4325new file mode 100644
4326index 0000000000..1d636669ba
4327--- /dev/null
4328+++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
4329@@ -0,0 +1,87 @@
4330+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
4331+ Copyright (C) 2024 Free Software Foundation, Inc.
4332+ This file is part of the GNU C Library.
4333+
4334+ The GNU C Library is free software; you can redistribute it and/or
4335+ modify it under the terms of the GNU Lesser General Public
4336+ License as published by the Free Software Foundation; either
4337+ version 2.1 of the License, or (at your option) any later version.
4338+
4339+ The GNU C Library is distributed in the hope that it will be useful,
4340+ but WITHOUT ANY WARRANTY; without even the implied warranty of
4341+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4342+ Lesser General Public License for more details.
4343+
4344+ You should have received a copy of the GNU Lesser General Public
4345+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
4347+
4348+#include <sysdep.h>
4349+
4350+/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
4351+ to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size
4352+ is 960 bytes and this test didn't fail. It may be due to the unused
4353+ last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and
4354+ this test might fail without the fix. */
4355+#ifndef OFFSET
4356+# define OFFSET 40
4357+#endif
4358+
4359+ .text
4360+ .p2align 4
4361+ .globl apply_tls
4362+ .type apply_tls, @function
+apply_tls:
+	cfi_startproc
+	_CET_ENDBR
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (6, -16)
+	movdqu	(%RDI_LP), %xmm0	/* xmm0 = first 16 bytes at (%rdi).  */
+	lea	tls_var1@TLSDESC(%rip), %RAX_LP
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (6)
+	/* Align stack to 64 bytes.  */
+	and	$-64, %RSP_LP
+	sub	$OFFSET, %RSP_LP	/* Then move OFFSET bytes below it.  */
+	pushq	%rbx			/* Callee-saved; restored before return.  */
+	/* Set %ebx and %esi to 0xbadbeef and record %rdi and %rsi.  */
+	movl	$0xbadbeef, %ebx
+	movl	$0xbadbeef, %esi
+	movq	%rdi, saved_rdi(%rip)
+	movq	%rsi, saved_rsi(%rip)
+	call	*tls_var1@TLSCALL(%RAX_LP)
+	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
+	cmpq	saved_rdi(%rip), %rdi
+	jne	L(hlt)
+	cmpq	saved_rsi(%rip), %rsi
+	jne	L(hlt)
+	cmpl	$0xbadbeef, %ebx
+	jne	L(hlt)
+	add	%fs:0, %RAX_LP		/* Add the thread pointer to the result.  */
+	movups	%xmm0, 32(%RAX_LP)	/* Copy 32 bytes from (%rdi) to 32(%rax).  */
+	movdqu	16(%RDI_LP), %xmm1
+	mov	%RAX_LP, %RBX_LP
+	movups	%xmm1, 48(%RAX_LP)
+	lea	32(%RBX_LP), %RAX_LP	/* Return the address just written to.  */
+	pop	%rbx
+	leave
+	cfi_def_cfa (7, 8)
+	ret
+L(hlt):
+	hlt				/* Faults in user mode: test fails.  */
+	cfi_endproc
+	.size	apply_tls, .-apply_tls
4404+ .hidden tls_var1
4405+ .globl tls_var1
4406+ .section .tbss,"awT",@nobits
4407+ .align 16
4408+ .type tls_var1, @object
4409+ .size tls_var1, 3200
4410+tls_var1:
4411+ .zero 3200
4412+ .local saved_rdi
4413+ .comm saved_rdi,8,8
4414+ .local saved_rsi
4415+ .comm saved_rsi,8,8
4416+ .section .note.GNU-stack,"",@progbits
4417
4418commit 15aebdbada54098787715448c94701f17033fc92
4419Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
4420Date: Tue Mar 12 13:21:18 2024 -0300
4421
4422 Ignore undefined symbols for -mtls-dialect=gnu2
4423
4424 So it does not fail for arm config that defaults to -mtp=soft (which
4425 issues a call to __aeabi_read_tp).
4426 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
4427
4428 (cherry picked from commit 968b0ca9440040a2b31248a572891f0e55c1ab10)
4429
4430diff --git a/configure b/configure
4431index 59ff1e415d..117b48a421 100755
4432--- a/configure
4433+++ b/configure
4434@@ -7020,7 +7020,7 @@ void foo (void)
4435 }
4436 EOF
4437 if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
4438- conftest.c -o conftest 1>&5'
4439+ -shared conftest.c -o conftest 1>&5'
4440 { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
4441 (eval $ac_try) 2>&5
4442 ac_status=$?
4443diff --git a/configure.ac b/configure.ac
4444index 65799e5685..19b88a47a5 100644
4445--- a/configure.ac
4446+++ b/configure.ac
4447@@ -1297,7 +1297,7 @@ void foo (void)
4448 }
4449 EOF
4450 if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
4451- conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
4452+ -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
4453 then
4454 libc_cv_mtls_dialect_gnu2=yes
4455 else
4456
4457commit a8ba52bde58c69f2b31da62ad2311f119adf6cb9
4458Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
4459Date: Tue Mar 12 13:21:19 2024 -0300
4460
4461 arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
4462
4463 ARM _dl_tlsdesc_dynamic slow path has two issues:
4464
     * ip/r12 is defined by AAPCS as a scratch register, and gcc uses it
       to save the stack pointer before some function calls.  So it must
       be saved/restored as well.  This fixes tst-gnu2-tls2.
4468
     * None of the possible VFP registers are saved/restored.  ARM has the
       additional complexity of having different VFP bank sizes (depending
       on the VFP support of the chip).
4472
    The tst-gnu2-tls2 test is extended to check for VFP registers, although
    only for hardfp builds.  Unlike setcontext, _dl_tlsdesc_dynamic does
    not handle HWCAP_ARM_IWMMXT (I don't have a way to properly test it
    and it has been almost a decade since new hardware was released).
4477
4478 With this patch there is no need to mark tst-gnu2-tls2 as XFAIL.
4479
4480 Checked on arm-linux-gnueabihf.
4481 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
4482
4483 (cherry picked from commit 64c7e344289ed085517c2227d8e3b06388242c13)
4484
4485diff --git a/config.h.in b/config.h.in
4486index 44a34072a4..4d33c63a84 100644
4487--- a/config.h.in
4488+++ b/config.h.in
4489@@ -141,6 +141,9 @@
4490 /* LOONGARCH floating-point ABI for ld.so. */
4491 #undef LOONGARCH_ABI_FRLEN
4492
+/* Define if ARM uses hard-float and supports VFPvX-D32.  */
4494+#undef HAVE_ARM_PCS_VFP_D32
4495+
4496 /* Linux specific: minimum supported kernel version. */
4497 #undef __LINUX_KERNEL_VERSION
4498
4499diff --git a/elf/Makefile b/elf/Makefile
4500index c5c37a9147..030db4d207 100644
4501--- a/elf/Makefile
4502+++ b/elf/Makefile
4503@@ -3056,10 +3056,6 @@ $(objpfx)tst-gnu2-tls2.out: \
4504 $(objpfx)tst-gnu2-tls2mod2.so
4505
4506 ifeq (yes,$(have-mtls-dialect-gnu2))
4507-# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved
4508-# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372
4509-test-xfail-tst-gnu2-tls2 = yes
4510-
4511 CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
4512 CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
4513 CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
4514diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
4515index 77964a57a3..1ade8151e2 100644
4516--- a/elf/tst-gnu2-tls2.h
4517+++ b/elf/tst-gnu2-tls2.h
4518@@ -27,6 +27,10 @@ extern struct tls *apply_tls (struct tls *);
4519
4520 /* An architecture can define them to verify that clobber caller-saved
4521 registers aren't changed by the implicit TLSDESC call. */
4522+#ifndef INIT_TLSDESC_CALL
4523+# define INIT_TLSDESC_CALL()
4524+#endif
4525+
4526 #ifndef BEFORE_TLSDESC_CALL
4527 # define BEFORE_TLSDESC_CALL()
4528 #endif
4529diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
4530index 45556a0e17..3fe3c14277 100644
4531--- a/elf/tst-gnu2-tls2mod0.c
4532+++ b/elf/tst-gnu2-tls2mod0.c
4533@@ -16,13 +16,14 @@
4534 License along with the GNU C Library; if not, see
4535 <https://www.gnu.org/licenses/>. */
4536
4537-#include "tst-gnu2-tls2.h"
4538+#include <tst-gnu2-tls2.h>
4539
4540 __thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
4541
4542 struct tls *
4543 apply_tls (struct tls *p)
4544 {
4545+ INIT_TLSDESC_CALL ();
4546 BEFORE_TLSDESC_CALL ();
4547 tls_var0 = *p;
4548 struct tls *ret = &tls_var0;
4549diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
4550index e10b9dbc0a..e210538468 100644
4551--- a/elf/tst-gnu2-tls2mod1.c
4552+++ b/elf/tst-gnu2-tls2mod1.c
4553@@ -16,13 +16,14 @@
4554 License along with the GNU C Library; if not, see
4555 <https://www.gnu.org/licenses/>. */
4556
4557-#include "tst-gnu2-tls2.h"
4558+#include <tst-gnu2-tls2.h>
4559
4560 __thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
4561
4562 struct tls *
4563 apply_tls (struct tls *p)
4564 {
4565+ INIT_TLSDESC_CALL ();
4566 BEFORE_TLSDESC_CALL ();
4567 tls_var1[1] = *p;
4568 struct tls *ret = &tls_var1[1];
4569diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
4570index 141af51e55..6d3031dc5f 100644
4571--- a/elf/tst-gnu2-tls2mod2.c
4572+++ b/elf/tst-gnu2-tls2mod2.c
4573@@ -16,13 +16,14 @@
4574 License along with the GNU C Library; if not, see
4575 <https://www.gnu.org/licenses/>. */
4576
4577-#include "tst-gnu2-tls2.h"
4578+#include <tst-gnu2-tls2.h>
4579
4580 __thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
4581
4582 struct tls *
4583 apply_tls (struct tls *p)
4584 {
4585+ INIT_TLSDESC_CALL ();
4586 BEFORE_TLSDESC_CALL ();
4587 tls_var2 = *p;
4588 struct tls *ret = &tls_var2;
4589diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure
4590index 35e2918922..4ef4d46cbd 100644
4591--- a/sysdeps/arm/configure
4592+++ b/sysdeps/arm/configure
4593@@ -187,6 +187,38 @@ else
4594 default-abi = soft"
4595 fi
4596
4597+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether VFP supports 32 registers" >&5
4598+printf %s "checking whether VFP supports 32 registers... " >&6; }
4599+if test ${libc_cv_arm_pcs_vfp_d32+y}
4600+then :
4601+ printf %s "(cached) " >&6
4602+else $as_nop
4603+
4604+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
4605+/* end confdefs.h. */
4606+
4607+void foo (void)
4608+{
4609+ asm volatile ("vldr d16,=17" : : : "d16");
4610+}
4611+
4612+_ACEOF
4613+if ac_fn_c_try_compile "$LINENO"
4614+then :
4615+ libc_cv_arm_pcs_vfp_d32=yes
4616+else $as_nop
4617+ libc_cv_arm_pcs_vfp_d32=no
4618+fi
4619+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
4620+fi
4621+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcs_vfp_d32" >&5
4622+printf "%s\n" "$libc_cv_arm_pcs_vfp_d32" >&6; }
4623+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
4624+then
4625+ printf "%s\n" "#define HAVE_ARM_PCS_VFP_D32 1" >>confdefs.h
4626+
4627+fi
4628+
4629 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
4630 printf %s "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
4631 if test ${libc_cv_arm_pcrel_movw+y}
4632diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac
4633index 5172e30bbe..cd00ddc9d9 100644
4634--- a/sysdeps/arm/configure.ac
4635+++ b/sysdeps/arm/configure.ac
4636@@ -21,6 +21,21 @@ else
4637 LIBC_CONFIG_VAR([default-abi], [soft])
4638 fi
4639
4640+AC_CACHE_CHECK([whether VFP supports 32 registers],
4641+ libc_cv_arm_pcs_vfp_d32, [
4642+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
4643+void foo (void)
4644+{
4645+ asm volatile ("vldr d16,=17" : : : "d16");
4646+}
4647+]])],
4648+ [libc_cv_arm_pcs_vfp_d32=yes],
4649+ [libc_cv_arm_pcs_vfp_d32=no])])
4650+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
4651+then
4652+ AC_DEFINE(HAVE_ARM_PCS_VFP_D32)
4653+fi
4654+
4655 AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
4656 libc_cv_arm_pcrel_movw, [
4657 cat > conftest.s <<\EOF
4658diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S
4659index 764c56e70f..ada106521d 100644
4660--- a/sysdeps/arm/dl-tlsdesc.S
4661+++ b/sysdeps/arm/dl-tlsdesc.S
4662@@ -19,6 +19,7 @@
4663 #include <sysdep.h>
4664 #include <arm-features.h>
4665 #include <tls.h>
4666+#include <rtld-global-offsets.h>
4667 #include "tlsdesc.h"
4668
4669 .text
4670@@ -83,14 +84,20 @@ _dl_tlsdesc_dynamic(struct tlsdesc *tdp)
4671 .align 2
4672 _dl_tlsdesc_dynamic:
4673 /* Our calling convention is to clobber r0, r1 and the processor
4674- flags. All others that are modified must be saved */
4675- eabi_save ({r2,r3,r4,lr})
4676- push {r2,r3,r4,lr}
4677- cfi_adjust_cfa_offset (16)
4678+ flags. All others that are modified must be saved. r5 is
4679+ used as the hwcap value to avoid reload after __tls_get_addr
4680+ call. If required we will save the vector register on the slow
4681+ path. */
4682+ eabi_save ({r2,r3,r4,r5,ip,lr})
4683+ push {r2,r3,r4,r5,ip,lr}
4684+ cfi_adjust_cfa_offset (24)
4685 cfi_rel_offset (r2,0)
4686 cfi_rel_offset (r3,4)
4687 cfi_rel_offset (r4,8)
4688- cfi_rel_offset (lr,12)
4689+ cfi_rel_offset (r5,12)
4690+ cfi_rel_offset (ip,16)
4691+ cfi_rel_offset (lr,20)
4692+
4693 ldr r1, [r0] /* td */
4694 GET_TLS (lr)
4695 mov r4, r0 /* r4 = tp */
4696@@ -113,22 +120,69 @@ _dl_tlsdesc_dynamic:
4697 rsbne r0, r4, r3
4698 bne 2f
4699 1: mov r0, r1
4700+
4701+ /* Load the hwcap to check for vector support. */
4702+ ldr r2, 3f
4703+ ldr r1, .Lrtld_global_ro
4704+0: add r2, pc, r2
4705+ ldr r2, [r2, r1]
4706+ ldr r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
4707+
4708+#ifdef __SOFTFP__
4709+ tst r5, #HWCAP_ARM_VFP
4710+ beq .Lno_vfp
4711+#endif
4712+
4713+ /* Store the VFP registers. Don't use VFP instructions directly
4714+ because this code is used in non-VFP multilibs. */
4715+#define VFP_STACK_REQ (32*8 + 8)
4716+ sub sp, sp, VFP_STACK_REQ
4717+ cfi_adjust_cfa_offset (VFP_STACK_REQ)
4718+ mov r3, sp
4719+ .inst 0xeca30b20 /* vstmia r3!, {d0-d15} */
4720+ tst r5, #HWCAP_ARM_VFPD32
4721+ beq 4f
4722+ .inst 0xece30b20 /* vstmia r3!, {d16-d31} */
4723+ /* Store the floating-point status register. */
4724+4: .inst 0xeef12a10 /* vmrs r2, fpscr */
4725+ str r2, [r3]
4726+.Lno_vfp:
4727 bl __tls_get_addr
4728 rsb r0, r4, r0
4729+#ifdef __SOFTFP__
4730+ tst r5, #HWCAP_ARM_VFP
4731+ beq 2f
4732+#endif
+	mov	r3, sp
+	.inst	0xecb30b20	/* vldmia r3!, {d0-d15}  */
+	tst	r5, #HWCAP_ARM_VFPD32
+	beq	5f
+	.inst	0xecf30b20	/* vldmia r3!, {d16-d31}  */
+5:	ldr	r4, [r3]	/* Saved FPSCR follows the last bank, D16 or D32.  */
+	.inst	0xeee14a10	/* vmsr	fpscr, r4  */
4740+ add sp, sp, VFP_STACK_REQ
4741+ cfi_adjust_cfa_offset (-VFP_STACK_REQ)
4742+
4743 2:
4744 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
4745 || defined (ARM_ALWAYS_BX))
4746- pop {r2,r3,r4, lr}
4747- cfi_adjust_cfa_offset (-16)
+	pop	{r2,r3,r4,r5,ip, lr}
+	cfi_adjust_cfa_offset (-24)	/* Six words popped, as pushed above.  */
4750 cfi_restore (lr)
4751+ cfi_restore (ip)
4752+ cfi_restore (r5)
4753 cfi_restore (r4)
4754 cfi_restore (r3)
4755 cfi_restore (r2)
4756 bx lr
4757 #else
4758- pop {r2,r3,r4, pc}
4759+ pop {r2,r3,r4,r5,ip, pc}
4760 #endif
4761 eabi_fnend
4762 cfi_endproc
4763 .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
4764+
4765+3: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
4766+.Lrtld_global_ro:
4767+ .long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
4768 #endif /* SHARED */
4769diff --git a/sysdeps/arm/tst-gnu2-tls2.h b/sysdeps/arm/tst-gnu2-tls2.h
4770new file mode 100644
4771index 0000000000..e413ac21fb
4772--- /dev/null
4773+++ b/sysdeps/arm/tst-gnu2-tls2.h
4774@@ -0,0 +1,128 @@
4775+/* Test TLSDESC relocation. ARM version.
4776+ Copyright (C) 2024 Free Software Foundation, Inc.
4777+ This file is part of the GNU C Library.
4778+
4779+ The GNU C Library is free software; you can redistribute it and/or
4780+ modify it under the terms of the GNU Lesser General Public
4781+ License as published by the Free Software Foundation; either
4782+ version 2.1 of the License, or (at your option) any later version.
4783+
4784+ The GNU C Library is distributed in the hope that it will be useful,
4785+ but WITHOUT ANY WARRANTY; without even the implied warranty of
4786+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4787+ Lesser General Public License for more details.
4788+
4789+ You should have received a copy of the GNU Lesser General Public
4790+ License along with the GNU C Library; if not, see
4791+ <https://www.gnu.org/licenses/>. */
4792+
4793+#include <config.h>
4794+#include <sys/auxv.h>
4795+#include <string.h>
4796+#include <stdlib.h>
4797+#include <endian.h>
4798+
4799+#ifndef __SOFTFP__
4800+
4801+# ifdef HAVE_ARM_PCS_VFP_D32
4802+# define SAVE_VFP_D32 \
4803+ asm volatile ("vldr d16,=17" : : : "d16"); \
4804+ asm volatile ("vldr d17,=18" : : : "d17"); \
4805+ asm volatile ("vldr d18,=19" : : : "d18"); \
4806+ asm volatile ("vldr d19,=20" : : : "d19"); \
4807+ asm volatile ("vldr d20,=21" : : : "d20"); \
4808+ asm volatile ("vldr d21,=22" : : : "d21"); \
4809+ asm volatile ("vldr d22,=23" : : : "d22"); \
4810+ asm volatile ("vldr d23,=24" : : : "d23"); \
4811+ asm volatile ("vldr d24,=25" : : : "d24"); \
4812+ asm volatile ("vldr d25,=26" : : : "d25"); \
4813+ asm volatile ("vldr d26,=27" : : : "d26"); \
4814+ asm volatile ("vldr d27,=28" : : : "d27"); \
4815+ asm volatile ("vldr d28,=29" : : : "d28"); \
4816+ asm volatile ("vldr d29,=30" : : : "d29"); \
4817+ asm volatile ("vldr d30,=31" : : : "d30"); \
4818+ asm volatile ("vldr d31,=32" : : : "d31");
4819+# else
4820+# define SAVE_VFP_D32
4821+# endif
4822+
4823+# define INIT_TLSDESC_CALL() \
4824+ unsigned long hwcap = getauxval (AT_HWCAP)
4825+
4826+/* Set each vector register to a value from 1 to 32 before the TLS access,
4827+ dump to memory after TLS access, and compare with the expected values. */
4828+
4829+# define BEFORE_TLSDESC_CALL() \
4830+ if (hwcap & HWCAP_ARM_VFP) \
4831+ { \
4832+ asm volatile ("vldr d0,=1" : : : "d0"); \
4833+ asm volatile ("vldr d1,=2" : : : "d1"); \
4834+ asm volatile ("vldr d2,=3" : : : "d2"); \
4835+ asm volatile ("vldr d3,=4" : : : "d3"); \
4836+ asm volatile ("vldr d4,=5" : : : "d4"); \
4837+ asm volatile ("vldr d5,=6" : : : "d5"); \
4838+ asm volatile ("vldr d6,=7" : : : "d6"); \
4839+ asm volatile ("vldr d7,=8" : : : "d7"); \
4840+ asm volatile ("vldr d8,=9" : : : "d8"); \
4841+ asm volatile ("vldr d9,=10" : : : "d9"); \
4842+ asm volatile ("vldr d10,=11" : : : "d10"); \
4843+ asm volatile ("vldr d11,=12" : : : "d11"); \
4844+ asm volatile ("vldr d12,=13" : : : "d12"); \
4845+ asm volatile ("vldr d13,=14" : : : "d13"); \
4846+ asm volatile ("vldr d14,=15" : : : "d14"); \
4847+ asm volatile ("vldr d15,=16" : : : "d15"); \
4848+ } \
4849+ if (hwcap & HWCAP_ARM_VFPD32) \
4850+ { \
4851+ SAVE_VFP_D32 \
4852+ }
4853+
4854+# define VFP_STACK_REQ (16*8)
4855+# if __BYTE_ORDER == __BIG_ENDIAN
4856+# define DISP 7
4857+# else
4858+# define DISP 0
4859+# endif
4860+
4861+# ifdef HAVE_ARM_PCS_VFP_D32
4862+# define CHECK_VFP_D32 \
4863+ char vfp[VFP_STACK_REQ]; \
4864+ asm volatile ("vstmia %0, {d16-d31}\n" \
4865+ : \
4866+ : "r" (vfp) \
4867+ : "memory"); \
4868+ \
4869+ char expected[VFP_STACK_REQ] = { 0 }; \
4870+ for (int i = 0; i < 16; ++i) \
4871+ expected[i * 8 + DISP] = i + 17; \
4872+ \
4873+ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \
4874+ abort ();
4875+# else
4876+# define CHECK_VFP_D32
4877+# endif
4878+
4879+# define AFTER_TLSDESC_CALL() \
4880+ if (hwcap & HWCAP_ARM_VFP) \
4881+ { \
4882+ char vfp[VFP_STACK_REQ]; \
4883+ asm volatile ("vstmia %0, {d0-d15}\n" \
4884+ : \
4885+ : "r" (vfp) \
4886+ : "memory"); \
4887+ \
4888+ char expected[VFP_STACK_REQ] = { 0 }; \
4889+ for (int i = 0; i < 16; ++i) \
4890+ expected[i * 8 + DISP] = i + 1; \
4891+ \
4892+ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \
4893+ abort (); \
4894+ } \
4895+ if (hwcap & HWCAP_ARM_VFPD32) \
4896+ { \
4897+ CHECK_VFP_D32 \
4898+ }
4899+
4900+#endif /* __SOFTFP__ */
4901+
4902+#include_next <tst-gnu2-tls2.h>
4903
4904commit aded2fc004e7ee85cf0b45b1382552d41e555a23
4905Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
4906Date: Tue Mar 12 13:21:20 2024 -0300
4907
4908 elf: Enable TLS descriptor tests on aarch64
4909
4910 The aarch64 uses 'trad' for traditional tls and 'desc' for tls
4911 descriptors, but unlike other targets it defaults to 'desc'. The
4912 gnutls2 configure check does not set aarch64 as an ABI that uses
4913 TLS descriptors, which then disables some tests.
4914
4915 Also rename the internal machinery from gnu2 to tls descriptors.
4916
4917 Checked on aarch64-linux-gnu.
4918 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
4919
4920 (cherry picked from commit 3d53d18fc71c5d9ef4773b8bce04d54b80181926)
4921
4922diff --git a/configure b/configure
4923index 117b48a421..432e40a592 100755
4924--- a/configure
4925+++ b/configure
4926@@ -653,7 +653,7 @@ LIBGD
4927 libc_cv_cc_loop_to_function
4928 libc_cv_cc_submachine
4929 libc_cv_cc_nofma
4930-libc_cv_mtls_dialect_gnu2
4931+libc_cv_mtls_descriptor
4932 libc_cv_has_glob_dat
4933 libc_cv_fpie
4934 libc_cv_z_execstack
4935@@ -4760,6 +4760,9 @@ libc_config_ok=no
4936 # whether to use such directories.
4937 with_fp_cond=1
4938
4939+# A preconfigure script may define another name to TLS descriptor variant
4940+mtls_descriptor=gnu2
4941+
4942 if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null`
4943 then
4944 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5
4945@@ -7006,9 +7009,9 @@ fi
4946 printf "%s\n" "$libc_cv_has_glob_dat" >&6; }
4947
4948
4949-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -mtls-dialect=gnu2" >&5
4950-printf %s "checking for -mtls-dialect=gnu2... " >&6; }
4951-if test ${libc_cv_mtls_dialect_gnu2+y}
4952+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tls descriptor support" >&5
4953+printf %s "checking for tls descriptor support... " >&6; }
4954+if test ${libc_cv_mtls_descriptor+y}
4955 then :
4956 printf %s "(cached) " >&6
4957 else $as_nop
4958@@ -7019,7 +7022,7 @@ void foo (void)
4959 i = 10;
4960 }
4961 EOF
4962-if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
4963+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
4964 -shared conftest.c -o conftest 1>&5'
4965 { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
4966 (eval $ac_try) 2>&5
4967@@ -7027,17 +7030,17 @@ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nost
4968 printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
4969 test $ac_status = 0; }; }
4970 then
4971- libc_cv_mtls_dialect_gnu2=yes
4972+ libc_cv_mtls_descriptor=$mtls_descriptor
4973 else
4974- libc_cv_mtls_dialect_gnu2=no
4975+ libc_cv_mtls_descriptor=no
4976 fi
4977 rm -f conftest*
4978 fi
4979-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_dialect_gnu2" >&5
4980-printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; }
4981+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_descriptor" >&5
4982+printf "%s\n" "$libc_cv_mtls_descriptor" >&6; }
4983
4984 config_vars="$config_vars
4985-have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2"
4986+have-mtls-descriptor = $libc_cv_mtls_descriptor"
4987
4988 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5
4989 printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; }
4990diff --git a/configure.ac b/configure.ac
4991index 19b88a47a5..bdc385d03c 100644
4992--- a/configure.ac
4993+++ b/configure.ac
4994@@ -442,6 +442,9 @@ libc_config_ok=no
4995 # whether to use such directories.
4996 with_fp_cond=1
4997
4998+# A preconfigure script may define another name to TLS descriptor variant
4999+mtls_descriptor=gnu2
5000+
5001 dnl Let sysdeps/*/preconfigure act here.
5002 LIBC_PRECONFIGURE([$srcdir], [for sysdeps])
5003
5004@@ -1287,7 +1290,7 @@ fi
5005 rm -f conftest*])
5006 AC_SUBST(libc_cv_has_glob_dat)
5007
5008-AC_CACHE_CHECK([for -mtls-dialect=gnu2], libc_cv_mtls_dialect_gnu2,
5009+AC_CACHE_CHECK([for tls descriptor support], libc_cv_mtls_descriptor,
5010 [dnl
5011 cat > conftest.c <<EOF
5012 __thread int i;
5013@@ -1296,16 +1299,16 @@ void foo (void)
5014 i = 10;
5015 }
5016 EOF
5017-if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
5018+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
5019 -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
5020 then
5021- libc_cv_mtls_dialect_gnu2=yes
5022+ libc_cv_mtls_descriptor=$mtls_descriptor
5023 else
5024- libc_cv_mtls_dialect_gnu2=no
5025+ libc_cv_mtls_descriptor=no
5026 fi
5027 rm -f conftest*])
5028-AC_SUBST(libc_cv_mtls_dialect_gnu2)
5029-LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2])
5030+AC_SUBST(libc_cv_mtls_descriptor)
5031+LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor])
5032
5033 dnl clang emits an warning for a double alias redirection, to warn the
5034 dnl original symbol is sed even when weak definition overrides it.
5035diff --git a/elf/Makefile b/elf/Makefile
5036index 030db4d207..69aa423c4b 100644
5037--- a/elf/Makefile
5038+++ b/elf/Makefile
5039@@ -999,13 +999,13 @@ modules-names-tests = $(filter-out ifuncmod% tst-tlsmod%,\
5040 # For +depfiles in Makerules.
5041 extra-test-objs += tst-auditmod17.os
5042
5043-ifeq (yes,$(have-mtls-dialect-gnu2))
5044+ifneq (no,$(have-mtls-descriptor))
5045 tests += tst-gnu2-tls1
5046 modules-names += tst-gnu2-tls1mod
5047 $(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so
5048 tst-gnu2-tls1mod.so-no-z-defs = yes
5049-CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2
5050-endif # $(have-mtls-dialect-gnu2)
5051+CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=$(have-mtls-descriptor)
5052+endif # $(have-mtls-descriptor)
5053
5054 ifeq (yes,$(have-protected-data))
5055 modules-names += tst-protected1moda tst-protected1modb
5056@@ -2972,11 +2972,11 @@ $(objpfx)tst-tls-allocation-failure-static-patched.out: \
5057 $(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \
5058 $(objpfx)tst-audit-tlsdesc-mod2.so \
5059 $(shared-thread-library)
5060-ifeq (yes,$(have-mtls-dialect-gnu2))
5061+ifneq (no,$(have-mtls-descriptor))
5062 # The test is valid for all TLS types, but we want to exercise GNU2
5063 # TLS if possible.
5064-CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2
5065-CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2
5066+CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
5067+CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
5068 endif
5069 $(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library)
5070 $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \
5071@@ -3055,11 +3055,11 @@ $(objpfx)tst-gnu2-tls2.out: \
5072 $(objpfx)tst-gnu2-tls2mod1.so \
5073 $(objpfx)tst-gnu2-tls2mod2.so
5074
5075-ifeq (yes,$(have-mtls-dialect-gnu2))
5076-CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
5077-CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
5078-CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
5079-CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2
5080-CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2
5081-CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2
5082+ifneq (no,$(have-mtls-descriptor))
5083+CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=$(have-mtls-descriptor)
5084+CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
5085+CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
5086+CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor)
5087+CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor)
5088+CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor)
5089 endif
5090diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
5091index d9bd1f8558..19657b627b 100644
5092--- a/sysdeps/aarch64/preconfigure
5093+++ b/sysdeps/aarch64/preconfigure
5094@@ -2,5 +2,6 @@ case "$machine" in
5095 aarch64*)
5096 base_machine=aarch64
5097 machine=aarch64
5098+ mtls_descriptor=desc
5099 ;;
5100 esac
5101diff --git a/sysdeps/arm/Makefile b/sysdeps/arm/Makefile
5102index d5cea717a9..619474eca9 100644
5103--- a/sysdeps/arm/Makefile
5104+++ b/sysdeps/arm/Makefile
5105@@ -13,15 +13,15 @@ $(objpfx)libgcc-stubs.a: $(objpfx)aeabi_unwind_cpp_pr1.os
5106 lib-noranlib: $(objpfx)libgcc-stubs.a
5107
5108 ifeq ($(build-shared),yes)
5109-ifeq (yes,$(have-mtls-dialect-gnu2))
5110+ifneq (no,$(have-mtls-descriptor))
5111 tests += tst-armtlsdescloc tst-armtlsdescextnow tst-armtlsdescextlazy
5112 modules-names += tst-armtlsdesclocmod
5113 modules-names += tst-armtlsdescextlazymod tst-armtlsdescextnowmod
5114 CPPFLAGS-tst-armtlsdescextnowmod.c += -Dstatic=
5115 CPPFLAGS-tst-armtlsdescextlazymod.c += -Dstatic=
5116-CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=gnu2
5117-CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=gnu2
5118-CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=gnu2
5119+CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=$(have-mtls-descriptor)
5120+CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=$(have-mtls-descriptor)
5121+CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=$(have-mtls-descriptor)
5122 LDFLAGS-tst-armtlsdescextnowmod.so += -Wl,-z,now
5123 tst-armtlsdescloc-ENV = LD_BIND_NOW=1
5124 tst-armtlsdescextnow-ENV = LD_BIND_NOW=1
5125
5126commit 5a461f2949ded98d8211939f84988bc464c7b4fe
5127Author: Andreas Schwab <schwab@suse.de>
5128Date: Tue Mar 19 13:49:50 2024 +0100
5129
5130 Add tst-gnu2-tls2mod1 to test-internal-extras
5131
5132 That allows sysdeps/x86_64/tst-gnu2-tls2mod1.S to use internal headers.
5133
5134 Fixes: 717ebfa85c ("x86-64: Allocate state buffer space for RDI, RSI and RBX")
5135 (cherry picked from commit fd7ee2e6c5eb49e4a630a9978b4d668bff6354ee)
5136
5137diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
5138index e8babc9a4e..9d374a3299 100644
5139--- a/sysdeps/x86_64/Makefile
5140+++ b/sysdeps/x86_64/Makefile
5141@@ -210,6 +210,8 @@ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
5142 $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
5143 endif
5144
5145+test-internal-extras += tst-gnu2-tls2mod1
5146+
5147 endif # $(subdir) == elf
5148
5149 ifeq ($(subdir),csu)
5150
5151commit aa4249266e9906c4bc833e4847f4d8feef59504f
5152Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
5153Date: Thu Feb 8 10:08:38 2024 -0300
5154
5155 x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
5156
5157 The REP MOVSB usage on memcpy/memmove does not show much performance
5158 improvement on Zen3/Zen4 cores compared to the vectorized loops. Also,
5159 as from BZ 30994, if the source is aligned and the destination is not
5160 the performance can be 20x slower.
5161
5162 The performance difference is noticeable with small buffer sizes, closer
5163 to the lower bounds limits when memcpy/memmove starts to use ERMS. The
5164 performance of REP MOVSB is similar to vectorized instruction on the
5165 size limit (the L2 cache). Also, there is no drawback to multiple cores
5166 sharing the cache.
5167
5168 Checked on x86_64-linux-gnu on Zen3.
5169 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
5170
5171 (cherry picked from commit 0c0d39fe4aeb0f69b26e76337c5dfd5530d5d44e)
5172
5173diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
5174index d5101615e3..f34d12846c 100644
5175--- a/sysdeps/x86/dl-cacheinfo.h
5176+++ b/sysdeps/x86/dl-cacheinfo.h
5177@@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5178 long int data = -1;
5179 long int shared = -1;
5180 long int shared_per_thread = -1;
5181- long int core = -1;
5182 unsigned int threads = 0;
5183 unsigned long int level1_icache_size = -1;
5184 unsigned long int level1_icache_linesize = -1;
5185@@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5186 if (cpu_features->basic.kind == arch_kind_intel)
5187 {
5188 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
5189- core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
5190 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
5191 shared_per_thread = shared;
5192
5193@@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5194 = handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
5195 level1_dcache_linesize
5196 = handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
5197- level2_cache_size = core;
5198+ level2_cache_size
5199+ = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
5200 level2_cache_assoc
5201 = handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
5202 level2_cache_linesize
5203@@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5204 level4_cache_size
5205 = handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
5206
5207- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
5208+ get_common_cache_info (&shared, &shared_per_thread, &threads,
5209+ level2_cache_size);
5210 }
5211 else if (cpu_features->basic.kind == arch_kind_zhaoxin)
5212 {
5213 data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
5214- core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
5215 shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
5216 shared_per_thread = shared;
5217
5218@@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5219 level1_dcache_size = data;
5220 level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC);
5221 level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE);
5222- level2_cache_size = core;
5223+ level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
5224 level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC);
5225 level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE);
5226 level3_cache_size = shared;
5227 level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
5228 level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
5229
5230- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
5231+ get_common_cache_info (&shared, &shared_per_thread, &threads,
5232+ level2_cache_size);
5233 }
5234 else if (cpu_features->basic.kind == arch_kind_amd)
5235 {
5236 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
5237- core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
5238 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
5239
5240 level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
5241@@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5242 level1_dcache_size = data;
5243 level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
5244 level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
5245- level2_cache_size = core;
5246+ level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);
5247 level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
5248 level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
5249 level3_cache_size = shared;
5250@@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5251 if (shared <= 0)
5252 {
5253 /* No shared L3 cache. All we have is the L2 cache. */
5254- shared = core;
5255+ shared = level2_cache_size;
5256 }
5257 else if (cpu_features->basic.family < 0x17)
5258 {
5259 /* Account for exclusive L2 and L3 caches. */
5260- shared += core;
5261+ shared += level2_cache_size;
5262 }
5263
5264 shared_per_thread = shared;
5265@@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5266 if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
5267 rep_movsb_threshold = 2112;
5268
5269+ /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
5270+ cases slower than the vectorized path (and for some alignments,
5271+ it is really slow, check BZ #30994). */
5272+ if (cpu_features->basic.kind == arch_kind_amd)
5273+ rep_movsb_threshold = non_temporal_threshold;
5274+
5275 /* The default threshold to use Enhanced REP STOSB. */
5276 unsigned long int rep_stosb_threshold = 2048;
5277
5278@@ -1028,16 +1033,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5279 SIZE_MAX);
5280
5281 unsigned long int rep_movsb_stop_threshold;
5282- /* ERMS feature is implemented from AMD Zen3 architecture and it is
5283- performing poorly for data above L2 cache size. Henceforth, adding
5284- an upper bound threshold parameter to limit the usage of Enhanced
5285- REP MOVSB operations and setting its value to L2 cache size. */
5286- if (cpu_features->basic.kind == arch_kind_amd)
5287- rep_movsb_stop_threshold = core;
5288 /* Setting the upper bound of ERMS to the computed value of
5289- non-temporal threshold for architectures other than AMD. */
5290- else
5291- rep_movsb_stop_threshold = non_temporal_threshold;
5292+ non-temporal threshold for all architectures. */
5293+ rep_movsb_stop_threshold = non_temporal_threshold;
5294
5295 cpu_features->data_cache_size = data;
5296 cpu_features->shared_cache_size = shared;
5297
5298commit 6484a92698039c4a7a510f0214e22d067b0d78b3
5299Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
5300Date: Thu Feb 8 10:08:39 2024 -0300
5301
5302 x86: Do not prefer ERMS for memset on Zen3+
5303
5304 For AMD Zen3+ architecture, the performance of the vectorized loop is
5305 slightly better than ERMS.
5306
5307 Checked on x86_64-linux-gnu on Zen3.
5308 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
5309
5310 (cherry picked from commit 272708884cb750f12f5c74a00e6620c19dc6d567)
5311
5312diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
5313index f34d12846c..5a98f70364 100644
5314--- a/sysdeps/x86/dl-cacheinfo.h
5315+++ b/sysdeps/x86/dl-cacheinfo.h
5316@@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
5317 minimum value is fixed. */
5318 rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
5319 long int, NULL);
5320+ if (cpu_features->basic.kind == arch_kind_amd
5321+ && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
5322+ /* For AMD Zen3+ architecture, the performance of the vectorized loop is
5323+ slightly better than ERMS. */
5324+ rep_stosb_threshold = SIZE_MAX;
5325
5326 TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
5327 TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
5328
5329commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae
5330Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
5331Date: Thu Feb 8 10:08:40 2024 -0300
5332
5333 x86: Expand the comment on when REP STOSB is used on memset
5334
5335 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
5336 (cherry picked from commit 491e55beab7457ed310a4a47496f4a333c5d1032)
5337
5338diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
5339index 9984c3ca0f..97839a2248 100644
5340--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
5341+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
5342@@ -21,7 +21,9 @@
5343 2. If size is less than VEC, use integer register stores.
5344 3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
5345 4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
5346- 5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
5347+ 5. On machines with the ERMS feature, if size is greater than or equal to
5348+ __x86_rep_stosb_threshold, then REP STOSB will be used.
5349+ 6. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
5350 4 VEC stores and store 4 * VEC at a time until done. */
5351
5352 #include <sysdep.h>
5353
5354commit 31c7d69af59da0da80caa74b2ec6ae149013384d
5355Author: Florian Weimer <fweimer@redhat.com>
5356Date: Fri Feb 16 07:40:37 2024 +0100
5357
5358 i386: Use generic memrchr in libc (bug 31316)
5359
5360 Before this change, we incorrectly used the SSE2 variant in the
5361 implementation, without checking that the system actually supports
5362 SSE2.
5363
5364 Tested-by: Sam James <sam@gentoo.org>
5365 (cherry picked from commit 0d9166c2245cad4ac520b337dee40c9a583872b6)
5366
5367diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c
5368index ef7bbbe792..20bfdf3af3 100644
5369--- a/sysdeps/i386/i686/multiarch/memrchr-c.c
5370+++ b/sysdeps/i386/i686/multiarch/memrchr-c.c
5371@@ -5,3 +5,4 @@ extern void *__memrchr_ia32 (const void *, int, size_t);
5372 #endif
5373
5374 #include "string/memrchr.c"
5375+strong_alias (__memrchr_ia32, __GI___memrchr)
5376diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
5377index d9dae04171..e123f87435 100644
5378--- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S
5379+++ b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
5380@@ -720,5 +720,4 @@ L(ret_null):
5381 ret
5382
5383 END (__memrchr_sse2)
5384-strong_alias (__memrchr_sse2, __GI___memrchr)
5385 #endif
5386
5387commit b0e0a07018098c2c5927796be5681a298c312626
5388Author: Joe Ramsay <Joe.Ramsay@arm.com>
5389Date: Tue Feb 20 16:44:13 2024 +0000
5390
5391 aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR
5392
5393 This includes a fix for big-endian in AdvSIMD log, some cosmetic
5394 changes, and numerous small optimisations mainly around inlining and
5395 using indexed variants of MLA intrinsics.
5396 Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
5397
5398 (cherry picked from commit e302e1021391d13a9611ba3a910df128830bd19e)
5399
5400diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c
5401index a8eabb5e71..0a86c9823a 100644
5402--- a/sysdeps/aarch64/fpu/acos_advsimd.c
5403+++ b/sysdeps/aarch64/fpu/acos_advsimd.c
5404@@ -40,8 +40,8 @@ static const struct data
5405 };
5406
5407 #define AllMask v_u64 (0xffffffffffffffff)
5408-#define Oneu (0x3ff0000000000000)
5409-#define Small (0x3e50000000000000) /* 2^-53. */
5410+#define Oneu 0x3ff0000000000000
5411+#define Small 0x3e50000000000000 /* 2^-53. */
5412
5413 #if WANT_SIMD_EXCEPT
5414 static float64x2_t VPCS_ATTR NOINLINE
5415diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c
5416index 141646e954..2de6eff407 100644
5417--- a/sysdeps/aarch64/fpu/asin_advsimd.c
5418+++ b/sysdeps/aarch64/fpu/asin_advsimd.c
5419@@ -39,8 +39,8 @@ static const struct data
5420 };
5421
5422 #define AllMask v_u64 (0xffffffffffffffff)
5423-#define One (0x3ff0000000000000)
5424-#define Small (0x3e50000000000000) /* 2^-12. */
5425+#define One 0x3ff0000000000000
5426+#define Small 0x3e50000000000000 /* 2^-12. */
5427
5428 #if WANT_SIMD_EXCEPT
5429 static float64x2_t VPCS_ATTR NOINLINE
5430diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c
5431index 09a4c559b8..04fa71fa37 100644
5432--- a/sysdeps/aarch64/fpu/atan2_sve.c
5433+++ b/sysdeps/aarch64/fpu/atan2_sve.c
5434@@ -37,9 +37,6 @@ static const struct data
5435 .pi_over_2 = 0x1.921fb54442d18p+0,
5436 };
5437
5438-/* Useful constants. */
5439-#define SignMask sv_u64 (0x8000000000000000)
5440-
5441 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
5442 static svfloat64_t NOINLINE
5443 special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
5444@@ -72,14 +69,15 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
5445 svbool_t cmp_y = zeroinfnan (iy, pg);
5446 svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
5447
5448- svuint64_t sign_x = svand_x (pg, ix, SignMask);
5449- svuint64_t sign_y = svand_x (pg, iy, SignMask);
5450- svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
5451-
5452 svfloat64_t ax = svabs_x (pg, x);
5453 svfloat64_t ay = svabs_x (pg, y);
5454+ svuint64_t iax = svreinterpret_u64 (ax);
5455+ svuint64_t iay = svreinterpret_u64 (ay);
5456+
5457+ svuint64_t sign_x = sveor_x (pg, ix, iax);
5458+ svuint64_t sign_y = sveor_x (pg, iy, iay);
5459+ svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
5460
5461- svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
5462 svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
5463
5464 /* Set up z for call to atan. */
5465@@ -88,8 +86,9 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
5466 svfloat64_t z = svdiv_x (pg, n, d);
5467
5468 /* Work out the correct shift. */
5469- svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0));
5470- shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
5471+ svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
5472+ shift = svsel (pred_aygtax, sv_f64 (1.0), shift);
5473+ shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
5474 shift = svmul_x (pg, shift, data_ptr->pi_over_2);
5475
5476 /* Use split Estrin scheme for P(z^2) with deg(P)=19. */
5477@@ -109,10 +108,10 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
5478 ret = svadd_m (pg, ret, shift);
5479
5480 /* Account for the sign of x and y. */
5481- ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
5482-
5483 if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
5484- return special_case (y, x, ret, cmp_xy);
5485-
5486- return ret;
5487+ return special_case (
5488+ y, x,
5489+ svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)),
5490+ cmp_xy);
5491+ return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
5492 }
5493diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c
5494index b92f83cdea..9ea197147c 100644
5495--- a/sysdeps/aarch64/fpu/atan2f_sve.c
5496+++ b/sysdeps/aarch64/fpu/atan2f_sve.c
5497@@ -32,10 +32,8 @@ static const struct data
5498 .pi_over_2 = 0x1.921fb6p+0f,
5499 };
5500
5501-#define SignMask sv_u32 (0x80000000)
5502-
5503 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
5504-static inline svfloat32_t
5505+static svfloat32_t NOINLINE
5506 special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
5507 const svbool_t cmp)
5508 {
5509@@ -67,14 +65,15 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
5510 svbool_t cmp_y = zeroinfnan (iy, pg);
5511 svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
5512
5513- svuint32_t sign_x = svand_x (pg, ix, SignMask);
5514- svuint32_t sign_y = svand_x (pg, iy, SignMask);
5515- svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
5516-
5517 svfloat32_t ax = svabs_x (pg, x);
5518 svfloat32_t ay = svabs_x (pg, y);
5519+ svuint32_t iax = svreinterpret_u32 (ax);
5520+ svuint32_t iay = svreinterpret_u32 (ay);
5521+
5522+ svuint32_t sign_x = sveor_x (pg, ix, iax);
5523+ svuint32_t sign_y = sveor_x (pg, iy, iay);
5524+ svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
5525
5526- svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
5527 svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
5528
5529 /* Set up z for call to atan. */
5530@@ -83,11 +82,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
5531 svfloat32_t z = svdiv_x (pg, n, d);
5532
5533 /* Work out the correct shift. */
5534- svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0));
5535- shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
5536+ svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
5537+ shift = svsel (pred_aygtax, sv_f32 (1.0), shift);
5538+ shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
5539 shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2));
5540
5541- /* Use split Estrin scheme for P(z^2) with deg(P)=7. */
5542+ /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */
5543 svfloat32_t z2 = svmul_x (pg, z, z);
5544 svfloat32_t z4 = svmul_x (pg, z2, z2);
5545 svfloat32_t z8 = svmul_x (pg, z4, z4);
5546@@ -101,10 +101,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
5547 ret = svadd_m (pg, ret, shift);
5548
5549 /* Account for the sign of x and y. */
5550- ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
5551
5552 if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
5553- return special_case (y, x, ret, cmp_xy);
5554+ return special_case (
5555+ y, x,
5556+ svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)),
5557+ cmp_xy);
5558
5559- return ret;
5560+ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
5561 }
5562diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c
5563index 2897e8b909..3924c9ce44 100644
5564--- a/sysdeps/aarch64/fpu/cos_advsimd.c
5565+++ b/sysdeps/aarch64/fpu/cos_advsimd.c
5566@@ -63,8 +63,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x)
5567 special-case handler later. */
5568 r = vbslq_f64 (cmp, v_f64 (1.0), r);
5569 #else
5570- cmp = vcageq_f64 (d->range_val, x);
5571- cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */
5572+ cmp = vcageq_f64 (x, d->range_val);
5573 r = x;
5574 #endif
5575
5576diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c
5577index 60abc8dfcf..d0c285b03a 100644
5578--- a/sysdeps/aarch64/fpu/cosf_advsimd.c
5579+++ b/sysdeps/aarch64/fpu/cosf_advsimd.c
5580@@ -64,8 +64,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x)
5581 special-case handler later. */
5582 r = vbslq_f32 (cmp, v_f32 (1.0f), r);
5583 #else
5584- cmp = vcageq_f32 (d->range_val, x);
5585- cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */
5586+ cmp = vcageq_f32 (x, d->range_val);
5587 r = x;
5588 #endif
5589
5590diff --git a/sysdeps/aarch64/fpu/exp10_advsimd.c b/sysdeps/aarch64/fpu/exp10_advsimd.c
5591index fe7149b191..eeb31ca839 100644
5592--- a/sysdeps/aarch64/fpu/exp10_advsimd.c
5593+++ b/sysdeps/aarch64/fpu/exp10_advsimd.c
5594@@ -57,7 +57,7 @@ const static struct data
5595 # define BigBound v_u64 (0x4070000000000000) /* asuint64 (0x1p8). */
5596 # define Thres v_u64 (0x2070000000000000) /* BigBound - TinyBound. */
5597
5598-static inline float64x2_t VPCS_ATTR
5599+static float64x2_t VPCS_ATTR NOINLINE
5600 special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
5601 {
5602 /* If fenv exceptions are to be triggered correctly, fall back to the scalar
5603@@ -72,7 +72,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
5604 # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */
5605 # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */
5606
5607-static float64x2_t VPCS_ATTR NOINLINE
5608+static inline float64x2_t VPCS_ATTR
5609 special_case (float64x2_t s, float64x2_t y, float64x2_t n,
5610 const struct data *d)
5611 {
5612diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c
5613index 7ee0c90948..ab117b69da 100644
5614--- a/sysdeps/aarch64/fpu/exp10f_advsimd.c
5615+++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c
5616@@ -25,7 +25,8 @@
5617 static const struct data
5618 {
5619 float32x4_t poly[5];
5620- float32x4_t shift, log10_2, log2_10_hi, log2_10_lo;
5621+ float32x4_t log10_2_and_inv, shift;
5622+
5623 #if !WANT_SIMD_EXCEPT
5624 float32x4_t scale_thresh;
5625 #endif
5626@@ -38,9 +39,9 @@ static const struct data
5627 .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f),
5628 V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) },
5629 .shift = V4 (0x1.8p23f),
5630- .log10_2 = V4 (0x1.a934fp+1),
5631- .log2_10_hi = V4 (0x1.344136p-2),
5632- .log2_10_lo = V4 (-0x1.ec10cp-27),
5633+
5634+ /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */
5635+ .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 },
5636 #if !WANT_SIMD_EXCEPT
5637 .scale_thresh = V4 (ScaleBound)
5638 #endif
5639@@ -98,24 +99,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x)
5640 #if WANT_SIMD_EXCEPT
5641 /* asuint(x) - TinyBound >= BigBound - TinyBound. */
5642 uint32x4_t cmp = vcgeq_u32 (
5643- vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
5644- TinyBound),
5645- Thres);
5646+ vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres);
5647 float32x4_t xm = x;
5648 /* If any lanes are special, mask them with 1 and retain a copy of x to allow
5649 special case handler to fix special lanes later. This is only necessary if
5650 fenv exceptions are to be triggered correctly. */
5651 if (__glibc_unlikely (v_any_u32 (cmp)))
5652- x = vbslq_f32 (cmp, v_f32 (1), x);
5653+ x = v_zerofy_f32 (x, cmp);
5654 #endif
5655
5656 /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),
5657 with poly(r) in [1/sqrt(2), sqrt(2)] and
5658 x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */
5659- float32x4_t z = vfmaq_f32 (d->shift, x, d->log10_2);
5660+ float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0);
5661 float32x4_t n = vsubq_f32 (z, d->shift);
5662- float32x4_t r = vfmsq_f32 (x, n, d->log2_10_hi);
5663- r = vfmsq_f32 (r, n, d->log2_10_lo);
5664+ float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1);
5665+ r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2);
5666 uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
5667
5668 float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
5669diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c
5670index 391a93180c..ae1e63d503 100644
5671--- a/sysdeps/aarch64/fpu/exp2_advsimd.c
5672+++ b/sysdeps/aarch64/fpu/exp2_advsimd.c
5673@@ -24,6 +24,7 @@
5674 #define IndexMask (N - 1)
5675 #define BigBound 1022.0
5676 #define UOFlowBound 1280.0
5677+#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */
5678
5679 static const struct data
5680 {
5681@@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i)
5682
5683 #if WANT_SIMD_EXCEPT
5684
5685-# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */
5686 # define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */
5687
5688 /* Call scalar exp2 as a fallback. */
5689 static float64x2_t VPCS_ATTR NOINLINE
5690-special_case (float64x2_t x)
5691+special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special)
5692 {
5693- return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff));
5694+ return v_call_f64 (exp2, x, y, is_special);
5695 }
5696
5697 #else
5698@@ -65,7 +65,7 @@ special_case (float64x2_t x)
5699 # define SpecialBias1 0x7000000000000000 /* 0x1p769. */
5700 # define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
5701
5702-static float64x2_t VPCS_ATTR
5703+static inline float64x2_t VPCS_ATTR
5704 special_case (float64x2_t s, float64x2_t y, float64x2_t n,
5705 const struct data *d)
5706 {
5707@@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
5708 #if WANT_SIMD_EXCEPT
5709 uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
5710 cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres));
5711- /* If any special case (inf, nan, small and large x) is detected,
5712- fall back to scalar for all lanes. */
5713- if (__glibc_unlikely (v_any_u64 (cmp)))
5714- return special_case (x);
5715+ /* Mask special lanes and retain a copy of x for passing to special-case
5716+ handler. */
5717+ float64x2_t xc = x;
5718+ x = v_zerofy_f64 (x, cmp);
5719 #else
5720 cmp = vcagtq_f64 (x, d->scale_big_bound);
5721 #endif
5722@@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
5723 float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly);
5724 y = vmulq_f64 (r, y);
5725
5726-#if !WANT_SIMD_EXCEPT
5727 if (__glibc_unlikely (v_any_u64 (cmp)))
5728+#if !WANT_SIMD_EXCEPT
5729 return special_case (s, y, n, d);
5730+#else
5731+ return special_case (xc, vfmaq_f64 (s, s, y), cmp);
5732 #endif
5733 return vfmaq_f64 (s, s, y);
5734 }
5735diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
5736index 9a5a523a10..8a686e3e05 100644
5737--- a/sysdeps/aarch64/fpu/exp2f_sve.c
5738+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
5739@@ -20,6 +20,8 @@
5740 #include "sv_math.h"
5741 #include "poly_sve_f32.h"
5742
5743+#define Thres 0x1.5d5e2ap+6f
5744+
5745 static const struct data
5746 {
5747 float poly[5];
5748@@ -33,7 +35,7 @@ static const struct data
5749 .shift = 0x1.903f8p17f,
5750 /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
5751 correctly by FEXPA. */
5752- .thres = 0x1.5d5e2ap+6f,
5753+ .thres = Thres,
5754 };
5755
5756 static svfloat32_t NOINLINE
5757diff --git a/sysdeps/aarch64/fpu/exp_advsimd.c b/sysdeps/aarch64/fpu/exp_advsimd.c
5758index fd215f1d2c..5e3a9a0d44 100644
5759--- a/sysdeps/aarch64/fpu/exp_advsimd.c
5760+++ b/sysdeps/aarch64/fpu/exp_advsimd.c
5761@@ -54,7 +54,7 @@ const static volatile struct
5762 # define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9). */
5763 # define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound. */
5764
5765-static inline float64x2_t VPCS_ATTR
5766+static float64x2_t VPCS_ATTR NOINLINE
5767 special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
5768 {
5769 /* If fenv exceptions are to be triggered correctly, fall back to the scalar
5770@@ -69,7 +69,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
5771 # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */
5772 # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */
5773
5774-static float64x2_t VPCS_ATTR NOINLINE
5775+static inline float64x2_t VPCS_ATTR
5776 special_case (float64x2_t s, float64x2_t y, float64x2_t n)
5777 {
5778 /* 2^(n/N) may overflow, break it up into s1*s2. */
5779diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c
5780index 0b85bd06f3..3628398674 100644
5781--- a/sysdeps/aarch64/fpu/expm1_advsimd.c
5782+++ b/sysdeps/aarch64/fpu/expm1_advsimd.c
5783@@ -23,7 +23,7 @@
5784 static const struct data
5785 {
5786 float64x2_t poly[11];
5787- float64x2_t invln2, ln2_lo, ln2_hi, shift;
5788+ float64x2_t invln2, ln2, shift;
5789 int64x2_t exponent_bias;
5790 #if WANT_SIMD_EXCEPT
5791 uint64x2_t thresh, tiny_bound;
5792@@ -38,8 +38,7 @@ static const struct data
5793 V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
5794 V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
5795 .invln2 = V2 (0x1.71547652b82fep0),
5796- .ln2_hi = V2 (0x1.62e42fefa39efp-1),
5797- .ln2_lo = V2 (0x1.abc9e3b39803fp-56),
5798+ .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
5799 .shift = V2 (0x1.8p52),
5800 .exponent_bias = V2 (0x3ff0000000000000),
5801 #if WANT_SIMD_EXCEPT
5802@@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
5803 x = v_zerofy_f64 (x, special);
5804 #else
5805 /* Large input, NaNs and Infs. */
5806- uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound));
5807+ uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
5808 #endif
5809
5810 /* Reduce argument to smaller range:
5811@@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
5812 where 2^i is exact because i is an integer. */
5813 float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
5814 int64x2_t i = vcvtq_s64_f64 (n);
5815- float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi);
5816- f = vfmsq_f64 (f, n, d->ln2_lo);
5817+ float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0);
5818+ f = vfmsq_laneq_f64 (f, n, d->ln2, 1);
5819
5820 /* Approximate expm1(f) using polynomial.
5821 Taylor expansion for expm1(x) has the form:
5822diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c
5823index 8d4c9a2193..93db200f61 100644
5824--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c
5825+++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c
5826@@ -23,7 +23,8 @@
5827 static const struct data
5828 {
5829 float32x4_t poly[5];
5830- float32x4_t invln2, ln2_lo, ln2_hi, shift;
5831+ float32x4_t invln2_and_ln2;
5832+ float32x4_t shift;
5833 int32x4_t exponent_bias;
5834 #if WANT_SIMD_EXCEPT
5835 uint32x4_t thresh;
5836@@ -34,9 +35,8 @@ static const struct data
5837 /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */
5838 .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
5839 V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
5840- .invln2 = V4 (0x1.715476p+0f),
5841- .ln2_hi = V4 (0x1.62e4p-1f),
5842- .ln2_lo = V4 (0x1.7f7d1cp-20f),
5843+ /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */
5844+ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
5845 .shift = V4 (0x1.8p23f),
5846 .exponent_bias = V4 (0x3f800000),
5847 #if !WANT_SIMD_EXCEPT
5848@@ -80,7 +80,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
5849 x = v_zerofy_f32 (x, special);
5850 #else
5851 /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */
5852- uint32x4_t special = vceqzq_u32 (vcaltq_f32 (x, d->oflow_bound));
5853+ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
5854 #endif
5855
5856 /* Reduce argument to smaller range:
5857@@ -88,10 +88,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
5858 and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
5859 exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
5860 where 2^i is exact because i is an integer. */
5861- float32x4_t j = vsubq_f32 (vfmaq_f32 (d->shift, d->invln2, x), d->shift);
5862+ float32x4_t j = vsubq_f32 (
5863+ vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
5864 int32x4_t i = vcvtq_s32_f32 (j);
5865- float32x4_t f = vfmsq_f32 (x, j, d->ln2_hi);
5866- f = vfmsq_f32 (f, j, d->ln2_lo);
5867+ float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
5868+ f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
5869
5870 /* Approximate expm1(f) using polynomial.
5871 Taylor expansion for expm1(x) has the form:
5872diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c
5873index 067ae79613..21df61728c 100644
5874--- a/sysdeps/aarch64/fpu/log_advsimd.c
5875+++ b/sysdeps/aarch64/fpu/log_advsimd.c
5876@@ -58,8 +58,13 @@ lookup (uint64x2_t i)
5877 uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
5878 float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
5879 float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
5880+#if __BYTE_ORDER == __LITTLE_ENDIAN
5881 e.invc = vuzp1q_f64 (e0, e1);
5882 e.logc = vuzp2q_f64 (e0, e1);
5883+#else
5884+ e.invc = vuzp1q_f64 (e1, e0);
5885+ e.logc = vuzp2q_f64 (e1, e0);
5886+#endif
5887 return e;
5888 }
5889
5890diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c
5891index efce183e86..a0d9d3b819 100644
5892--- a/sysdeps/aarch64/fpu/sin_advsimd.c
5893+++ b/sysdeps/aarch64/fpu/sin_advsimd.c
5894@@ -75,8 +75,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
5895 r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x);
5896 #else
5897 r = x;
5898- cmp = vcageq_f64 (d->range_val, x);
5899- cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */
5900+ cmp = vcageq_f64 (x, d->range_val);
5901 #endif
5902
5903 /* n = rint(|x|/pi). */
5904diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c
5905index 60cf3f2ca1..375dfc3331 100644
5906--- a/sysdeps/aarch64/fpu/sinf_advsimd.c
5907+++ b/sysdeps/aarch64/fpu/sinf_advsimd.c
5908@@ -67,8 +67,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x)
5909 r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x);
5910 #else
5911 r = x;
5912- cmp = vcageq_f32 (d->range_val, x);
5913- cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */
5914+ cmp = vcageq_f32 (x, d->range_val);
5915 #endif
5916
5917 /* n = rint(|x|/pi) */
5918diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c
5919index d7e5ba7b1a..0459821ab2 100644
5920--- a/sysdeps/aarch64/fpu/tan_advsimd.c
5921+++ b/sysdeps/aarch64/fpu/tan_advsimd.c
5922@@ -23,7 +23,7 @@
5923 static const struct data
5924 {
5925 float64x2_t poly[9];
5926- float64x2_t half_pi_hi, half_pi_lo, two_over_pi, shift;
5927+ float64x2_t half_pi, two_over_pi, shift;
5928 #if !WANT_SIMD_EXCEPT
5929 float64x2_t range_val;
5930 #endif
5931@@ -34,8 +34,7 @@ static const struct data
5932 V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9),
5933 V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11),
5934 V2 (0x1.4e4fd14147622p-12) },
5935- .half_pi_hi = V2 (0x1.921fb54442d18p0),
5936- .half_pi_lo = V2 (0x1.1a62633145c07p-54),
5937+ .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
5938 .two_over_pi = V2 (0x1.45f306dc9c883p-1),
5939 .shift = V2 (0x1.8p52),
5940 #if !WANT_SIMD_EXCEPT
5941@@ -56,15 +55,15 @@ special_case (float64x2_t x)
5942
5943 /* Vector approximation for double-precision tan.
5944 Maximum measured error is 3.48 ULP:
5945- __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
5946- want -0x1.f6ccd8ecf7deap+37. */
5947+ _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
5948+ want -0x1.f6ccd8ecf7deap+37. */
5949 float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
5950 {
5951 const struct data *dat = ptr_barrier (&data);
5952- /* Our argument reduction cannot calculate q with sufficient accuracy for very
5953- large inputs. Fall back to scalar routine for all lanes if any are too
5954- large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny
5955- input to avoid underflow. */
5956+ /* Our argument reduction cannot calculate q with sufficient accuracy for
5957+ very large inputs. Fall back to scalar routine for all lanes if any are
5958+ too large, or Inf/NaN. If fenv exceptions are expected, also fall back for
5959+ tiny input to avoid underflow. */
5960 #if WANT_SIMD_EXCEPT
5961 uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
5962 /* iax - tiny_bound > range_val - tiny_bound. */
5963@@ -82,8 +81,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
5964 /* Use q to reduce x to r in [-pi/4, pi/4], by:
5965 r = x - q * pi/2, in extended precision. */
5966 float64x2_t r = x;
5967- r = vfmsq_f64 (r, q, dat->half_pi_hi);
5968- r = vfmsq_f64 (r, q, dat->half_pi_lo);
5969+ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0);
5970+ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1);
5971 /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
5972 formula. */
5973 r = vmulq_n_f64 (r, 0.5);
5974@@ -106,14 +105,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
5975 and reciprocity around pi/2:
5976 tan(x) = 1 / (tan(pi/2 - x))
5977 to assemble result using change-of-sign and conditional selection of
5978- numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */
5979+ numerator/denominator, dependent on odd/even-ness of q (hence quadrant).
5980+ */
5981 float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p);
5982 float64x2_t d = vaddq_f64 (p, p);
5983
5984 uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
5985
5986 #if !WANT_SIMD_EXCEPT
5987- uint64x2_t special = vceqzq_u64 (vcaleq_f64 (x, dat->range_val));
5988+ uint64x2_t special = vcageq_f64 (x, dat->range_val);
5989 if (__glibc_unlikely (v_any_u64 (special)))
5990 return special_case (x);
5991 #endif
5992diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c
5993index 1f16103f8a..5a7489390a 100644
5994--- a/sysdeps/aarch64/fpu/tanf_advsimd.c
5995+++ b/sysdeps/aarch64/fpu/tanf_advsimd.c
5996@@ -23,7 +23,8 @@
5997 static const struct data
5998 {
5999 float32x4_t poly[6];
6000- float32x4_t neg_half_pi_1, neg_half_pi_2, neg_half_pi_3, two_over_pi, shift;
6001+ float32x4_t pi_consts;
6002+ float32x4_t shift;
6003 #if !WANT_SIMD_EXCEPT
6004 float32x4_t range_val;
6005 #endif
6006@@ -31,10 +32,9 @@ static const struct data
6007 /* Coefficients generated using FPMinimax. */
6008 .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f),
6009 V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) },
6010- .neg_half_pi_1 = V4 (-0x1.921fb6p+0f),
6011- .neg_half_pi_2 = V4 (0x1.777a5cp-25f),
6012- .neg_half_pi_3 = V4 (0x1.ee59dap-50f),
6013- .two_over_pi = V4 (0x1.45f306p-1f),
6014+ /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi. */
6015+ .pi_consts
6016+ = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f },
6017 .shift = V4 (0x1.8p+23f),
6018 #if !WANT_SIMD_EXCEPT
6019 .range_val = V4 (0x1p15f),
6020@@ -58,10 +58,11 @@ eval_poly (float32x4_t z, const struct data *d)
6021 {
6022 float32x4_t z2 = vmulq_f32 (z, z);
6023 #if WANT_SIMD_EXCEPT
6024- /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If fp exceptions
6025- are to be triggered correctly, sidestep this by fixing such lanes to 0. */
6026+ /* Tiny z (<= 0x1p-31) will underflow when calculating z^4.
6027+ If fp exceptions are to be triggered correctly,
6028+ sidestep this by fixing such lanes to 0. */
6029 uint32x4_t will_uflow
6030- = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound);
6031+ = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound);
6032 if (__glibc_unlikely (v_any_u32 (will_uflow)))
6033 z2 = vbslq_f32 (will_uflow, v_f32 (0), z2);
6034 #endif
6035@@ -94,16 +95,16 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x)
6036 #endif
6037
6038 /* n = rint(x/(pi/2)). */
6039- float32x4_t q = vfmaq_f32 (d->shift, d->two_over_pi, x);
6040+ float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3);
6041 float32x4_t n = vsubq_f32 (q, d->shift);
6042 /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */
6043 uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1));
6044
6045 /* r = x - n * (pi/2) (range reduction into -pi./4 .. pi/4). */
6046 float32x4_t r;
6047- r = vfmaq_f32 (x, d->neg_half_pi_1, n);
6048- r = vfmaq_f32 (r, d->neg_half_pi_2, n);
6049- r = vfmaq_f32 (r, d->neg_half_pi_3, n);
6050+ r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0);
6051+ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1);
6052+ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2);
6053
6054 /* If x lives in an interval, where |tan(x)|
6055 - is finite, then use a polynomial approximation of the form
6056
6057commit 395a89f61e19fa916ae4cc93fc10d81a28ce3039
6058Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
6059Date: Wed Mar 13 14:34:14 2024 +0000
6060
6061 aarch64: fix check for SVE support in assembler
6062
6063 Due to GCC bug 110901 -mcpu can override -march setting when compiling
6064 asm code and thus a compiler targeting a specific cpu can fail the
6065 configure check even when binutils gas supports SVE.
6066
6067 The workaround is that explicit .arch directive overrides both -mcpu
6068 and -march, and since that's what the actual SVE memcpy uses the
6069 configure check should use that too even if the GCC issue is fixed
6070 independently.
6071
6072 Reviewed-by: Florian Weimer <fweimer@redhat.com>
6073 (cherry picked from commit 73c26018ed0ecd9c807bb363cc2c2ab4aca66a82)
6074
6075diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
6076old mode 100644
6077new mode 100755
6078index ca57edce47..9606137e8d
6079--- a/sysdeps/aarch64/configure
6080+++ b/sysdeps/aarch64/configure
6081@@ -325,9 +325,10 @@ then :
6082 printf %s "(cached) " >&6
6083 else $as_nop
6084 cat > conftest.s <<\EOF
6085- ptrue p0.b
6086+ .arch armv8.2-a+sve
6087+ ptrue p0.b
6088 EOF
6089-if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5'
6090+if { ac_try='${CC-cc} -c conftest.s 1>&5'
6091 { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
6092 (eval $ac_try) 2>&5
6093 ac_status=$?
6094diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
6095index 27874eceb4..56d12d661d 100644
6096--- a/sysdeps/aarch64/configure.ac
6097+++ b/sysdeps/aarch64/configure.ac
6098@@ -90,9 +90,10 @@ LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs])
6099 # Check if asm support armv8.2-a+sve
6100 AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl
6101 cat > conftest.s <<\EOF
6102- ptrue p0.b
6103+ .arch armv8.2-a+sve
6104+ ptrue p0.b
6105 EOF
6106-if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then
6107+if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then
6108 libc_cv_aarch64_sve_asm=yes
6109 else
6110 libc_cv_aarch64_sve_asm=no
6111
6112commit 9d92452c70805a2e2dbbdb2b1ffc34bd86e1c8df
6113Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
6114Date: Thu Mar 21 16:48:33 2024 +0000
6115
6116 AArch64: Check kernel version for SVE ifuncs
6117
6118 Old Linux kernels disable SVE after every system call. Calling the
6119 SVE-optimized memcpy afterwards will then cause a trap to reenable SVE.
6120 As a result, applications with a high use of syscalls may run slower with
6121 the SVE memcpy. This is true for kernels between 4.15.0 and before 6.2.0,
6122 except for 5.14.0 which was patched. Avoid this by checking the kernel
6123 version and selecting the SVE ifunc on modern kernels.
6124
6125 Parse the kernel version reported by uname() into a 24-bit kernel.major.minor
6126 value without calling any library functions. If uname() is not supported or
6127 if the version format is not recognized, assume the kernel is modern.
6128
6129 Tested-by: Florian Weimer <fweimer@redhat.com>
6130 Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
6131 (cherry picked from commit 2e94e2f5d2bf2de124c8ad7da85463355e54ccb2)
6132
6133diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h
6134index 77a782422a..5f2da91ebb 100644
6135--- a/sysdeps/aarch64/cpu-features.h
6136+++ b/sysdeps/aarch64/cpu-features.h
6137@@ -71,6 +71,7 @@ struct cpu_features
6138 /* Currently, the GLIBC memory tagging tunable only defines 8 bits. */
6139 uint8_t mte_state;
6140 bool sve;
6141+ bool prefer_sve_ifuncs;
6142 bool mops;
6143 };
6144
6145diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
6146index c52860efb2..61dc40088f 100644
6147--- a/sysdeps/aarch64/multiarch/init-arch.h
6148+++ b/sysdeps/aarch64/multiarch/init-arch.h
6149@@ -36,5 +36,7 @@
6150 MTE_ENABLED (); \
6151 bool __attribute__((unused)) sve = \
6152 GLRO(dl_aarch64_cpu_features).sve; \
6153+ bool __attribute__((unused)) prefer_sve_ifuncs = \
6154+ GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \
6155 bool __attribute__((unused)) mops = \
6156 GLRO(dl_aarch64_cpu_features).mops;
6157diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
6158index d12eccfca5..ce53567dab 100644
6159--- a/sysdeps/aarch64/multiarch/memcpy.c
6160+++ b/sysdeps/aarch64/multiarch/memcpy.c
6161@@ -47,7 +47,7 @@ select_memcpy_ifunc (void)
6162 {
6163 if (IS_A64FX (midr))
6164 return __memcpy_a64fx;
6165- return __memcpy_sve;
6166+ return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic;
6167 }
6168
6169 if (IS_THUNDERX (midr))
6170diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
6171index 2081eeb4d4..fe95037be3 100644
6172--- a/sysdeps/aarch64/multiarch/memmove.c
6173+++ b/sysdeps/aarch64/multiarch/memmove.c
6174@@ -47,7 +47,7 @@ select_memmove_ifunc (void)
6175 {
6176 if (IS_A64FX (midr))
6177 return __memmove_a64fx;
6178- return __memmove_sve;
6179+ return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic;
6180 }
6181
6182 if (IS_THUNDERX (midr))
6183diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
6184index b1a3f673f0..c0b047bc0d 100644
6185--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
6186+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
6187@@ -21,6 +21,7 @@
6188 #include <sys/auxv.h>
6189 #include <elf/dl-hwcaps.h>
6190 #include <sys/prctl.h>
6191+#include <sys/utsname.h>
6192 #include <dl-tunables-parse.h>
6193
6194 #define DCZID_DZP_MASK (1 << 4)
6195@@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu)
6196 return UINT64_MAX;
6197 }
6198
6199+#if __LINUX_KERNEL_VERSION < 0x060200
6200+
6201+/* Return true if we prefer using SVE in string ifuncs. Old kernels disable
6202+ SVE after every system call which results in unnecessary traps if memcpy
6203+ uses SVE. This is true for kernels between 4.15.0 and before 6.2.0, except
6204+ for 5.14.0 which was patched. For these versions return false to avoid using
6205+ SVE ifuncs.
6206+ Parse the kernel version into a 24-bit kernel.major.minor value without
6207+ calling any library functions. If uname() is not supported or if the version
6208+ format is not recognized, assume the kernel is modern and return true. */
6209+
6210+static inline bool
6211+prefer_sve_ifuncs (void)
6212+{
6213+ struct utsname buf;
6214+ const char *p = &buf.release[0];
6215+ int kernel = 0;
6216+ int val;
6217+
6218+ if (__uname (&buf) < 0)
6219+ return true;
6220+
6221+ for (int shift = 16; shift >= 0; shift -= 8)
6222+ {
6223+ for (val = 0; *p >= '0' && *p <= '9'; p++)
6224+ val = val * 10 + *p - '0';
6225+ kernel |= (val & 255) << shift;
6226+ if (*p++ != '.')
6227+ break;
6228+ }
6229+
6230+ if (kernel >= 0x060200 || kernel == 0x050e00)
6231+ return true;
6232+ if (kernel >= 0x040f00)
6233+ return false;
6234+ return true;
6235+}
6236+
6237+#endif
6238+
6239 static inline void
6240 init_cpu_features (struct cpu_features *cpu_features)
6241 {
6242@@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features)
6243 /* Check if SVE is supported. */
6244 cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
6245
6246+ cpu_features->prefer_sve_ifuncs = cpu_features->sve;
6247+
6248+#if __LINUX_KERNEL_VERSION < 0x060200
6249+ if (cpu_features->sve)
6250+ cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs ();
6251+#endif
6252+
6253 /* Check if MOPS is supported. */
6254 cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
6255 }
6256
6257commit 9883f4304cfb1558d0f1e6d9f48c4ab0a35355fe
6258Author: H.J. Lu <hjl.tools@gmail.com>
6259Date: Wed Feb 28 09:51:14 2024 -0800
6260
6261 x86-64: Don't use SSE resolvers for ISA level 3 or above
6262
6263 When glibc is built with ISA level 3 or above enabled, SSE resolvers
6264 aren't available and glibc fails to build:
6265
6266 ld: .../elf/librtld.os: in function `init_cpu_features':
6267 .../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave'
6268 ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
6269 /usr/local/bin/ld: final link failed: bad value
6270
6271 For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor
6272 _dl_tlsdesc_dynamic_fxsave.
6273
6274 This fixes BZ #31429.
6275 Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
6276
6277 (cherry picked from commit befe2d3c4dec8be2cdd01a47132e47bdb7020922)
6278
6279diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
6280index 6fe1b728c6..b8abe733ab 100644
6281--- a/sysdeps/x86/cpu-features.c
6282+++ b/sysdeps/x86/cpu-features.c
6283@@ -18,6 +18,7 @@
6284
6285 #include <dl-hwcap.h>
6286 #include <libc-pointer-arith.h>
6287+#include <isa-level.h>
6288 #include <get-isa-level.h>
6289 #include <cacheinfo.h>
6290 #include <dl-cacheinfo.h>
6291@@ -1198,7 +1199,9 @@ no_cpuid:
6292 TUNABLE_CALLBACK (set_x86_shstk));
6293 #endif
6294
6295+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
6296 if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
6297+#endif
6298 {
6299 if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
6300 {
6301@@ -1219,22 +1222,24 @@ no_cpuid:
6302 #endif
6303 }
6304 }
6305+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
6306 else
6307 {
6308-#ifdef __x86_64__
6309+# ifdef __x86_64__
6310 GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
6311-# ifdef SHARED
6312+# ifdef SHARED
6313 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
6314-# endif
6315-#else
6316-# ifdef SHARED
6317+# endif
6318+# else
6319+# ifdef SHARED
6320 if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
6321 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
6322 else
6323 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
6324+# endif
6325 # endif
6326-#endif
6327 }
6328+#endif
6329
6330 #ifdef SHARED
6331 # ifdef __x86_64__
6332diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
6333index ea69f5223a..057a10862a 100644
6334--- a/sysdeps/x86_64/dl-tlsdesc.S
6335+++ b/sysdeps/x86_64/dl-tlsdesc.S
6336@@ -20,6 +20,7 @@
6337 #include <tls.h>
6338 #include <cpu-features-offsets.h>
6339 #include <features-offsets.h>
6340+#include <isa-level.h>
6341 #include "tlsdesc.h"
6342 #include "dl-trampoline-save.h"
6343
6344@@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak:
6345 .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
6346
6347 #ifdef SHARED
6348-# define USE_FXSAVE
6349-# define STATE_SAVE_ALIGNMENT 16
6350-# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
6351-# include "dl-tlsdesc-dynamic.h"
6352-# undef _dl_tlsdesc_dynamic
6353-# undef USE_FXSAVE
6354+# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
6355+# define USE_FXSAVE
6356+# define STATE_SAVE_ALIGNMENT 16
6357+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
6358+# include "dl-tlsdesc-dynamic.h"
6359+# undef _dl_tlsdesc_dynamic
6360+# undef USE_FXSAVE
6361+# endif
6362
6363 # define USE_XSAVE
6364 # define STATE_SAVE_ALIGNMENT 64
6365
6366commit 7b92f46f04c6cbce19d19ae1099628431858996c
6367Author: Sunil K Pandey <skpgkp2@gmail.com>
6368Date: Thu Feb 29 17:57:02 2024 -0800
6369
6370 x86-64: Simplify minimum ISA check ifdef conditional with if
6371
6372 Replace the minimum ISA check preprocessor conditional with a C if.
6373 Since MINIMUM_X86_ISA_LEVEL and AVX_X86_ISA_LEVEL are compile-time
6374 constants, the compiler will perform constant folding, producing the
6375 same results.
6376
6377 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
6378 (cherry picked from commit b6e3898194bbae78910bbe9cd086937014961e45)
6379
6380diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
6381index b8abe733ab..3d7c2819d7 100644
6382--- a/sysdeps/x86/cpu-features.c
6383+++ b/sysdeps/x86/cpu-features.c
6384@@ -1199,9 +1199,8 @@ no_cpuid:
6385 TUNABLE_CALLBACK (set_x86_shstk));
6386 #endif
6387
6388-#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
6389- if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
6390-#endif
6391+ if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
6392+ || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
6393 {
6394 if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
6395 {
6396@@ -1222,24 +1221,22 @@ no_cpuid:
6397 #endif
6398 }
6399 }
6400-#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
6401 else
6402 {
6403-# ifdef __x86_64__
6404+#ifdef __x86_64__
6405 GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
6406-# ifdef SHARED
6407+# ifdef SHARED
6408 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
6409-# endif
6410-# else
6411-# ifdef SHARED
6412+# endif
6413+#else
6414+# ifdef SHARED
6415 if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
6416 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
6417 else
6418 GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
6419-# endif
6420 # endif
6421- }
6422 #endif
6423+ }
6424
6425 #ifdef SHARED
6426 # ifdef __x86_64__
6427
6428commit edb9a76e3008725e9dc035d38a58e849a3bde0f1
6429Author: Florian Weimer <fweimer@redhat.com>
6430Date: Sun Apr 14 08:24:51 2024 +0200
6431
6432 powerpc: Fix ld.so address determination for PCREL mode (bug 31640)
6433
6434 This seems to have stopped working with some GCC 14 versions,
6435 which clobber r2. With other compilers, the kernel-provided
6436 r2 value is still available at this point.
6437
6438 Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
6439 (cherry picked from commit 14e56bd4ce15ac2d1cc43f762eb2e6b83fec1afe)
6440
6441diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
6442index c6682f3445..2b6f5d2b08 100644
6443--- a/sysdeps/powerpc/powerpc64/dl-machine.h
6444+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
6445@@ -78,6 +78,7 @@ elf_host_tolerates_class (const Elf64_Ehdr *ehdr)
6446 static inline Elf64_Addr
6447 elf_machine_load_address (void) __attribute__ ((const));
6448
6449+#ifndef __PCREL__
6450 static inline Elf64_Addr
6451 elf_machine_load_address (void)
6452 {
6453@@ -105,6 +106,24 @@ elf_machine_dynamic (void)
6454 /* Then subtract off the load address offset. */
6455 return runtime_dynamic - elf_machine_load_address() ;
6456 }
6457+#else /* __PCREL__ */
6458+/* In PCREL mode, r2 may have been clobbered. Rely on relative
6459+ relocations instead. */
6460+
6461+static inline ElfW(Addr)
6462+elf_machine_load_address (void)
6463+{
6464+ extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
6465+ return (ElfW(Addr)) &__ehdr_start;
6466+}
6467+
6468+static inline ElfW(Addr)
6469+elf_machine_dynamic (void)
6470+{
6471+ extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
6472+ return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
6473+}
6474+#endif /* __PCREL__ */
6475
6476 /* The PLT uses Elf64_Rela relocs. */
6477 #define elf_machine_relplt elf_machine_rela
6478
6479commit 04df8652eb1919da18d54b3dcd6db1675993d45d
6480Author: H.J. Lu <hjl.tools@gmail.com>
6481Date: Thu Feb 15 11:19:56 2024 -0800
6482
6483 Apply the Makefile sorting fix
6484
6485 Apply the Makefile sorting fix generated by sort-makefile-lines.py.
6486
6487 (cherry picked from commit ef7f4b1fef67430a8f3cfc77fa6aada2add851d7)
6488
6489diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
6490index fe863e1ba4..01762ef526 100644
6491--- a/sysdeps/loongarch/lp64/multiarch/Makefile
6492+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
6493@@ -1,52 +1,52 @@
6494 ifeq ($(subdir),string)
6495 sysdep_routines += \
6496- strlen-aligned \
6497- strlen-lsx \
6498- strlen-lasx \
6499- strnlen-aligned \
6500- strnlen-lsx \
6501- strnlen-lasx \
6502+ memchr-aligned \
6503+ memchr-lasx \
6504+ memchr-lsx \
6505+ memcmp-aligned \
6506+ memcmp-lasx \
6507+ memcmp-lsx \
6508+ memcpy-aligned \
6509+ memcpy-unaligned \
6510+ memmove-lasx \
6511+ memmove-lsx \
6512+ memmove-unaligned \
6513+ memrchr-generic \
6514+ memrchr-lasx \
6515+ memrchr-lsx \
6516+ memset-aligned \
6517+ memset-lasx \
6518+ memset-lsx \
6519+ memset-unaligned \
6520+ rawmemchr-aligned \
6521+ rawmemchr-lasx \
6522+ rawmemchr-lsx \
6523+ stpcpy-aligned \
6524+ stpcpy-lasx \
6525+ stpcpy-lsx \
6526+ stpcpy-unaligned \
6527 strchr-aligned \
6528- strchr-lsx \
6529 strchr-lasx \
6530- strrchr-aligned \
6531- strrchr-lsx \
6532- strrchr-lasx \
6533+ strchr-lsx \
6534 strchrnul-aligned \
6535- strchrnul-lsx \
6536 strchrnul-lasx \
6537+ strchrnul-lsx \
6538 strcmp-aligned \
6539 strcmp-lsx \
6540- strncmp-aligned \
6541- strncmp-lsx \
6542 strcpy-aligned \
6543- strcpy-unaligned \
6544- strcpy-lsx \
6545 strcpy-lasx \
6546- stpcpy-aligned \
6547- stpcpy-unaligned \
6548- stpcpy-lsx \
6549- stpcpy-lasx \
6550- memcpy-aligned \
6551- memcpy-unaligned \
6552- memmove-unaligned \
6553- memmove-lsx \
6554- memmove-lasx \
6555- rawmemchr-aligned \
6556- rawmemchr-lsx \
6557- rawmemchr-lasx \
6558- memchr-aligned \
6559- memchr-lsx \
6560- memchr-lasx \
6561- memrchr-generic \
6562- memrchr-lsx \
6563- memrchr-lasx \
6564- memset-aligned \
6565- memset-unaligned \
6566- memset-lsx \
6567- memset-lasx \
6568- memcmp-aligned \
6569- memcmp-lsx \
6570- memcmp-lasx \
6571+ strcpy-lsx \
6572+ strcpy-unaligned \
6573+ strlen-aligned \
6574+ strlen-lasx \
6575+ strlen-lsx \
6576+ strncmp-aligned \
6577+ strncmp-lsx \
6578+ strnlen-aligned \
6579+ strnlen-lasx \
6580+ strnlen-lsx \
6581+ strrchr-aligned \
6582+ strrchr-lasx \
6583+ strrchr-lsx \
6584 # sysdep_routines
6585 endif
6586diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
6587index 992aabe43e..5311b594af 100644
6588--- a/sysdeps/x86/Makefile
6589+++ b/sysdeps/x86/Makefile
6590@@ -15,18 +15,18 @@ CFLAGS-dl-get-cpu-features.os += $(rtld-early-cflags)
6591 CFLAGS-get-cpuid-feature-leaf.o += $(no-stack-protector)
6592
6593 tests += \
6594- tst-get-cpu-features \
6595- tst-get-cpu-features-static \
6596 tst-cpu-features-cpuinfo \
6597 tst-cpu-features-cpuinfo-static \
6598 tst-cpu-features-supports \
6599 tst-cpu-features-supports-static \
6600+ tst-get-cpu-features \
6601+ tst-get-cpu-features-static \
6602 tst-hwcap-tunables \
6603 # tests
6604 tests-static += \
6605- tst-get-cpu-features-static \
6606 tst-cpu-features-cpuinfo-static \
6607 tst-cpu-features-supports-static \
6608+ tst-get-cpu-features-static \
6609 # tests-static
6610 ifeq (yes,$(have-ifunc))
6611 ifeq (yes,$(have-gcc-ifunc))
6612diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
6613index 9d374a3299..0ede447405 100644
6614--- a/sysdeps/x86_64/Makefile
6615+++ b/sysdeps/x86_64/Makefile
6616@@ -252,6 +252,10 @@ sysdep-dl-routines += dl-cet
6617
6618 tests += \
6619 tst-cet-legacy-1 \
6620+ tst-cet-legacy-10 \
6621+ tst-cet-legacy-10-static \
6622+ tst-cet-legacy-10a \
6623+ tst-cet-legacy-10a-static \
6624 tst-cet-legacy-1a \
6625 tst-cet-legacy-2 \
6626 tst-cet-legacy-2a \
6627@@ -263,15 +267,11 @@ tests += \
6628 tst-cet-legacy-8 \
6629 tst-cet-legacy-9 \
6630 tst-cet-legacy-9-static \
6631- tst-cet-legacy-10 \
6632- tst-cet-legacy-10-static \
6633- tst-cet-legacy-10a \
6634- tst-cet-legacy-10a-static \
6635 # tests
6636 tests-static += \
6637- tst-cet-legacy-9-static \
6638 tst-cet-legacy-10-static \
6639 tst-cet-legacy-10a-static \
6640+ tst-cet-legacy-9-static \
6641 # tests-static
6642 tst-cet-legacy-1a-ARGS = -- $(host-test-program-cmd)
6643
6644diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
6645index ea81753b70..e1a490dd98 100644
6646--- a/sysdeps/x86_64/fpu/multiarch/Makefile
6647+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
6648@@ -4,10 +4,10 @@ libm-sysdep_routines += \
6649 s_ceilf-c \
6650 s_floor-c \
6651 s_floorf-c \
6652- s_rint-c \
6653- s_rintf-c \
6654 s_nearbyint-c \
6655 s_nearbyintf-c \
6656+ s_rint-c \
6657+ s_rintf-c \
6658 s_roundeven-c \
6659 s_roundevenf-c \
6660 s_trunc-c \
6661@@ -21,10 +21,10 @@ libm-sysdep_routines += \
6662 s_floorf-sse4_1 \
6663 s_nearbyint-sse4_1 \
6664 s_nearbyintf-sse4_1 \
6665- s_roundeven-sse4_1 \
6666- s_roundevenf-sse4_1 \
6667 s_rint-sse4_1 \
6668 s_rintf-sse4_1 \
6669+ s_roundeven-sse4_1 \
6670+ s_roundevenf-sse4_1 \
6671 s_trunc-sse4_1 \
6672 s_truncf-sse4_1 \
6673 # libm-sysdep_routines
6674@@ -84,12 +84,12 @@ CFLAGS-s_cosf-fma.c = -mfma -mavx2
6675 CFLAGS-s_sincosf-fma.c = -mfma -mavx2
6676
6677 libm-sysdep_routines += \
6678+ e_asin-fma4 \
6679+ e_atan2-fma4 \
6680 e_exp-fma4 \
6681 e_log-fma4 \
6682 e_pow-fma4 \
6683- e_asin-fma4 \
6684 s_atan-fma4 \
6685- e_atan2-fma4 \
6686 s_sin-fma4 \
6687 s_sincos-fma4 \
6688 s_tan-fma4 \
6689@@ -106,10 +106,10 @@ CFLAGS-s_tan-fma4.c = -mfma4
6690 CFLAGS-s_sincos-fma4.c = -mfma4
6691
6692 libm-sysdep_routines += \
6693+ e_atan2-avx \
6694 e_exp-avx \
6695 e_log-avx \
6696 s_atan-avx \
6697- e_atan2-avx \
6698 s_sin-avx \
6699 s_sincos-avx \
6700 s_tan-avx \
6701diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
6702index e1e894c963..d3d2270394 100644
6703--- a/sysdeps/x86_64/multiarch/Makefile
6704+++ b/sysdeps/x86_64/multiarch/Makefile
6705@@ -4,8 +4,8 @@ sysdep_routines += \
6706 memchr-avx2 \
6707 memchr-avx2-rtm \
6708 memchr-evex \
6709- memchr-evex512 \
6710 memchr-evex-rtm \
6711+ memchr-evex512 \
6712 memchr-sse2 \
6713 memcmp-avx2-movbe \
6714 memcmp-avx2-movbe-rtm \
6715@@ -37,8 +37,8 @@ sysdep_routines += \
6716 rawmemchr-avx2 \
6717 rawmemchr-avx2-rtm \
6718 rawmemchr-evex \
6719- rawmemchr-evex512 \
6720 rawmemchr-evex-rtm \
6721+ rawmemchr-evex512 \
6722 rawmemchr-sse2 \
6723 stpcpy-avx2 \
6724 stpcpy-avx2-rtm \
6725
6726commit 423099a03264ea28298f47355d7811b8efe03c97
6727Author: Sunil K Pandey <skpgkp2@gmail.com>
6728Date: Tue Feb 13 12:23:14 2024 -0800
6729
6730 x86_64: Exclude SSE, AVX and FMA4 variants in libm multiarch
6731
6732 When glibc is built with ISA level 3 or higher by default, the resulting
6733 glibc binaries won't run on SSE or FMA4 processors. Exclude SSE, AVX and
6734 FMA4 variants in libm multiarch when ISA level 3 or higher is enabled by
6735 default.
6736
6737 When glibc is built with ISA level 2 enabled by default, keep only the
6738 SSE4.1 variant of the rounding functions; their C variant is dropped.
6739
6740 Fixes BZ 31335.
6741
6742 NB: elf/tst-valgrind-smoke test fails with ISA level 4, because valgrind
6743 doesn't support AVX512 instructions:
6744
6745 https://bugs.kde.org/show_bug.cgi?id=383010
6746
6747 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
6748 (cherry picked from commit 9f78a7c1d0963282608da836b840f0d5ae1c478e)
6749
6750diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
6751index 1f4c2d67fd..2a5421bb31 100644
6752--- a/sysdeps/x86/configure
6753+++ b/sysdeps/x86/configure
6754@@ -98,6 +98,7 @@ printf "%s\n" "$libc_cv_have_x86_lahf_sahf" >&6; }
6755 if test $libc_cv_have_x86_lahf_sahf = yes; then
6756 printf "%s\n" "#define HAVE_X86_LAHF_SAHF 1" >>confdefs.h
6757
6758+ ISAFLAG="-DHAVE_X86_LAHF_SAHF"
6759 fi
6760 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MOVBE instruction support" >&5
6761 printf %s "checking for MOVBE instruction support... " >&6; }
6762@@ -120,9 +121,41 @@ printf "%s\n" "$libc_cv_have_x86_movbe" >&6; }
6763 if test $libc_cv_have_x86_movbe = yes; then
6764 printf "%s\n" "#define HAVE_X86_MOVBE 1" >>confdefs.h
6765
6766+ ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE"
6767 fi
6768+
6769+ # Check for ISA level support.
6770+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ISA level support" >&5
6771+printf %s "checking for ISA level support... " >&6; }
6772+if test ${libc_cv_have_x86_isa_level+y}
6773+then :
6774+ printf %s "(cached) " >&6
6775+else $as_nop
6776+ cat > conftest.c <<EOF
6777+#include <sysdeps/x86/isa-level.h>
6778+#if MINIMUM_X86_ISA_LEVEL >= 4
6779+libc_cv_have_x86_isa_level=4
6780+#elif MINIMUM_X86_ISA_LEVEL == 3
6781+libc_cv_have_x86_isa_level=3
6782+#elif MINIMUM_X86_ISA_LEVEL == 2
6783+libc_cv_have_x86_isa_level=2
6784+#else
6785+libc_cv_have_x86_isa_level=baseline
6786+#endif
6787+EOF
6788+ eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level`
6789+ rm -rf conftest*
6790+fi
6791+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_isa_level" >&5
6792+printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; }
6793+else
6794+ libc_cv_have_x86_isa_level=baseline
6795 fi
6796 config_vars="$config_vars
6797+have-x86-isa-level = $libc_cv_have_x86_isa_level"
6798+config_vars="$config_vars
6799+x86-isa-level-3-or-above = 3 4"
6800+config_vars="$config_vars
6801 enable-x86-isa-level = $libc_cv_include_x86_isa_level"
6802
6803 printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
6804diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
6805index 437a50623b..78ff7c8f41 100644
6806--- a/sysdeps/x86/configure.ac
6807+++ b/sysdeps/x86/configure.ac
6808@@ -72,6 +72,7 @@ if test $libc_cv_include_x86_isa_level = yes; then
6809 fi])
6810 if test $libc_cv_have_x86_lahf_sahf = yes; then
6811 AC_DEFINE(HAVE_X86_LAHF_SAHF)
6812+ ISAFLAG="-DHAVE_X86_LAHF_SAHF"
6813 fi
6814 AC_CACHE_CHECK([for MOVBE instruction support],
6815 libc_cv_have_x86_movbe, [dnl
6816@@ -81,8 +82,31 @@ if test $libc_cv_include_x86_isa_level = yes; then
6817 fi])
6818 if test $libc_cv_have_x86_movbe = yes; then
6819 AC_DEFINE(HAVE_X86_MOVBE)
6820+ ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE"
6821 fi
6822+
6823+ # Check for ISA level support.
6824+ AC_CACHE_CHECK([for ISA level support],
6825+ libc_cv_have_x86_isa_level, [dnl
6826+cat > conftest.c <<EOF
6827+#include <sysdeps/x86/isa-level.h>
6828+#if MINIMUM_X86_ISA_LEVEL >= 4
6829+libc_cv_have_x86_isa_level=4
6830+#elif MINIMUM_X86_ISA_LEVEL == 3
6831+libc_cv_have_x86_isa_level=3
6832+#elif MINIMUM_X86_ISA_LEVEL == 2
6833+libc_cv_have_x86_isa_level=2
6834+#else
6835+libc_cv_have_x86_isa_level=baseline
6836+#endif
6837+EOF
6838+ eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level`
6839+ rm -rf conftest*])
6840+else
6841+ libc_cv_have_x86_isa_level=baseline
6842 fi
6843+LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
6844+LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
6845 LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
6846
6847 dnl Static PIE is supported.
6848diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
6849index e1a490dd98..6ddd50240c 100644
6850--- a/sysdeps/x86_64/fpu/multiarch/Makefile
6851+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
6852@@ -1,49 +1,4 @@
6853 ifeq ($(subdir),math)
6854-libm-sysdep_routines += \
6855- s_ceil-c \
6856- s_ceilf-c \
6857- s_floor-c \
6858- s_floorf-c \
6859- s_nearbyint-c \
6860- s_nearbyintf-c \
6861- s_rint-c \
6862- s_rintf-c \
6863- s_roundeven-c \
6864- s_roundevenf-c \
6865- s_trunc-c \
6866- s_truncf-c \
6867-# libm-sysdep_routines
6868-
6869-libm-sysdep_routines += \
6870- s_ceil-sse4_1 \
6871- s_ceilf-sse4_1 \
6872- s_floor-sse4_1 \
6873- s_floorf-sse4_1 \
6874- s_nearbyint-sse4_1 \
6875- s_nearbyintf-sse4_1 \
6876- s_rint-sse4_1 \
6877- s_rintf-sse4_1 \
6878- s_roundeven-sse4_1 \
6879- s_roundevenf-sse4_1 \
6880- s_trunc-sse4_1 \
6881- s_truncf-sse4_1 \
6882-# libm-sysdep_routines
6883-
6884-libm-sysdep_routines += \
6885- e_asin-fma \
6886- e_atan2-fma \
6887- e_exp-fma \
6888- e_log-fma \
6889- e_log2-fma \
6890- e_pow-fma \
6891- s_atan-fma \
6892- s_expm1-fma \
6893- s_log1p-fma \
6894- s_sin-fma \
6895- s_sincos-fma \
6896- s_tan-fma \
6897-# libm-sysdep_routines
6898-
6899 CFLAGS-e_asin-fma.c = -mfma -mavx2
6900 CFLAGS-e_atan2-fma.c = -mfma -mavx2
6901 CFLAGS-e_exp-fma.c = -mfma -mavx2
6902@@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2
6903 CFLAGS-s_tan-fma.c = -mfma -mavx2
6904 CFLAGS-s_sincos-fma.c = -mfma -mavx2
6905
6906-libm-sysdep_routines += \
6907- s_cosf-sse2 \
6908- s_sincosf-sse2 \
6909- s_sinf-sse2 \
6910-# libm-sysdep_routines
6911-
6912-libm-sysdep_routines += \
6913- e_exp2f-fma \
6914- e_expf-fma \
6915- e_log2f-fma \
6916- e_logf-fma \
6917- e_powf-fma \
6918- s_cosf-fma \
6919- s_sincosf-fma \
6920- s_sinf-fma \
6921-# libm-sysdep_routines
6922-
6923 CFLAGS-e_exp2f-fma.c = -mfma -mavx2
6924 CFLAGS-e_expf-fma.c = -mfma -mavx2
6925 CFLAGS-e_log2f-fma.c = -mfma -mavx2
6926@@ -83,17 +21,93 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
6927 CFLAGS-s_cosf-fma.c = -mfma -mavx2
6928 CFLAGS-s_sincosf-fma.c = -mfma -mavx2
6929
6930+# Check if ISA level is 3 or above.
6931+ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above)))
6932 libm-sysdep_routines += \
6933+ s_ceil-avx \
6934+ s_ceilf-avx \
6935+ s_floor-avx \
6936+ s_floorf-avx \
6937+ s_nearbyint-avx \
6938+ s_nearbyintf-avx \
6939+ s_rint-avx \
6940+ s_rintf-avx \
6941+ s_roundeven-avx \
6942+ s_roundevenf-avx \
6943+ s_trunc-avx \
6944+ s_truncf-avx \
6945+# libm-sysdep_routines
6946+else
6947+libm-sysdep_routines += \
6948+ e_asin-fma \
6949 e_asin-fma4 \
6950+ e_atan2-avx \
6951+ e_atan2-fma \
6952 e_atan2-fma4 \
6953+ e_exp-avx \
6954+ e_exp-fma \
6955 e_exp-fma4 \
6956+ e_exp2f-fma \
6957+ e_expf-fma \
6958+ e_log-avx \
6959+ e_log-fma \
6960 e_log-fma4 \
6961+ e_log2-fma \
6962+ e_log2f-fma \
6963+ e_logf-fma \
6964+ e_pow-fma \
6965 e_pow-fma4 \
6966+ e_powf-fma \
6967+ s_atan-avx \
6968+ s_atan-fma \
6969 s_atan-fma4 \
6970+ s_ceil-sse4_1 \
6971+ s_ceilf-sse4_1 \
6972+ s_cosf-fma \
6973+ s_cosf-sse2 \
6974+ s_expm1-fma \
6975+ s_floor-sse4_1 \
6976+ s_floorf-sse4_1 \
6977+ s_log1p-fma \
6978+ s_nearbyint-sse4_1 \
6979+ s_nearbyintf-sse4_1 \
6980+ s_rint-sse4_1 \
6981+ s_rintf-sse4_1 \
6982+ s_roundeven-sse4_1 \
6983+ s_roundevenf-sse4_1 \
6984+ s_sin-avx \
6985+ s_sin-fma \
6986 s_sin-fma4 \
6987+ s_sincos-avx \
6988+ s_sincos-fma \
6989 s_sincos-fma4 \
6990+ s_sincosf-fma \
6991+ s_sincosf-sse2 \
6992+ s_sinf-fma \
6993+ s_sinf-sse2 \
6994+ s_tan-avx \
6995+ s_tan-fma \
6996 s_tan-fma4 \
6997+ s_trunc-sse4_1 \
6998+ s_truncf-sse4_1 \
6999 # libm-sysdep_routines
7000+ifeq ($(have-x86-isa-level),baseline)
7001+libm-sysdep_routines += \
7002+ s_ceil-c \
7003+ s_ceilf-c \
7004+ s_floor-c \
7005+ s_floorf-c \
7006+ s_nearbyint-c \
7007+ s_nearbyintf-c \
7008+ s_rint-c \
7009+ s_rintf-c \
7010+ s_roundeven-c \
7011+ s_roundevenf-c \
7012+ s_trunc-c \
7013+ s_truncf-c \
7014+# libm-sysdep_routines
7015+endif
7016+endif
7017
7018 CFLAGS-e_asin-fma4.c = -mfma4
7019 CFLAGS-e_atan2-fma4.c = -mfma4
7020@@ -105,16 +119,6 @@ CFLAGS-s_sin-fma4.c = -mfma4
7021 CFLAGS-s_tan-fma4.c = -mfma4
7022 CFLAGS-s_sincos-fma4.c = -mfma4
7023
7024-libm-sysdep_routines += \
7025- e_atan2-avx \
7026- e_exp-avx \
7027- e_log-avx \
7028- s_atan-avx \
7029- s_sin-avx \
7030- s_sincos-avx \
7031- s_tan-avx \
7032-# libm-sysdep_routines
7033-
7034 CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
7035 CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
7036 CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
7037diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
7038index 2eaa6c2c04..d64fca2586 100644
7039--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
7040+++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
7041@@ -16,26 +16,29 @@
7042 License along with the GNU C Library; if not, see
7043 <https://www.gnu.org/licenses/>. */
7044
7045-#include <libm-alias-finite.h>
7046+#include <sysdeps/x86/isa-level.h>
7047+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7048+# include <libm-alias-finite.h>
7049
7050 extern double __redirect_ieee754_asin (double);
7051 extern double __redirect_ieee754_acos (double);
7052
7053-#define SYMBOL_NAME ieee754_asin
7054-#include "ifunc-fma4.h"
7055+# define SYMBOL_NAME ieee754_asin
7056+# include "ifunc-fma4.h"
7057
7058 libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
7059 IFUNC_SELECTOR ());
7060 libm_alias_finite (__ieee754_asin, __asin)
7061
7062-#undef SYMBOL_NAME
7063-#define SYMBOL_NAME ieee754_acos
7064-#include "ifunc-fma4.h"
7065+# undef SYMBOL_NAME
7066+# define SYMBOL_NAME ieee754_acos
7067+# include "ifunc-fma4.h"
7068
7069 libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
7070 IFUNC_SELECTOR ());
7071 libm_alias_finite (__ieee754_acos, __acos)
7072
7073-#define __ieee754_acos __ieee754_acos_sse2
7074-#define __ieee754_asin __ieee754_asin_sse2
7075+# define __ieee754_acos __ieee754_acos_sse2
7076+# define __ieee754_asin __ieee754_asin_sse2
7077+#endif
7078 #include <sysdeps/ieee754/dbl-64/e_asin.c>
7079diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
7080index 17ee4f3c36..8a86c14ded 100644
7081--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
7082+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
7083@@ -16,16 +16,19 @@
7084 License along with the GNU C Library; if not, see
7085 <https://www.gnu.org/licenses/>. */
7086
7087-#include <libm-alias-finite.h>
7088+#include <sysdeps/x86/isa-level.h>
7089+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7090+# include <libm-alias-finite.h>
7091
7092 extern double __redirect_ieee754_atan2 (double, double);
7093
7094-#define SYMBOL_NAME ieee754_atan2
7095-#include "ifunc-avx-fma4.h"
7096+# define SYMBOL_NAME ieee754_atan2
7097+# include "ifunc-avx-fma4.h"
7098
7099 libc_ifunc_redirected (__redirect_ieee754_atan2,
7100 __ieee754_atan2, IFUNC_SELECTOR ());
7101 libm_alias_finite (__ieee754_atan2, __atan2)
7102
7103-#define __ieee754_atan2 __ieee754_atan2_sse2
7104+# define __ieee754_atan2 __ieee754_atan2_sse2
7105+#endif
7106 #include <sysdeps/ieee754/dbl-64/e_atan2.c>
7107diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
7108index 406b7ebd44..d56329291a 100644
7109--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
7110+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
7111@@ -16,17 +16,20 @@
7112 License along with the GNU C Library; if not, see
7113 <https://www.gnu.org/licenses/>. */
7114
7115-#include <math.h>
7116-#include <libm-alias-finite.h>
7117+#include <sysdeps/x86/isa-level.h>
7118+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7119+# include <math.h>
7120+# include <libm-alias-finite.h>
7121
7122 extern double __redirect_ieee754_exp (double);
7123
7124-#define SYMBOL_NAME ieee754_exp
7125-#include "ifunc-avx-fma4.h"
7126+# define SYMBOL_NAME ieee754_exp
7127+# include "ifunc-avx-fma4.h"
7128
7129 libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
7130 IFUNC_SELECTOR ());
7131 libm_alias_finite (__ieee754_exp, __exp)
7132
7133-#define __exp __ieee754_exp_sse2
7134+# define __exp __ieee754_exp_sse2
7135+#endif
7136 #include <sysdeps/ieee754/dbl-64/e_exp.c>
7137diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
7138index 804fd6be85..06fe5028d6 100644
7139--- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
7140+++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
7141@@ -16,25 +16,28 @@
7142 License along with the GNU C Library; if not, see
7143 <https://www.gnu.org/licenses/>. */
7144
7145-#include <libm-alias-float.h>
7146-#include <libm-alias-finite.h>
7147+#include <sysdeps/x86/isa-level.h>
7148+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7149+# include <libm-alias-float.h>
7150+# include <libm-alias-finite.h>
7151
7152 extern float __redirect_exp2f (float);
7153
7154-#define SYMBOL_NAME exp2f
7155-#include "ifunc-fma.h"
7156+# define SYMBOL_NAME exp2f
7157+# include "ifunc-fma.h"
7158
7159 libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
7160
7161-#ifdef SHARED
7162+# ifdef SHARED
7163 versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27);
7164 libm_alias_float_other (__exp2, exp2)
7165-#else
7166+# else
7167 libm_alias_float (__exp2, exp2)
7168-#endif
7169+# endif
7170
7171 strong_alias (__exp2f, __ieee754_exp2f)
7172 libm_alias_finite (__exp2f, __exp2f)
7173
7174-#define __exp2f __exp2f_sse2
7175+# define __exp2f __exp2f_sse2
7176+#endif
7177 #include <sysdeps/ieee754/flt-32/e_exp2f.c>
7178diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
7179index 4a7e2a5bce..19d767f636 100644
7180--- a/sysdeps/x86_64/fpu/multiarch/e_expf.c
7181+++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
7182@@ -16,28 +16,31 @@
7183 License along with the GNU C Library; if not, see
7184 <https://www.gnu.org/licenses/>. */
7185
7186-#include <libm-alias-float.h>
7187-#include <libm-alias-finite.h>
7188+#include <sysdeps/x86/isa-level.h>
7189+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7190+# include <libm-alias-float.h>
7191+# include <libm-alias-finite.h>
7192
7193 extern float __redirect_expf (float);
7194
7195-#define SYMBOL_NAME expf
7196-#include "ifunc-fma.h"
7197+# define SYMBOL_NAME expf
7198+# include "ifunc-fma.h"
7199
7200 libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
7201
7202-#ifdef SHARED
7203+# ifdef SHARED
7204 __hidden_ver1 (__expf, __GI___expf, __redirect_expf)
7205 __attribute__ ((visibility ("hidden")));
7206
7207 versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27);
7208 libm_alias_float_other (__exp, exp)
7209-#else
7210+# else
7211 libm_alias_float (__exp, exp)
7212-#endif
7213+# endif
7214
7215 strong_alias (__expf, __ieee754_expf)
7216 libm_alias_finite (__expf, __expf)
7217
7218-#define __expf __expf_sse2
7219+# define __expf __expf_sse2
7220+#endif
7221 #include <sysdeps/ieee754/flt-32/e_expf.c>
7222diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
7223index 067fbf58c3..d80c1b1463 100644
7224--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
7225+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
7226@@ -16,17 +16,20 @@
7227 License along with the GNU C Library; if not, see
7228 <https://www.gnu.org/licenses/>. */
7229
7230-#include <math.h>
7231-#include <libm-alias-finite.h>
7232+#include <sysdeps/x86/isa-level.h>
7233+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7234+# include <math.h>
7235+# include <libm-alias-finite.h>
7236
7237 extern double __redirect_ieee754_log (double);
7238
7239-#define SYMBOL_NAME ieee754_log
7240-#include "ifunc-avx-fma4.h"
7241+# define SYMBOL_NAME ieee754_log
7242+# include "ifunc-avx-fma4.h"
7243
7244 libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
7245 IFUNC_SELECTOR ());
7246 libm_alias_finite (__ieee754_log, __log)
7247
7248-#define __log __ieee754_log_sse2
7249+# define __log __ieee754_log_sse2
7250+#endif
7251 #include <sysdeps/ieee754/dbl-64/e_log.c>
7252diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c
7253index 9c57a2f6cc..9686782c09 100644
7254--- a/sysdeps/x86_64/fpu/multiarch/e_log2.c
7255+++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c
7256@@ -16,28 +16,31 @@
7257 License along with the GNU C Library; if not, see
7258 <https://www.gnu.org/licenses/>. */
7259
7260-#include <libm-alias-double.h>
7261-#include <libm-alias-finite.h>
7262+#include <sysdeps/x86/isa-level.h>
7263+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7264+# include <libm-alias-double.h>
7265+# include <libm-alias-finite.h>
7266
7267 extern double __redirect_log2 (double);
7268
7269-#define SYMBOL_NAME log2
7270-#include "ifunc-fma.h"
7271+# define SYMBOL_NAME log2
7272+# include "ifunc-fma.h"
7273
7274 libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ());
7275
7276-#ifdef SHARED
7277+# ifdef SHARED
7278 __hidden_ver1 (__log2, __GI___log2, __redirect_log2)
7279 __attribute__ ((visibility ("hidden")));
7280
7281 versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29);
7282 libm_alias_double_other (__log2, log2)
7283-#else
7284+# else
7285 libm_alias_double (__log2, log2)
7286-#endif
7287+# endif
7288
7289 strong_alias (__log2, __ieee754_log2)
7290 libm_alias_finite (__log2, __log2)
7291
7292-#define __log2 __log2_sse2
7293+# define __log2 __log2_sse2
7294+#endif
7295 #include <sysdeps/ieee754/dbl-64/e_log2.c>
7296diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
7297index 2b45c87f38..8ada46e11e 100644
7298--- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c
7299+++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
7300@@ -16,28 +16,31 @@
7301 License along with the GNU C Library; if not, see
7302 <https://www.gnu.org/licenses/>. */
7303
7304-#include <libm-alias-float.h>
7305-#include <libm-alias-finite.h>
7306+#include <sysdeps/x86/isa-level.h>
7307+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7308+# include <libm-alias-float.h>
7309+# include <libm-alias-finite.h>
7310
7311 extern float __redirect_log2f (float);
7312
7313-#define SYMBOL_NAME log2f
7314-#include "ifunc-fma.h"
7315+# define SYMBOL_NAME log2f
7316+# include "ifunc-fma.h"
7317
7318 libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
7319
7320-#ifdef SHARED
7321+# ifdef SHARED
7322 __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
7323 __attribute__ ((visibility ("hidden")));
7324
7325 versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27);
7326 libm_alias_float_other (__log2, log2)
7327-#else
7328+# else
7329 libm_alias_float (__log2, log2)
7330-#endif
7331+# endif
7332
7333 strong_alias (__log2f, __ieee754_log2f)
7334 libm_alias_finite (__log2f, __log2f)
7335
7336-#define __log2f __log2f_sse2
7337+# define __log2f __log2f_sse2
7338+#endif
7339 #include <sysdeps/ieee754/flt-32/e_log2f.c>
7340diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
7341index 97e23c8fea..a3978d9a8e 100644
7342--- a/sysdeps/x86_64/fpu/multiarch/e_logf.c
7343+++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
7344@@ -16,28 +16,31 @@
7345 License along with the GNU C Library; if not, see
7346 <https://www.gnu.org/licenses/>. */
7347
7348-#include <libm-alias-float.h>
7349-#include <libm-alias-finite.h>
7350+#include <sysdeps/x86/isa-level.h>
7351+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7352+# include <libm-alias-float.h>
7353+# include <libm-alias-finite.h>
7354
7355 extern float __redirect_logf (float);
7356
7357-#define SYMBOL_NAME logf
7358-#include "ifunc-fma.h"
7359+# define SYMBOL_NAME logf
7360+# include "ifunc-fma.h"
7361
7362 libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
7363
7364-#ifdef SHARED
7365+# ifdef SHARED
7366 __hidden_ver1 (__logf, __GI___logf, __redirect_logf)
7367 __attribute__ ((visibility ("hidden")));
7368
7369 versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27);
7370 libm_alias_float_other (__log, log)
7371-#else
7372+# else
7373 libm_alias_float (__log, log)
7374-#endif
7375+# endif
7376
7377 strong_alias (__logf, __ieee754_logf)
7378 libm_alias_finite (__logf, __logf)
7379
7380-#define __logf __logf_sse2
7381+# define __logf __logf_sse2
7382+#endif
7383 #include <sysdeps/ieee754/flt-32/e_logf.c>
7384diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
7385index 42618e7112..f8f17aff9f 100644
7386--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
7387+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
7388@@ -16,17 +16,20 @@
7389 License along with the GNU C Library; if not, see
7390 <https://www.gnu.org/licenses/>. */
7391
7392-#include <math.h>
7393-#include <libm-alias-finite.h>
7394+#include <sysdeps/x86/isa-level.h>
7395+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7396+# include <math.h>
7397+# include <libm-alias-finite.h>
7398
7399 extern double __redirect_ieee754_pow (double, double);
7400
7401-#define SYMBOL_NAME ieee754_pow
7402-#include "ifunc-fma4.h"
7403+# define SYMBOL_NAME ieee754_pow
7404+# include "ifunc-fma4.h"
7405
7406 libc_ifunc_redirected (__redirect_ieee754_pow,
7407 __ieee754_pow, IFUNC_SELECTOR ());
7408 libm_alias_finite (__ieee754_pow, __pow)
7409
7410-#define __pow __ieee754_pow_sse2
7411+# define __pow __ieee754_pow_sse2
7412+#endif
7413 #include <sysdeps/ieee754/dbl-64/e_pow.c>
7414diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
7415index 8e6ce13cc1..8b1a4c7d04 100644
7416--- a/sysdeps/x86_64/fpu/multiarch/e_powf.c
7417+++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
7418@@ -16,31 +16,34 @@
7419 License along with the GNU C Library; if not, see
7420 <https://www.gnu.org/licenses/>. */
7421
7422-#include <libm-alias-float.h>
7423-#include <libm-alias-finite.h>
7424+#include <sysdeps/x86/isa-level.h>
7425+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7426+# include <libm-alias-float.h>
7427+# include <libm-alias-finite.h>
7428
7429-#define powf __redirect_powf
7430-#define __DECL_SIMD___redirect_powf
7431-#include <math.h>
7432-#undef powf
7433+# define powf __redirect_powf
7434+# define __DECL_SIMD___redirect_powf
7435+# include <math.h>
7436+# undef powf
7437
7438-#define SYMBOL_NAME powf
7439-#include "ifunc-fma.h"
7440+# define SYMBOL_NAME powf
7441+# include "ifunc-fma.h"
7442
7443 libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
7444
7445-#ifdef SHARED
7446+# ifdef SHARED
7447 __hidden_ver1 (__powf, __GI___powf, __redirect_powf)
7448 __attribute__ ((visibility ("hidden")));
7449
7450 versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27);
7451 libm_alias_float_other (__pow, pow)
7452-#else
7453+# else
7454 libm_alias_float (__pow, pow)
7455-#endif
7456+# endif
7457
7458 strong_alias (__powf, __ieee754_powf)
7459 libm_alias_finite (__powf, __powf)
7460
7461-#define __powf __powf_sse2
7462+# define __powf __powf_sse2
7463+#endif
7464 #include <sysdeps/ieee754/flt-32/e_powf.c>
7465diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
7466index 71bad096a9..4d2c6ce006 100644
7467--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
7468+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
7469@@ -16,15 +16,18 @@
7470 License along with the GNU C Library; if not, see
7471 <https://www.gnu.org/licenses/>. */
7472
7473-#include <libm-alias-double.h>
7474+#include <sysdeps/x86/isa-level.h>
7475+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7476+# include <libm-alias-double.h>
7477
7478 extern double __redirect_atan (double);
7479
7480-#define SYMBOL_NAME atan
7481-#include "ifunc-avx-fma4.h"
7482+# define SYMBOL_NAME atan
7483+# include "ifunc-avx-fma4.h"
7484
7485 libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
7486 libm_alias_double (__atan, atan)
7487
7488-#define __atan __atan_sse2
7489+# define __atan __atan_sse2
7490+#endif
7491 #include <sysdeps/ieee754/dbl-64/s_atan.c>
7492diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
7493new file mode 100644
7494index 0000000000..e6c1106753
7495--- /dev/null
7496+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
7497@@ -0,0 +1,28 @@
7498+/* AVX implementation of ceil function.
7499+ Copyright (C) 2024 Free Software Foundation, Inc.
7500+ This file is part of the GNU C Library.
7501+
7502+ The GNU C Library is free software; you can redistribute it and/or
7503+ modify it under the terms of the GNU Lesser General Public
7504+ License as published by the Free Software Foundation; either
7505+ version 2.1 of the License, or (at your option) any later version.
7506+
7507+ The GNU C Library is distributed in the hope that it will be useful,
7508+ but WITHOUT ANY WARRANTY; without even the implied warranty of
7509+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7510+ Lesser General Public License for more details.
7511+
7512+ You should have received a copy of the GNU Lesser General Public
7513+ License along with the GNU C Library; if not, see
7514+ <https://www.gnu.org/licenses/>. */
7515+
7516+#include <sysdep.h>
7517+#include <libm-alias-double.h>
7518+
7519+ .text
7520+ENTRY(__ceil)
7521+ vroundsd $10, %xmm0, %xmm0, %xmm0
7522+ ret
7523+END(__ceil)
7524+
7525+libm_alias_double (__ceil, ceil)
7526diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
7527index 64119011ad..dba756c38f 100644
7528--- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
7529+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
7530@@ -17,8 +17,20 @@
7531
7532 #include <sysdep.h>
7533
7534+#include <sysdeps/x86/isa-level.h>
7535+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7536+# include <libm-alias-double.h>
7537+# define __ceil_sse41 __ceil
7538+ .text
7539+#else
7540 .section .text.sse4.1,"ax",@progbits
7541+#endif
7542+
7543 ENTRY(__ceil_sse41)
7544 roundsd $10, %xmm0, %xmm0
7545 ret
7546 END(__ceil_sse41)
7547+
7548+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7549+libm_alias_double (__ceil, ceil)
7550+#endif
7551diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
7552index cc028addee..46c8e91e19 100644
7553--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c
7554+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
7555@@ -16,17 +16,20 @@
7556 License along with the GNU C Library; if not, see
7557 <https://www.gnu.org/licenses/>. */
7558
7559-#define NO_MATH_REDIRECT
7560-#include <libm-alias-double.h>
7561+#include <sysdeps/x86/isa-level.h>
7562+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
7563+# define NO_MATH_REDIRECT
7564+# include <libm-alias-double.h>
7565
7566-#define ceil __redirect_ceil
7567-#define __ceil __redirect___ceil
7568-#include <math.h>
7569-#undef ceil
7570-#undef __ceil
7571+# define ceil __redirect_ceil
7572+# define __ceil __redirect___ceil
7573+# include <math.h>
7574+# undef ceil
7575+# undef __ceil
7576
7577-#define SYMBOL_NAME ceil
7578-#include "ifunc-sse4_1.h"
7579+# define SYMBOL_NAME ceil
7580+# include "ifunc-sse4_1.h"
7581
7582 libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
7583 libm_alias_double (__ceil, ceil)
7584+#endif
7585diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
7586new file mode 100644
7587index 0000000000..b4d8ac0455
7588--- /dev/null
7589+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
7590@@ -0,0 +1,28 @@
7591+/* AVX implementation of ceilf function.
7592+ Copyright (C) 2024 Free Software Foundation, Inc.
7593+ This file is part of the GNU C Library.
7594+
7595+ The GNU C Library is free software; you can redistribute it and/or
7596+ modify it under the terms of the GNU Lesser General Public
7597+ License as published by the Free Software Foundation; either
7598+ version 2.1 of the License, or (at your option) any later version.
7599+
7600+ The GNU C Library is distributed in the hope that it will be useful,
7601+ but WITHOUT ANY WARRANTY; without even the implied warranty of
7602+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7603+ Lesser General Public License for more details.
7604+
7605+ You should have received a copy of the GNU Lesser General Public
7606+ License along with the GNU C Library; if not, see
7607+ <https://www.gnu.org/licenses/>. */
7608+
7609+#include <sysdep.h>
7610+#include <libm-alias-float.h>
7611+
7612+ .text
7613+ENTRY(__ceilf)
7614+ vroundss $10, %xmm0, %xmm0, %xmm0
7615+ ret
7616+END(__ceilf)
7617+
7618+libm_alias_float (__ceil, ceil)
7619diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
7620index dd9a9f6b71..9abc87b91a 100644
7621--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
7622+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
7623@@ -17,8 +17,20 @@
7624
7625 #include <sysdep.h>
7626
7627+#include <sysdeps/x86/isa-level.h>
7628+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7629+# include <libm-alias-float.h>
7630+# define __ceilf_sse41 __ceilf
7631+ .text
7632+#else
7633 .section .text.sse4.1,"ax",@progbits
7634+#endif
7635+
7636 ENTRY(__ceilf_sse41)
7637 roundss $10, %xmm0, %xmm0
7638 ret
7639 END(__ceilf_sse41)
7640+
7641+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7642+libm_alias_float (__ceil, ceil)
7643+#endif
7644diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
7645index 97a0ca7d19..bb53108f73 100644
7646--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
7647+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
7648@@ -16,17 +16,20 @@
7649 License along with the GNU C Library; if not, see
7650 <https://www.gnu.org/licenses/>. */
7651
7652-#define NO_MATH_REDIRECT
7653-#include <libm-alias-float.h>
7654+#include <sysdeps/x86/isa-level.h>
7655+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
7656+# define NO_MATH_REDIRECT
7657+# include <libm-alias-float.h>
7658
7659-#define ceilf __redirect_ceilf
7660-#define __ceilf __redirect___ceilf
7661-#include <math.h>
7662-#undef ceilf
7663-#undef __ceilf
7664+# define ceilf __redirect_ceilf
7665+# define __ceilf __redirect___ceilf
7666+# include <math.h>
7667+# undef ceilf
7668+# undef __ceilf
7669
7670-#define SYMBOL_NAME ceilf
7671-#include "ifunc-sse4_1.h"
7672+# define SYMBOL_NAME ceilf
7673+# include "ifunc-sse4_1.h"
7674
7675 libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
7676 libm_alias_float (__ceil, ceil)
7677+#endif
7678diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
7679index 2703c576df..8a02e04538 100644
7680--- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c
7681+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
7682@@ -16,13 +16,18 @@
7683 License along with the GNU C Library; if not, see
7684 <https://www.gnu.org/licenses/>. */
7685
7686-#include <libm-alias-float.h>
7687+#include <sysdeps/x86/isa-level.h>
7688+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7689+# include <libm-alias-float.h>
7690
7691 extern float __redirect_cosf (float);
7692
7693-#define SYMBOL_NAME cosf
7694-#include "ifunc-fma.h"
7695+# define SYMBOL_NAME cosf
7696+# include "ifunc-fma.h"
7697
7698 libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
7699
7700 libm_alias_float (__cos, cos)
7701+#else
7702+# include <sysdeps/ieee754/flt-32/s_cosf.c>
7703+#endif
7704diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
7705index 8a2d69f9b2..d58ef3d8f5 100644
7706--- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c
7707+++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
7708@@ -16,21 +16,24 @@
7709 License along with the GNU C Library; if not, see
7710 <https://www.gnu.org/licenses/>. */
7711
7712-#include <libm-alias-double.h>
7713+#include <sysdeps/x86/isa-level.h>
7714+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7715+# include <libm-alias-double.h>
7716
7717 extern double __redirect_expm1 (double);
7718
7719-#define SYMBOL_NAME expm1
7720-#include "ifunc-fma.h"
7721+# define SYMBOL_NAME expm1
7722+# include "ifunc-fma.h"
7723
7724 libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ());
7725 libm_alias_double (__expm1, expm1)
7726
7727-#define __expm1 __expm1_sse2
7728+# define __expm1 __expm1_sse2
7729
7730 /* NB: __expm1 may be expanded to __expm1_sse2 in the following
7731 prototypes. */
7732 extern long double __expm1l (long double);
7733 extern long double __expm1f128 (long double);
7734
7735+#endif
7736 #include <sysdeps/ieee754/dbl-64/s_expm1.c>
7737diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
7738new file mode 100644
7739index 0000000000..ff74b5a8bf
7740--- /dev/null
7741+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
7742@@ -0,0 +1,28 @@
7743+/* AVX implementation of floor function.
7744+ Copyright (C) 2024 Free Software Foundation, Inc.
7745+ This file is part of the GNU C Library.
7746+
7747+ The GNU C Library is free software; you can redistribute it and/or
7748+ modify it under the terms of the GNU Lesser General Public
7749+ License as published by the Free Software Foundation; either
7750+ version 2.1 of the License, or (at your option) any later version.
7751+
7752+ The GNU C Library is distributed in the hope that it will be useful,
7753+ but WITHOUT ANY WARRANTY; without even the implied warranty of
7754+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7755+ Lesser General Public License for more details.
7756+
7757+ You should have received a copy of the GNU Lesser General Public
7758+ License along with the GNU C Library; if not, see
7759+ <https://www.gnu.org/licenses/>. */
7760+
7761+#include <sysdep.h>
7762+#include <libm-alias-double.h>
7763+
7764+ .text
7765+ENTRY(__floor)
7766+ vroundsd $9, %xmm0, %xmm0, %xmm0
7767+ ret
7768+END(__floor)
7769+
7770+libm_alias_double (__floor, floor)
7771diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
7772index 2f7521f39f..c9b9b0639b 100644
7773--- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
7774+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
7775@@ -17,8 +17,20 @@
7776
7777 #include <sysdep.h>
7778
7779+#include <sysdeps/x86/isa-level.h>
7780+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7781+# include <libm-alias-double.h>
7782+# define __floor_sse41 __floor
7783+ .text
7784+#else
7785 .section .text.sse4.1,"ax",@progbits
7786+#endif
7787+
7788 ENTRY(__floor_sse41)
7789 roundsd $9, %xmm0, %xmm0
7790 ret
7791 END(__floor_sse41)
7792+
7793+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7794+libm_alias_double (__floor, floor)
7795+#endif
7796diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
7797index 8cebd48e10..2c87dd0056 100644
7798--- a/sysdeps/x86_64/fpu/multiarch/s_floor.c
7799+++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
7800@@ -16,17 +16,20 @@
7801 License along with the GNU C Library; if not, see
7802 <https://www.gnu.org/licenses/>. */
7803
7804-#define NO_MATH_REDIRECT
7805-#include <libm-alias-double.h>
7806+#include <sysdeps/x86/isa-level.h>
7807+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
7808+# define NO_MATH_REDIRECT
7809+# include <libm-alias-double.h>
7810
7811-#define floor __redirect_floor
7812-#define __floor __redirect___floor
7813-#include <math.h>
7814-#undef floor
7815-#undef __floor
7816+# define floor __redirect_floor
7817+# define __floor __redirect___floor
7818+# include <math.h>
7819+# undef floor
7820+# undef __floor
7821
7822-#define SYMBOL_NAME floor
7823-#include "ifunc-sse4_1.h"
7824+# define SYMBOL_NAME floor
7825+# include "ifunc-sse4_1.h"
7826
7827 libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
7828 libm_alias_double (__floor, floor)
7829+#endif
7830diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
7831new file mode 100644
7832index 0000000000..c378baae8e
7833--- /dev/null
7834+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
7835@@ -0,0 +1,28 @@
7836+/* AVX implementation of floorf function.
7837+ Copyright (C) 2024 Free Software Foundation, Inc.
7838+ This file is part of the GNU C Library.
7839+
7840+ The GNU C Library is free software; you can redistribute it and/or
7841+ modify it under the terms of the GNU Lesser General Public
7842+ License as published by the Free Software Foundation; either
7843+ version 2.1 of the License, or (at your option) any later version.
7844+
7845+ The GNU C Library is distributed in the hope that it will be useful,
7846+ but WITHOUT ANY WARRANTY; without even the implied warranty of
7847+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7848+ Lesser General Public License for more details.
7849+
7850+ You should have received a copy of the GNU Lesser General Public
7851+ License along with the GNU C Library; if not, see
7852+ <https://www.gnu.org/licenses/>. */
7853+
7854+#include <sysdep.h>
7855+#include <libm-alias-float.h>
7856+
7857+ .text
7858+ENTRY(__floorf)
7859+ vroundss $9, %xmm0, %xmm0, %xmm0
7860+ ret
7861+END(__floorf)
7862+
7863+libm_alias_float (__floor, floor)
7864diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
7865index 5f6020d27d..c2216899db 100644
7866--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
7867+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
7868@@ -17,8 +17,20 @@
7869
7870 #include <sysdep.h>
7871
7872+#include <sysdeps/x86/isa-level.h>
7873+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7874+# include <libm-alias-float.h>
7875+# define __floorf_sse41 __floorf
7876+ .text
7877+#else
7878 .section .text.sse4.1,"ax",@progbits
7879+#endif
7880+
7881 ENTRY(__floorf_sse41)
7882 roundss $9, %xmm0, %xmm0
7883 ret
7884 END(__floorf_sse41)
7885+
7886+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7887+libm_alias_float (__floor, floor)
7888+#endif
7889diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
7890index a14e18b03c..a277802b6d 100644
7891--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c
7892+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
7893@@ -16,17 +16,20 @@
7894 License along with the GNU C Library; if not, see
7895 <https://www.gnu.org/licenses/>. */
7896
7897-#define NO_MATH_REDIRECT
7898-#include <libm-alias-float.h>
7899+#include <sysdeps/x86/isa-level.h>
7900+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
7901+# define NO_MATH_REDIRECT
7902+# include <libm-alias-float.h>
7903
7904-#define floorf __redirect_floorf
7905-#define __floorf __redirect___floorf
7906-#include <math.h>
7907-#undef floorf
7908-#undef __floorf
7909+# define floorf __redirect_floorf
7910+# define __floorf __redirect___floorf
7911+# include <math.h>
7912+# undef floorf
7913+# undef __floorf
7914
7915-#define SYMBOL_NAME floorf
7916-#include "ifunc-sse4_1.h"
7917+# define SYMBOL_NAME floorf
7918+# include "ifunc-sse4_1.h"
7919
7920 libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
7921 libm_alias_float (__floor, floor)
7922+#endif
7923diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
7924index a8e1a3f21b..3fa1185d81 100644
7925--- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c
7926+++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
7927@@ -16,14 +16,17 @@
7928 License along with the GNU C Library; if not, see
7929 <https://www.gnu.org/licenses/>. */
7930
7931-#include <libm-alias-double.h>
7932+#include <sysdeps/x86/isa-level.h>
7933+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
7934+# include <libm-alias-double.h>
7935
7936 extern double __redirect_log1p (double);
7937
7938-#define SYMBOL_NAME log1p
7939-#include "ifunc-fma.h"
7940+# define SYMBOL_NAME log1p
7941+# include "ifunc-fma.h"
7942
7943 libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ());
7944
7945-#define __log1p __log1p_sse2
7946+# define __log1p __log1p_sse2
7947+#endif
7948 #include <sysdeps/ieee754/dbl-64/s_log1p.c>
7949diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
7950new file mode 100644
7951index 0000000000..5bfdf73c28
7952--- /dev/null
7953+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
7954@@ -0,0 +1,28 @@
7955+/* AVX implementation of nearbyint function.
7956+ Copyright (C) 2024 Free Software Foundation, Inc.
7957+ This file is part of the GNU C Library.
7958+
7959+ The GNU C Library is free software; you can redistribute it and/or
7960+ modify it under the terms of the GNU Lesser General Public
7961+ License as published by the Free Software Foundation; either
7962+ version 2.1 of the License, or (at your option) any later version.
7963+
7964+ The GNU C Library is distributed in the hope that it will be useful,
7965+ but WITHOUT ANY WARRANTY; without even the implied warranty of
7966+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7967+ Lesser General Public License for more details.
7968+
7969+ You should have received a copy of the GNU Lesser General Public
7970+ License along with the GNU C Library; if not, see
7971+ <https://www.gnu.org/licenses/>. */
7972+
7973+#include <sysdep.h>
7974+#include <libm-alias-double.h>
7975+
7976+ .text
7977+ENTRY(__nearbyint)
7978+ vroundsd $0xc, %xmm0, %xmm0, %xmm0
7979+ ret
7980+END(__nearbyint)
7981+
7982+libm_alias_double (__nearbyint, nearbyint)
7983diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
7984index 674f7eb40a..9d84410a1f 100644
7985--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
7986+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
7987@@ -17,8 +17,20 @@
7988
7989 #include <sysdep.h>
7990
7991+#include <sysdeps/x86/isa-level.h>
7992+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
7993+# include <libm-alias-double.h>
7994+# define __nearbyint_sse41 __nearbyint
7995+ .text
7996+#else
7997 .section .text.sse4.1,"ax",@progbits
7998+#endif
7999+
8000 ENTRY(__nearbyint_sse41)
8001 roundsd $0xc, %xmm0, %xmm0
8002 ret
8003 END(__nearbyint_sse41)
8004+
8005+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8006+libm_alias_double (__nearbyint, nearbyint)
8007+#endif
8008diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
8009index 693e42dd4e..057a7ca60f 100644
8010--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
8011+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
8012@@ -16,17 +16,20 @@
8013 License along with the GNU C Library; if not, see
8014 <https://www.gnu.org/licenses/>. */
8015
8016-#include <libm-alias-double.h>
8017+#include <sysdeps/x86/isa-level.h>
8018+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8019+# include <libm-alias-double.h>
8020
8021-#define nearbyint __redirect_nearbyint
8022-#define __nearbyint __redirect___nearbyint
8023-#include <math.h>
8024-#undef nearbyint
8025-#undef __nearbyint
8026+# define nearbyint __redirect_nearbyint
8027+# define __nearbyint __redirect___nearbyint
8028+# include <math.h>
8029+# undef nearbyint
8030+# undef __nearbyint
8031
8032-#define SYMBOL_NAME nearbyint
8033-#include "ifunc-sse4_1.h"
8034+# define SYMBOL_NAME nearbyint
8035+# include "ifunc-sse4_1.h"
8036
8037 libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
8038 IFUNC_SELECTOR ());
8039 libm_alias_double (__nearbyint, nearbyint)
8040+#endif
8041diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
8042new file mode 100644
8043index 0000000000..1dbaed0324
8044--- /dev/null
8045+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
8046@@ -0,0 +1,28 @@
8047+/* AVX implementation of nearbyintf function.
8048+ Copyright (C) 2024 Free Software Foundation, Inc.
8049+ This file is part of the GNU C Library.
8050+
8051+ The GNU C Library is free software; you can redistribute it and/or
8052+ modify it under the terms of the GNU Lesser General Public
8053+ License as published by the Free Software Foundation; either
8054+ version 2.1 of the License, or (at your option) any later version.
8055+
8056+ The GNU C Library is distributed in the hope that it will be useful,
8057+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8058+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8059+ Lesser General Public License for more details.
8060+
8061+ You should have received a copy of the GNU Lesser General Public
8062+ License along with the GNU C Library; if not, see
8063+ <https://www.gnu.org/licenses/>. */
8064+
8065+#include <sysdep.h>
8066+#include <libm-alias-float.h>
8067+
8068+ .text
8069+ENTRY(__nearbyintf)
8070+ vroundss $0xc, %xmm0, %xmm0, %xmm0
8071+ ret
8072+END(__nearbyintf)
8073+
8074+libm_alias_float (__nearbyint, nearbyint)
8075diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
8076index 5892bd7563..3cf35f92d6 100644
8077--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
8078+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
8079@@ -17,8 +17,20 @@
8080
8081 #include <sysdep.h>
8082
8083+#include <sysdeps/x86/isa-level.h>
8084+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8085+# include <libm-alias-float.h>
8086+# define __nearbyintf_sse41 __nearbyintf
8087+ .text
8088+#else
8089 .section .text.sse4.1,"ax",@progbits
8090+#endif
8091+
8092 ENTRY(__nearbyintf_sse41)
8093 roundss $0xc, %xmm0, %xmm0
8094 ret
8095 END(__nearbyintf_sse41)
8096+
8097+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8098+libm_alias_float (__nearbyint, nearbyint)
8099+#endif
8100diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
8101index a0ac009f4b..41f374ba72 100644
8102--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
8103+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
8104@@ -16,17 +16,20 @@
8105 License along with the GNU C Library; if not, see
8106 <https://www.gnu.org/licenses/>. */
8107
8108-#include <libm-alias-float.h>
8109+#include <sysdeps/x86/isa-level.h>
8110+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8111+# include <libm-alias-float.h>
8112
8113-#define nearbyintf __redirect_nearbyintf
8114-#define __nearbyintf __redirect___nearbyintf
8115-#include <math.h>
8116-#undef nearbyintf
8117-#undef __nearbyintf
8118+# define nearbyintf __redirect_nearbyintf
8119+# define __nearbyintf __redirect___nearbyintf
8120+# include <math.h>
8121+# undef nearbyintf
8122+# undef __nearbyintf
8123
8124-#define SYMBOL_NAME nearbyintf
8125-#include "ifunc-sse4_1.h"
8126+# define SYMBOL_NAME nearbyintf
8127+# include "ifunc-sse4_1.h"
8128
8129 libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
8130 IFUNC_SELECTOR ());
8131 libm_alias_float (__nearbyint, nearbyint)
8132+#endif
8133diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
8134new file mode 100644
8135index 0000000000..2b403b331f
8136--- /dev/null
8137+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
8138@@ -0,0 +1,28 @@
8139+/* AVX implementation of rint function.
8140+ Copyright (C) 2024 Free Software Foundation, Inc.
8141+ This file is part of the GNU C Library.
8142+
8143+ The GNU C Library is free software; you can redistribute it and/or
8144+ modify it under the terms of the GNU Lesser General Public
8145+ License as published by the Free Software Foundation; either
8146+ version 2.1 of the License, or (at your option) any later version.
8147+
8148+ The GNU C Library is distributed in the hope that it will be useful,
8149+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8150+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8151+ Lesser General Public License for more details.
8152+
8153+ You should have received a copy of the GNU Lesser General Public
8154+ License along with the GNU C Library; if not, see
8155+ <https://www.gnu.org/licenses/>. */
8156+
8157+#include <sysdep.h>
8158+#include <libm-alias-double.h>
8159+
8160+ .text
8161+ENTRY(__rint)
8162+ vroundsd $4, %xmm0, %xmm0, %xmm0
8163+ ret
8164+END(__rint)
8165+
8166+libm_alias_double (__rint, rint)
8167diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
8168index 405372991b..8cd9cf759f 100644
8169--- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
8170+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
8171@@ -17,8 +17,20 @@
8172
8173 #include <sysdep.h>
8174
8175+#include <sysdeps/x86/isa-level.h>
8176+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8177+# include <libm-alias-double.h>
8178+# define __rint_sse41 __rint
8179+ .text
8180+#else
8181 .section .text.sse4.1,"ax",@progbits
8182+#endif
8183+
8184 ENTRY(__rint_sse41)
8185 roundsd $4, %xmm0, %xmm0
8186 ret
8187 END(__rint_sse41)
8188+
8189+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8190+libm_alias_double (__rint, rint)
8191+#endif
8192diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
8193index 754c87e004..18623b7d99 100644
8194--- a/sysdeps/x86_64/fpu/multiarch/s_rint.c
8195+++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
8196@@ -16,17 +16,20 @@
8197 License along with the GNU C Library; if not, see
8198 <https://www.gnu.org/licenses/>. */
8199
8200-#define NO_MATH_REDIRECT
8201-#include <libm-alias-double.h>
8202+#include <sysdeps/x86/isa-level.h>
8203+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8204+# define NO_MATH_REDIRECT
8205+# include <libm-alias-double.h>
8206
8207-#define rint __redirect_rint
8208-#define __rint __redirect___rint
8209-#include <math.h>
8210-#undef rint
8211-#undef __rint
8212+# define rint __redirect_rint
8213+# define __rint __redirect___rint
8214+# include <math.h>
8215+# undef rint
8216+# undef __rint
8217
8218-#define SYMBOL_NAME rint
8219-#include "ifunc-sse4_1.h"
8220+# define SYMBOL_NAME rint
8221+# include "ifunc-sse4_1.h"
8222
8223 libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
8224 libm_alias_double (__rint, rint)
8225+#endif
8226diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
8227new file mode 100644
8228index 0000000000..171c2867f4
8229--- /dev/null
8230+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
8231@@ -0,0 +1,28 @@
8232+/* AVX implementation of rintf function.
8233+ Copyright (C) 2024 Free Software Foundation, Inc.
8234+ This file is part of the GNU C Library.
8235+
8236+ The GNU C Library is free software; you can redistribute it and/or
8237+ modify it under the terms of the GNU Lesser General Public
8238+ License as published by the Free Software Foundation; either
8239+ version 2.1 of the License, or (at your option) any later version.
8240+
8241+ The GNU C Library is distributed in the hope that it will be useful,
8242+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8243+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8244+ Lesser General Public License for more details.
8245+
8246+ You should have received a copy of the GNU Lesser General Public
8247+ License along with the GNU C Library; if not, see
8248+ <https://www.gnu.org/licenses/>. */
8249+
8250+#include <sysdep.h>
8251+#include <libm-alias-float.h>
8252+
8253+ .text
8254+ENTRY(__rintf)
8255+ vroundss $4, %xmm0, %xmm0, %xmm0
8256+ ret
8257+END(__rintf)
8258+
8259+libm_alias_float (__rint, rint)
8260diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
8261index 8ac67ce767..fc1e70f0c9 100644
8262--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
8263+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
8264@@ -17,8 +17,20 @@
8265
8266 #include <sysdep.h>
8267
8268+#include <sysdeps/x86/isa-level.h>
8269+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8270+# include <libm-alias-float.h>
8271+# define __rintf_sse41 __rintf
8272+ .text
8273+#else
8274 .section .text.sse4.1,"ax",@progbits
8275+#endif
8276+
8277 ENTRY(__rintf_sse41)
8278 roundss $4, %xmm0, %xmm0
8279 ret
8280 END(__rintf_sse41)
8281+
8282+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8283+libm_alias_float (__rint, rint)
8284+#endif
8285diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
8286index e9d6b7a5f2..e275368dec 100644
8287--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
8288+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
8289@@ -16,17 +16,20 @@
8290 License along with the GNU C Library; if not, see
8291 <https://www.gnu.org/licenses/>. */
8292
8293-#define NO_MATH_REDIRECT
8294-#include <libm-alias-float.h>
8295+#include <sysdeps/x86/isa-level.h>
8296+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8297+# define NO_MATH_REDIRECT
8298+# include <libm-alias-float.h>
8299
8300-#define rintf __redirect_rintf
8301-#define __rintf __redirect___rintf
8302-#include <math.h>
8303-#undef rintf
8304-#undef __rintf
8305+# define rintf __redirect_rintf
8306+# define __rintf __redirect___rintf
8307+# include <math.h>
8308+# undef rintf
8309+# undef __rintf
8310
8311-#define SYMBOL_NAME rintf
8312-#include "ifunc-sse4_1.h"
8313+# define SYMBOL_NAME rintf
8314+# include "ifunc-sse4_1.h"
8315
8316 libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
8317 libm_alias_float (__rint, rint)
8318+#endif
8319diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
8320new file mode 100644
8321index 0000000000..576790355c
8322--- /dev/null
8323+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
8324@@ -0,0 +1,28 @@
8325+/* AVX implementation of roundeven function.
8326+ Copyright (C) 2024 Free Software Foundation, Inc.
8327+ This file is part of the GNU C Library.
8328+
8329+ The GNU C Library is free software; you can redistribute it and/or
8330+ modify it under the terms of the GNU Lesser General Public
8331+ License as published by the Free Software Foundation; either
8332+ version 2.1 of the License, or (at your option) any later version.
8333+
8334+ The GNU C Library is distributed in the hope that it will be useful,
8335+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8336+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8337+ Lesser General Public License for more details.
8338+
8339+ You should have received a copy of the GNU Lesser General Public
8340+ License along with the GNU C Library; if not, see
8341+ <https://www.gnu.org/licenses/>. */
8342+
8343+#include <sysdep.h>
8344+#include <libm-alias-double.h>
8345+
8346+ .text
8347+ENTRY(__roundeven) /* double __roundeven (double): argument and result both in %xmm0. */
8348+ vroundsd $8, %xmm0, %xmm0, %xmm0 /* imm8 = 8: round to nearest, ties to even, with the inexact exception suppressed. */
8349+ ret
8350+END(__roundeven)
8351+
8352+libm_alias_double (__roundeven, roundeven)
8353diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
8354index 5ef102336b..f00be56c59 100644
8355--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
8356+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
8357@@ -17,8 +17,20 @@
8358
8359 #include <sysdep.h>
8360
8361+#include <sysdeps/x86/isa-level.h>
8362+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8363+# include <libm-alias-double.h>
8364+# define __roundeven_sse41 __roundeven
8365+ .text
8366+#else
8367 .section .text.sse4.1,"ax",@progbits
8368+#endif
8369+
8370 ENTRY(__roundeven_sse41)
8371 roundsd $8, %xmm0, %xmm0
8372 ret
8373 END(__roundeven_sse41)
8374+
8375+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8376+libm_alias_double (__roundeven, roundeven)
8377+#endif
8378diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
8379index 8737b32e26..139aad088f 100644
8380--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
8381+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
8382@@ -16,16 +16,19 @@
8383 License along with the GNU C Library; if not, see
8384 <https://www.gnu.org/licenses/>. */
8385
8386-#include <libm-alias-double.h>
8387+#include <sysdeps/x86/isa-level.h>
8388+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8389+# include <libm-alias-double.h>
8390
8391-#define roundeven __redirect_roundeven
8392-#define __roundeven __redirect___roundeven
8393-#include <math.h>
8394-#undef roundeven
8395-#undef __roundeven
8396+# define roundeven __redirect_roundeven
8397+# define __roundeven __redirect___roundeven
8398+# include <math.h>
8399+# undef roundeven
8400+# undef __roundeven
8401
8402-#define SYMBOL_NAME roundeven
8403-#include "ifunc-sse4_1.h"
8404+# define SYMBOL_NAME roundeven
8405+# include "ifunc-sse4_1.h"
8406
8407 libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ());
8408 libm_alias_double (__roundeven, roundeven)
8409+#endif
8410diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
8411new file mode 100644
8412index 0000000000..42c359f4cd
8413--- /dev/null
8414+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
8415@@ -0,0 +1,28 @@
8416+/* AVX implementation of roundevenf function.
8417+ Copyright (C) 2024 Free Software Foundation, Inc.
8418+ This file is part of the GNU C Library.
8419+
8420+ The GNU C Library is free software; you can redistribute it and/or
8421+ modify it under the terms of the GNU Lesser General Public
8422+ License as published by the Free Software Foundation; either
8423+ version 2.1 of the License, or (at your option) any later version.
8424+
8425+ The GNU C Library is distributed in the hope that it will be useful,
8426+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8427+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8428+ Lesser General Public License for more details.
8429+
8430+ You should have received a copy of the GNU Lesser General Public
8431+ License along with the GNU C Library; if not, see
8432+ <https://www.gnu.org/licenses/>. */
8433+
8434+#include <sysdep.h>
8435+#include <libm-alias-float.h>
8436+
8437+ .text
8438+ENTRY(__roundevenf) /* float __roundevenf (float): argument and result both in %xmm0. */
8439+ vroundss $8, %xmm0, %xmm0, %xmm0 /* imm8 = 8: round to nearest, ties to even, with the inexact exception suppressed. */
8440+ ret
8441+END(__roundevenf)
8442+
8443+libm_alias_float (__roundeven, roundeven)
8444diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
8445index 792c90ba07..6b148e4353 100644
8446--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
8447+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
8448@@ -17,8 +17,20 @@
8449
8450 #include <sysdep.h>
8451
8452+#include <sysdeps/x86/isa-level.h>
8453+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8454+# include <libm-alias-float.h>
8455+# define __roundevenf_sse41 __roundevenf
8456+ .text
8457+#else
8458 .section .text.sse4.1,"ax",@progbits
8459+#endif
8460+
8461 ENTRY(__roundevenf_sse41)
8462 roundss $8, %xmm0, %xmm0
8463 ret
8464 END(__roundevenf_sse41)
8465+
8466+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8467+libm_alias_float (__roundeven, roundeven)
8468+#endif
8469diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
8470index e96016a4d5..2fb090075d 100644
8471--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
8472+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
8473@@ -16,16 +16,19 @@
8474 License along with the GNU C Library; if not, see
8475 <https://www.gnu.org/licenses/>. */
8476
8477-#include <libm-alias-float.h>
8478+#include <sysdeps/x86/isa-level.h>
8479+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8480+# include <libm-alias-float.h>
8481
8482-#define roundevenf __redirect_roundevenf
8483-#define __roundevenf __redirect___roundevenf
8484-#include <math.h>
8485-#undef roundevenf
8486-#undef __roundevenf
8487+# define roundevenf __redirect_roundevenf
8488+# define __roundevenf __redirect___roundevenf
8489+# include <math.h>
8490+# undef roundevenf
8491+# undef __roundevenf
8492
8493-#define SYMBOL_NAME roundevenf
8494-#include "ifunc-sse4_1.h"
8495+# define SYMBOL_NAME roundevenf
8496+# include "ifunc-sse4_1.h"
8497
8498 libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ());
8499 libm_alias_float (__roundeven, roundeven)
8500+#endif
8501diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
8502index 355cc0092e..21e77943a3 100644
8503--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
8504+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
8505@@ -16,24 +16,27 @@
8506 License along with the GNU C Library; if not, see
8507 <https://www.gnu.org/licenses/>. */
8508
8509-#include <libm-alias-double.h>
8510+#include <sysdeps/x86/isa-level.h>
8511+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
8512+# include <libm-alias-double.h>
8513
8514 extern double __redirect_sin (double);
8515 extern double __redirect_cos (double);
8516
8517-#define SYMBOL_NAME sin
8518-#include "ifunc-avx-fma4.h"
8519+# define SYMBOL_NAME sin
8520+# include "ifunc-avx-fma4.h"
8521
8522 libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
8523 libm_alias_double (__sin, sin)
8524
8525-#undef SYMBOL_NAME
8526-#define SYMBOL_NAME cos
8527-#include "ifunc-avx-fma4.h"
8528+# undef SYMBOL_NAME
8529+# define SYMBOL_NAME cos
8530+# include "ifunc-avx-fma4.h"
8531
8532 libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
8533 libm_alias_double (__cos, cos)
8534
8535-#define __cos __cos_sse2
8536-#define __sin __sin_sse2
8537+# define __cos __cos_sse2
8538+# define __sin __sin_sse2
8539+#endif
8540 #include <sysdeps/ieee754/dbl-64/s_sin.c>
8541diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
8542index 70107e999c..b35757f8de 100644
8543--- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
8544+++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
8545@@ -16,15 +16,18 @@
8546 License along with the GNU C Library; if not, see
8547 <https://www.gnu.org/licenses/>. */
8548
8549-#include <libm-alias-double.h>
8550+#include <sysdeps/x86/isa-level.h>
8551+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
8552+# include <libm-alias-double.h>
8553
8554 extern void __redirect_sincos (double, double *, double *);
8555
8556-#define SYMBOL_NAME sincos
8557-#include "ifunc-fma4.h"
8558+# define SYMBOL_NAME sincos
8559+# include "ifunc-fma4.h"
8560
8561 libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ());
8562 libm_alias_double (__sincos, sincos)
8563
8564-#define __sincos __sincos_sse2
8565+# define __sincos __sincos_sse2
8566+#endif
8567 #include <sysdeps/ieee754/dbl-64/s_sincos.c>
8568diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
8569index 80bc028451..0ea9b40e84 100644
8570--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
8571+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
8572@@ -16,13 +16,18 @@
8573 License along with the GNU C Library; if not, see
8574 <https://www.gnu.org/licenses/>. */
8575
8576-#include <libm-alias-float.h>
8577+#include <sysdeps/x86/isa-level.h>
8578+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
8579+# include <libm-alias-float.h>
8580
8581 extern void __redirect_sincosf (float, float *, float *);
8582
8583-#define SYMBOL_NAME sincosf
8584-#include "ifunc-fma.h"
8585+# define SYMBOL_NAME sincosf
8586+# include "ifunc-fma.h"
8587
8588 libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
8589
8590 libm_alias_float (__sincos, sincos)
8591+#else
8592+# include <sysdeps/ieee754/flt-32/s_sincosf.c>
8593+#endif
8594diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
8595index a32b9e9550..c61624e3ee 100644
8596--- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
8597+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
8598@@ -16,13 +16,18 @@
8599 License along with the GNU C Library; if not, see
8600 <https://www.gnu.org/licenses/>. */
8601
8602-#include <libm-alias-float.h>
8603+#include <sysdeps/x86/isa-level.h>
8604+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
8605+# include <libm-alias-float.h>
8606
8607 extern float __redirect_sinf (float);
8608
8609-#define SYMBOL_NAME sinf
8610-#include "ifunc-fma.h"
8611+# define SYMBOL_NAME sinf
8612+# include "ifunc-fma.h"
8613
8614 libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
8615
8616 libm_alias_float (__sin, sin)
8617+#else
8618+# include <sysdeps/ieee754/flt-32/s_sinf.c>
8619+#endif
8620diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
8621index f9a2474a13..125d992ba1 100644
8622--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
8623+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
8624@@ -16,15 +16,18 @@
8625 License along with the GNU C Library; if not, see
8626 <https://www.gnu.org/licenses/>. */
8627
8628-#include <libm-alias-double.h>
8629+#include <sysdeps/x86/isa-level.h>
8630+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
8631+# include <libm-alias-double.h>
8632
8633 extern double __redirect_tan (double);
8634
8635-#define SYMBOL_NAME tan
8636-#include "ifunc-avx-fma4.h"
8637+# define SYMBOL_NAME tan
8638+# include "ifunc-avx-fma4.h"
8639
8640 libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
8641 libm_alias_double (__tan, tan)
8642
8643-#define __tan __tan_sse2
8644+# define __tan __tan_sse2
8645+#endif
8646 #include <sysdeps/ieee754/dbl-64/s_tan.c>
8647diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
8648new file mode 100644
8649index 0000000000..b3e87e9606
8650--- /dev/null
8651+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
8652@@ -0,0 +1,28 @@
8653+/* AVX implementation of trunc function.
8654+ Copyright (C) 2024 Free Software Foundation, Inc.
8655+ This file is part of the GNU C Library.
8656+
8657+ The GNU C Library is free software; you can redistribute it and/or
8658+ modify it under the terms of the GNU Lesser General Public
8659+ License as published by the Free Software Foundation; either
8660+ version 2.1 of the License, or (at your option) any later version.
8661+
8662+ The GNU C Library is distributed in the hope that it will be useful,
8663+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8664+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8665+ Lesser General Public License for more details.
8666+
8667+ You should have received a copy of the GNU Lesser General Public
8668+ License along with the GNU C Library; if not, see
8669+ <https://www.gnu.org/licenses/>. */
8670+
8671+#include <sysdep.h>
8672+#include <libm-alias-double.h>
8673+
8674+ .text
8675+ENTRY(__trunc) /* double __trunc (double): argument and result both in %xmm0. */
8676+ vroundsd $11, %xmm0, %xmm0, %xmm0 /* imm8 = 11: round toward zero, with the inexact exception suppressed. */
8677+ ret
8678+END(__trunc)
8679+
8680+libm_alias_double (__trunc, trunc)
8681diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
8682index b496a6ef49..2b79174eed 100644
8683--- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
8684+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
8685@@ -18,8 +18,20 @@
8686
8687 #include <sysdep.h>
8688
8689+#include <sysdeps/x86/isa-level.h>
8690+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8691+# include <libm-alias-double.h>
8692+# define __trunc_sse41 __trunc
8693+ .text
8694+#else
8695 .section .text.sse4.1,"ax",@progbits
8696+#endif
8697+
8698 ENTRY(__trunc_sse41)
8699 roundsd $11, %xmm0, %xmm0
8700 ret
8701 END(__trunc_sse41)
8702+
8703+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8704+libm_alias_double (__trunc, trunc)
8705+#endif
8706diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
8707index 9bc9df8744..ea89c4f85d 100644
8708--- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
8709+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
8710@@ -16,17 +16,20 @@
8711 License along with the GNU C Library; if not, see
8712 <https://www.gnu.org/licenses/>. */
8713
8714-#define NO_MATH_REDIRECT
8715-#include <libm-alias-double.h>
8716+#include <sysdeps/x86/isa-level.h>
8717+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8718+# define NO_MATH_REDIRECT
8719+# include <libm-alias-double.h>
8720
8721-#define trunc __redirect_trunc
8722-#define __trunc __redirect___trunc
8723-#include <math.h>
8724-#undef trunc
8725-#undef __trunc
8726+# define trunc __redirect_trunc
8727+# define __trunc __redirect___trunc
8728+# include <math.h>
8729+# undef trunc
8730+# undef __trunc
8731
8732-#define SYMBOL_NAME trunc
8733-#include "ifunc-sse4_1.h"
8734+# define SYMBOL_NAME trunc
8735+# include "ifunc-sse4_1.h"
8736
8737 libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
8738 libm_alias_double (__trunc, trunc)
8739+#endif
8740diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
8741new file mode 100644
8742index 0000000000..f31ac7d7f7
8743--- /dev/null
8744+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
8745@@ -0,0 +1,28 @@
8746+/* AVX implementation of truncf function.
8747+ Copyright (C) 2024 Free Software Foundation, Inc.
8748+ This file is part of the GNU C Library.
8749+
8750+ The GNU C Library is free software; you can redistribute it and/or
8751+ modify it under the terms of the GNU Lesser General Public
8752+ License as published by the Free Software Foundation; either
8753+ version 2.1 of the License, or (at your option) any later version.
8754+
8755+ The GNU C Library is distributed in the hope that it will be useful,
8756+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8757+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8758+ Lesser General Public License for more details.
8759+
8760+ You should have received a copy of the GNU Lesser General Public
8761+ License along with the GNU C Library; if not, see
8762+ <https://www.gnu.org/licenses/>. */
8763+
8764+#include <sysdep.h>
8765+#include <libm-alias-float.h>
8766+
8767+ .text
8768+ENTRY(__truncf) /* float __truncf (float): argument and result both in %xmm0. */
8769+ vroundss $11, %xmm0, %xmm0, %xmm0 /* imm8 = 11: round toward zero, with the inexact exception suppressed. */
8770+ ret
8771+END(__truncf)
8772+
8773+libm_alias_float (__trunc, trunc)
8774diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
8775index 22e9a83307..60498b2cb2 100644
8776--- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
8777+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
8778@@ -18,8 +18,20 @@
8779
8780 #include <sysdep.h>
8781
8782+#include <sysdeps/x86/isa-level.h>
8783+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8784+# include <libm-alias-float.h>
8785+# define __truncf_sse41 __truncf
8786+ .text
8787+#else
8788 .section .text.sse4.1,"ax",@progbits
8789+#endif
8790+
8791 ENTRY(__truncf_sse41)
8792 roundss $11, %xmm0, %xmm0
8793 ret
8794 END(__truncf_sse41)
8795+
8796+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
8797+libm_alias_float (__trunc, trunc)
8798+#endif
8799diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
8800index dae01d166a..92435ce39d 100644
8801--- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
8802+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
8803@@ -16,17 +16,20 @@
8804 License along with the GNU C Library; if not, see
8805 <https://www.gnu.org/licenses/>. */
8806
8807-#define NO_MATH_REDIRECT
8808-#include <libm-alias-float.h>
8809+#include <sysdeps/x86/isa-level.h>
8810+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
8811+# define NO_MATH_REDIRECT
8812+# include <libm-alias-float.h>
8813
8814-#define truncf __redirect_truncf
8815-#define __truncf __redirect___truncf
8816-#include <math.h>
8817-#undef truncf
8818-#undef __truncf
8819+# define truncf __redirect_truncf
8820+# define __truncf __redirect___truncf
8821+# include <math.h>
8822+# undef truncf
8823+# undef __truncf
8824
8825-#define SYMBOL_NAME truncf
8826-#include "ifunc-sse4_1.h"
8827+# define SYMBOL_NAME truncf
8828+# include "ifunc-sse4_1.h"
8829
8830 libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
8831 libm_alias_float (__trunc, trunc)
8832+#endif
8833diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c
8834index 27eee98a0a..3584187e0e 100644
8835--- a/sysdeps/x86_64/fpu/multiarch/w_exp.c
8836+++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c
8837@@ -1 +1,6 @@
8838-#include <sysdeps/../math/w_exp.c>
8839+#include <sysdeps/x86/isa-level.h>
8840+#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
8841+# include <sysdeps/ieee754/dbl-64/w_exp.c>
8842+#else
8843+# include <sysdeps/../math/w_exp.c>
8844+#endif
8845diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c
8846index 9b2b018711..414ca3ca3d 100644
8847--- a/sysdeps/x86_64/fpu/multiarch/w_log.c
8848+++ b/sysdeps/x86_64/fpu/multiarch/w_log.c
8849@@ -1 +1,6 @@
8850-#include <sysdeps/../math/w_log.c>
8851+#include <sysdeps/x86/isa-level.h>
8852+#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
8853+# include <sysdeps/ieee754/dbl-64/w_log.c>
8854+#else
8855+# include <sysdeps/../math/w_log.c>
8856+#endif
8857diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c
8858index b50c1988de..d5fcc4f871 100644
8859--- a/sysdeps/x86_64/fpu/multiarch/w_pow.c
8860+++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c
8861@@ -1 +1,6 @@
8862-#include <sysdeps/../math/w_pow.c>
8863+#include <sysdeps/x86/isa-level.h>
8864+#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
8865+# include <sysdeps/ieee754/dbl-64/w_pow.c>
8866+#else
8867+# include <sysdeps/../math/w_pow.c>
8868+#endif
8869
8870commit 31da30f23cddd36db29d5b6a1c7619361b271fb4
8871Author: Charles Fol <folcharles@gmail.com>
8872Date: Thu Mar 28 12:25:38 2024 -0300
8873
8874 iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence (CVE-2024-2961)
8875
8876 ISO-2022-CN-EXT uses escape sequences to indicate character set changes
8877 (as specified by RFC 1922). While the SOdesignation has the expected
8878 bounds checks, neither SS2designation nor SS3designation has them,
8879 allowing a write overflow of 1, 2, or 3 bytes with fixed values:
8880 '$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'.
8881
8882 Checked on aarch64-linux-gnu.
8883
8884 Co-authored-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
8885 Reviewed-by: Carlos O'Donell <carlos@redhat.com>
8886 Tested-by: Carlos O'Donell <carlos@redhat.com>
8887
8888 (cherry picked from commit f9dc609e06b1136bb0408be9605ce7973a767ada)
8889
8890diff --git a/iconvdata/Makefile b/iconvdata/Makefile
8891index ea019ce5c0..7196a8744b 100644
8892--- a/iconvdata/Makefile
8893+++ b/iconvdata/Makefile
8894@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared))
8895 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
8896 tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
8897 bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
8898- bug-iconv13 bug-iconv14 bug-iconv15
8899+ bug-iconv13 bug-iconv14 bug-iconv15 \
8900+ tst-iconv-iso-2022-cn-ext
8901 ifeq ($(have-thread-library),yes)
8902 tests += bug-iconv3
8903 endif
8904@@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
8905 $(addprefix $(objpfx),$(modules.so))
8906 $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
8907 $(addprefix $(objpfx),$(modules.so))
8908+$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \
8909+ $(addprefix $(objpfx),$(modules.so))
8910
8911 $(objpfx)iconv-test.out: run-iconv-test.sh \
8912 $(addprefix $(objpfx), $(gconv-modules)) \
8913diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c
8914index b34c8a36f4..cce29b1969 100644
8915--- a/iconvdata/iso-2022-cn-ext.c
8916+++ b/iconvdata/iso-2022-cn-ext.c
8917@@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
8918 { \
8919 const char *escseq; \
8920 \
8921+ if (outptr + 4 > outend) \
8922+ { \
8923+ result = __GCONV_FULL_OUTPUT; \
8924+ break; \
8925+ } \
8926+ \
8927 assert (used == CNS11643_2_set); /* XXX */ \
8928 escseq = "*H"; \
8929 *outptr++ = ESC; \
8930@@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
8931 { \
8932 const char *escseq; \
8933 \
8934+ if (outptr + 4 > outend) \
8935+ { \
8936+ result = __GCONV_FULL_OUTPUT; \
8937+ break; \
8938+ } \
8939+ \
8940 assert ((used >> 5) >= 3 && (used >> 5) <= 7); \
8941 escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2; \
8942 *outptr++ = ESC; \
8943diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c
8944new file mode 100644
8945index 0000000000..96a8765fd5
8946--- /dev/null
8947+++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c
8948@@ -0,0 +1,128 @@
8949+/* Verify ISO-2022-CN-EXT does not write out of the bounds.
8950+ Copyright (C) 2024 Free Software Foundation, Inc.
8951+ This file is part of the GNU C Library.
8952+
8953+ The GNU C Library is free software; you can redistribute it and/or
8954+ modify it under the terms of the GNU Lesser General Public
8955+ License as published by the Free Software Foundation; either
8956+ version 2.1 of the License, or (at your option) any later version.
8957+
8958+ The GNU C Library is distributed in the hope that it will be useful,
8959+ but WITHOUT ANY WARRANTY; without even the implied warranty of
8960+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8961+ Lesser General Public License for more details.
8962+
8963+ You should have received a copy of the GNU Lesser General Public
8964+ License along with the GNU C Library; if not, see
8965+ <https://www.gnu.org/licenses/>. */
8966+
8967+#include <stdio.h>
8968+#include <string.h>
8969+
8970+#include <errno.h>
8971+#include <iconv.h>
8972+#include <sys/mman.h>
8973+
8974+#include <support/xunistd.h>
8975+#include <support/check.h>
8976+#include <support/support.h>
8977+
8978+/* The test sets up a two memory page buffer with the second page marked
8979+ PROT_NONE to trigger a fault if the conversion writes beyond the exact
8980+ expected amount. Then we carry out various conversions and precisely
8981+ place the start of the output buffer in order to trigger a SIGSEGV if the
8982+ process writes anywhere between 1 and page sized bytes more (only one
8983+ PROT_NONE page is setup as a canary) than expected. These tests exercise
8984+ all three of the cases in ISO-2022-CN-EXT where the converter must switch
8985+ character sets and may run out of buffer space while doing the
8986+ operation. */
8987+
8988+static int
8989+do_test (void) /* Always returns 0; every check goes through the TEST_* macros. */
8990+{
8991+ iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8");
8992+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
8993+
8994+ char *ntf;
8995+ size_t ntfsize;
8996+ char *outbufbase;
8997+ {
8998+ int pgz = getpagesize ();
8999+ TEST_VERIFY_EXIT (pgz > 0);
9000+ ntfsize = 2 * pgz;
9001+
9002+ ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE
9003+ | MAP_ANONYMOUS, -1);
9004+ xmprotect (ntf + pgz, pgz, PROT_NONE); /* Second page becomes the inaccessible guard page. */
9005+
9006+ outbufbase = ntf + pgz; /* Address of the first guard-page byte. */
9007+ }
9008+
9009+ /* Check if SOdesignation escape sequence does not trigger an OOB write. */
9010+ {
9011+ char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2";
9012+
9013+ for (int i = 0; i < 9; i++) /* Every output size 0..8 is expected to fail with E2BIG. */
9014+ {
9015+ char *inp = inbuf;
9016+ size_t inleft = sizeof (inbuf) - 1;
9017+
9018+ char *outp = outbufbase - i; /* The i writable bytes end exactly where the guard page starts. */
9019+ size_t outleft = i;
9020+
9021+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
9022+ == (size_t) -1);
9023+ TEST_COMPARE (errno, E2BIG);
9024+
9025+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); /* Reset the conversion state before the next size. */
9026+ }
9027+ }
9028+
9029+ /* Same as before for SS2designation. */
9030+ {
9031+ char inbuf[] = "㴽 \xe3\xb4\xbd";
9032+
9033+ for (int i = 0; i < 14; i++) /* Every output size 0..13 is expected to fail with E2BIG. */
9034+ {
9035+ char *inp = inbuf;
9036+ size_t inleft = sizeof (inbuf) - 1;
9037+
9038+ char *outp = outbufbase - i;
9039+ size_t outleft = i;
9040+
9041+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
9042+ == (size_t) -1);
9043+ TEST_COMPARE (errno, E2BIG);
9044+
9045+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
9046+ }
9047+ }
9048+
9049+ /* Same as before for SS3designation. */
9050+ {
9051+ char inbuf[] = "劄 \xe5\x8a\x84";
9052+
9053+ for (int i = 0; i < 14; i++) /* Every output size 0..13 is expected to fail with E2BIG. */
9054+ {
9055+ char *inp = inbuf;
9056+ size_t inleft = sizeof (inbuf) - 1;
9057+
9058+ char *outp = outbufbase - i;
9059+ size_t outleft = i;
9060+
9061+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
9062+ == (size_t) -1);
9063+ TEST_COMPARE (errno, E2BIG);
9064+
9065+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
9066+ }
9067+ }
9068+
9069+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
9070+
9071+ xmunmap (ntf, ntfsize);
9072+
9073+ return 0;
9074+}
9075+
9076+#include <support/test-driver.c>
9077
9078commit e828914cf9f2fc2caa5bced0fc6a03cb78324979
9079Author: Florian Weimer <fweimer@redhat.com>
9080Date: Tue Apr 23 21:16:32 2024 +0200
9081
9082 nptl: Fix tst-cancel30 on kernels without ppoll_time64 support
9083
9084 Fall back to ppoll if ppoll_time64 fails with ENOSYS.
9085 Fixes commit 370da8a121c3ba9eeb2f13da15fc0f21f4136b25 ("nptl: Fix
9086 tst-cancel30 on sparc64").
9087
9088 Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
9089 (cherry picked from commit f4724843ada64a51d66f65d3199fe431f9d4c254)
9090
9091diff --git a/sysdeps/pthread/tst-cancel30.c b/sysdeps/pthread/tst-cancel30.c
9092index 3030660e5f..94ad6281bc 100644
9093--- a/sysdeps/pthread/tst-cancel30.c
9094+++ b/sysdeps/pthread/tst-cancel30.c
9095@@ -18,6 +18,7 @@
9096 License along with the GNU C Library; if not, see
9097 <https://www.gnu.org/licenses/>. */
9098
9099+#include <errno.h>
9100 #include <support/check.h>
9101 #include <support/xstdio.h>
9102 #include <support/xthread.h>
9103@@ -46,13 +47,19 @@ tf (void *arg)
9104
9105 /* Wait indefinitely for cancellation, which only works if asynchronous
9106 cancellation is enabled. */
9107-#if defined SYS_ppoll || defined SYS_ppoll_time64
9108-# ifndef SYS_ppoll_time64
9109-# define SYS_ppoll_time64 SYS_ppoll
9110+#ifdef SYS_ppoll_time64
9111+ long int ret = syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
9112+ (void) ret;
9113+# ifdef SYS_ppoll
9114+ if (ret == -1 && errno == ENOSYS)
9115+ syscall (SYS_ppoll, NULL, 0, NULL, NULL);
9116 # endif
9117- syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
9118 #else
9119+# ifdef SYS_ppoll
9120+ syscall (SYS_ppoll, NULL, 0, NULL, NULL);
9121+# else
9122 for (;;);
9123+# endif
9124 #endif
9125
9126 return 0;
9127
9128commit e701c7d761f6e5c48d8e9dd5da88cbe2e94943f4
9129Author: Florian Weimer <fweimer@redhat.com>
9130Date: Thu Apr 25 12:56:48 2024 +0200
9131
9132 i386: ulp update for SSE2 --disable-multi-arch configurations
9133
9134 (cherry picked from commit 3a3a4497421422aa854c855cbe5110ca7d598ffc)
9135
9136diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
9137index 84e6686eba..f2139fc172 100644
9138--- a/sysdeps/i386/fpu/libm-test-ulps
9139+++ b/sysdeps/i386/fpu/libm-test-ulps
9140@@ -1232,6 +1232,7 @@ ldouble: 6
9141
9142 Function: "hypot":
9143 double: 1
9144+float: 1
9145 float128: 1
9146 ldouble: 1
9147
9148
9149commit 2f8f157eb0cc7f1d8d9a3fcaa8c55bed53b092a8
9150Author: H.J. Lu <hjl.tools@gmail.com>
9151Date: Tue Apr 23 13:59:50 2024 -0700
9152
9153 x86: Define MINIMUM_X86_ISA_LEVEL in config.h [BZ #31676]
9154
9155 Define MINIMUM_X86_ISA_LEVEL at configure time to avoid
9156
9157 /usr/bin/ld: …/build/elf/librtld.os: in function `init_cpu_features':
9158 …/git/elf/../sysdeps/x86/cpu-features.c:1202: undefined reference to `_dl_runtime_resolve_fxsave'
9159 /usr/bin/ld: …/build/elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
9160 /usr/bin/ld: final link failed: bad value
9161 collect2: error: ld returned 1 exit status
9162
9163 when glibc is built with -march=x86-64-v3 and configured with
9164 --with-rtld-early-cflags=-march=x86-64, which is used to allow ld.so to
9165 print an error message on unsupported CPUs:
9166
9167 Fatal glibc error: CPU does not support x86-64-v3
9168
9169 This fixes BZ #31676.
9170 Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
9171
9172 (cherry picked from commit 46c999741340ea559784c20a45077955b50aca43)
9173
9174diff --git a/config.h.in b/config.h.in
9175index 4d33c63a84..1e647de585 100644
9176--- a/config.h.in
9177+++ b/config.h.in
9178@@ -286,6 +286,9 @@
9179 /* Define if x86 ISA level should be included in shared libraries. */
9180 #undef INCLUDE_X86_ISA_LEVEL
9181
9182+/* The x86 ISA level. 1 for baseline. Undefined on non-x86. */
9183+#undef MINIMUM_X86_ISA_LEVEL
9184+
9185 /* Define if -msahf is enabled by default on x86. */
9186 #undef HAVE_X86_LAHF_SAHF
9187
9188diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
9189index 2a5421bb31..d28d9bcb29 100644
9190--- a/sysdeps/x86/configure
9191+++ b/sysdeps/x86/configure
9192@@ -151,6 +151,13 @@ printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; }
9193 else
9194 libc_cv_have_x86_isa_level=baseline
9195 fi
9196+if test $libc_cv_have_x86_isa_level = baseline; then
9197+ printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL 1" >>confdefs.h
9198+
9199+else
9200+ printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL $libc_cv_have_x86_isa_level" >>confdefs.h
9201+
9202+fi
9203 config_vars="$config_vars
9204 have-x86-isa-level = $libc_cv_have_x86_isa_level"
9205 config_vars="$config_vars
9206diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
9207index 78ff7c8f41..5b0acd03d2 100644
9208--- a/sysdeps/x86/configure.ac
9209+++ b/sysdeps/x86/configure.ac
9210@@ -105,6 +105,11 @@ EOF
9211 else
9212 libc_cv_have_x86_isa_level=baseline
9213 fi
9214+if test $libc_cv_have_x86_isa_level = baseline; then
9215+ AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, 1)
9216+else
9217+ AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level)
9218+fi
9219 LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
9220 LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
9221 LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
9222diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
9223index 11fe1ca90c..2c7f74212b 100644
9224--- a/sysdeps/x86/isa-level.h
9225+++ b/sysdeps/x86/isa-level.h
9226@@ -61,8 +61,10 @@
9227 # define __X86_ISA_V4 0
9228 #endif
9229
9230-#define MINIMUM_X86_ISA_LEVEL \
9231+#ifndef MINIMUM_X86_ISA_LEVEL
9232+# define MINIMUM_X86_ISA_LEVEL \
9233 (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
9234+#endif
9235
9236 /* Depending on the minimum ISA level, a feature check result can be a
9237 compile-time constant.. */
9238
9239commit 1263d583d2e28afb8be53f8d6922f0842036f35d
9240Author: Florian Weimer <fweimer@redhat.com>
9241Date: Thu Apr 25 15:00:45 2024 +0200
9242
9243 CVE-2024-33599: nscd: Stack-based buffer overflow in netgroup cache (bug 31677)
9244
9245 Using alloca matches what other caches do. The request length is
9246 bounded by MAXKEYLEN.
9247
9248 Reviewed-by: Carlos O'Donell <carlos@redhat.com>
9249 (cherry picked from commit 87801a8fd06db1d654eea3e4f7626ff476a9bdaa)
9250
9251diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
9252index 0c6e46f15c..f227dc7fa2 100644
9253--- a/nscd/netgroupcache.c
9254+++ b/nscd/netgroupcache.c
9255@@ -502,12 +502,13 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
9256 = (struct indataset *) mempool_alloc (db,
9257 sizeof (*dataset) + req->key_len,
9258 1);
9259- struct indataset dataset_mem;
9260 bool cacheable = true;
9261 if (__glibc_unlikely (dataset == NULL))
9262 {
9263 cacheable = false;
9264- dataset = &dataset_mem;
9265+ /* The alloca is safe because nscd_run_worker verifies that
9266+ key_len is not larger than MAXKEYLEN. */
9267+ dataset = alloca (sizeof (*dataset) + req->key_len);
9268 }
9269
9270 datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len,
9271
9272commit 5a508e0b508c8ad53bd0d2fb48fd71b242626341
9273Author: Florian Weimer <fweimer@redhat.com>
9274Date: Thu Apr 25 15:01:07 2024 +0200
9275
9276 CVE-2024-33600: nscd: Do not send missing not-found response in addgetnetgrentX (bug 31678)
9277
9278 If we failed to add a not-found response to the cache, the dataset
9279 pointer can be null, resulting in a null pointer dereference.
9280
9281 Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
9282 (cherry picked from commit 7835b00dbce53c3c87bbbb1754a95fb5e58187aa)
9283
9284diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
9285index f227dc7fa2..c18fe111f3 100644
9286--- a/nscd/netgroupcache.c
9287+++ b/nscd/netgroupcache.c
9288@@ -147,7 +147,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9289 /* No such service. */
9290 cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
9291 &key_copy);
9292- goto writeout;
9293+ goto maybe_cache_add;
9294 }
9295
9296 memset (&data, '\0', sizeof (data));
9297@@ -348,7 +348,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9298 {
9299 cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
9300 &key_copy);
9301- goto writeout;
9302+ goto maybe_cache_add;
9303 }
9304
9305 total = buffilled;
9306@@ -410,14 +410,12 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9307 }
9308
9309 if (he == NULL && fd != -1)
9310- {
9311- /* We write the dataset before inserting it to the database
9312- since while inserting this thread might block and so would
9313- unnecessarily let the receiver wait. */
9314- writeout:
9315+ /* We write the dataset before inserting it to the database since
9316+ while inserting this thread might block and so would
9317+ unnecessarily let the receiver wait. */
9318 writeall (fd, &dataset->resp, dataset->head.recsize);
9319- }
9320
9321+ maybe_cache_add:
9322 if (cacheable)
9323 {
9324 /* If necessary, we also propagate the data to disk. */
9325
9326commit c99f886de54446cd4447db6b44be93dabbdc2f8b
9327Author: Florian Weimer <fweimer@redhat.com>
9328Date: Thu Apr 25 15:01:07 2024 +0200
9329
9330 CVE-2024-33600: nscd: Avoid null pointer crashes after notfound response (bug 31678)
9331
9332 The addgetnetgrentX call in addinnetgrX may have failed to produce
9333 a result, so the result variable in addinnetgrX can be NULL.
9334 Use db->negtimeout as the fallback value if there is no result data;
9335 the timeout is also overwritten below.
9336
9337 Also avoid sending a second not-found response. (The client
9338 disconnects after receiving the first response, so the data stream did
9339 not go out of sync even without this fix.) It is still beneficial to
9340 add the negative response to the mapping, so that the client can get
9341 it from there in the future, instead of going through the socket.
9342
9343 Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
9344 (cherry picked from commit b048a482f088e53144d26a61c390bed0210f49f2)
9345
9346diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
9347index c18fe111f3..e22ffa5884 100644
9348--- a/nscd/netgroupcache.c
9349+++ b/nscd/netgroupcache.c
9350@@ -511,14 +511,15 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
9351
9352 datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len,
9353 sizeof (innetgroup_response_header),
9354- he == NULL ? 0 : dh->nreloads + 1, result->head.ttl);
9355+ he == NULL ? 0 : dh->nreloads + 1,
9356+ result == NULL ? db->negtimeout : result->head.ttl);
9357 /* Set the notfound status and timeout based on the result from
9358 getnetgrent. */
9359- dataset->head.notfound = result->head.notfound;
9360+ dataset->head.notfound = result == NULL || result->head.notfound;
9361 dataset->head.timeout = timeout;
9362
9363 dataset->resp.version = NSCD_VERSION;
9364- dataset->resp.found = result->resp.found;
9365+ dataset->resp.found = result != NULL && result->resp.found;
9366 /* Until we find a matching entry the result is 0. */
9367 dataset->resp.result = 0;
9368
9369@@ -566,7 +567,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
9370 goto out;
9371 }
9372
9373- if (he == NULL)
9374+ /* addgetnetgrentX may have already sent a notfound response. Do
9375+ not send another one. */
9376+ if (he == NULL && dataset->resp.found)
9377 {
9378 /* We write the dataset before inserting it to the database
9379 since while inserting this thread might block and so would
9380
9381commit a9a8d3eebb145779a18d90e3966009a1daa63cd8
9382Author: Florian Weimer <fweimer@redhat.com>
9383Date: Thu Apr 25 15:01:07 2024 +0200
9384
9385 CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX (bug 31680)
9386
9387 This avoids potential memory corruption when the underlying NSS
9388 callback function does not use the buffer space to store all strings
9389 (e.g., for constant strings).
9390
9391 Instead of custom buffer management, two scratch buffers are used.
9392 This increases stack usage somewhat.
9393
9394 Scratch buffer allocation failure is handled by return -1
9395 (an invalid timeout value) instead of terminating the process.
9396 This fixes bug 31679.
9397
9398 Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
9399 (cherry picked from commit c04a21e050d64a1193a6daab872bca2528bda44b)
9400
9401diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
9402index e22ffa5884..e8fe041846 100644
9403--- a/nscd/netgroupcache.c
9404+++ b/nscd/netgroupcache.c
9405@@ -23,6 +23,7 @@
9406 #include <stdlib.h>
9407 #include <unistd.h>
9408 #include <sys/mman.h>
9409+#include <scratch_buffer.h>
9410
9411 #include "../nss/netgroup.h"
9412 #include "nscd.h"
9413@@ -65,6 +66,16 @@ struct dataset
9414 char strdata[0];
9415 };
9416
9417+/* Send a notfound response to FD. Always returns -1 to indicate an
9418+ ephemeral error. */
9419+static time_t
9420+send_notfound (int fd)
9421+{
9422+ if (fd != -1)
9423+ TEMP_FAILURE_RETRY (send (fd, &notfound, sizeof (notfound), MSG_NOSIGNAL));
9424+ return -1;
9425+}
9426+
9427 /* Sends a notfound message and prepares a notfound dataset to write to the
9428 cache. Returns true if there was enough memory to allocate the dataset and
9429 returns the dataset in DATASETP, total bytes to write in TOTALP and the
9430@@ -83,8 +94,7 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
9431 total = sizeof (notfound);
9432 timeout = time (NULL) + db->negtimeout;
9433
9434- if (fd != -1)
9435- TEMP_FAILURE_RETRY (send (fd, &notfound, total, MSG_NOSIGNAL));
9436+ send_notfound (fd);
9437
9438 dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len, 1);
9439 /* If we cannot permanently store the result, so be it. */
9440@@ -109,11 +119,78 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
9441 return cacheable;
9442 }
9443
9444+struct addgetnetgrentX_scratch
9445+{
9446+ /* This is the result that the caller should use. It can be NULL,
9447+ point into buffer, or it can be in the cache. */
9448+ struct dataset *dataset;
9449+
9450+ struct scratch_buffer buffer;
9451+
9452+ /* Used internally in addgetnetgrentX as a staging area. */
9453+ struct scratch_buffer tmp;
9454+
9455+ /* Number of bytes in buffer that are actually used. */
9456+ size_t buffer_used;
9457+};
9458+
9459+static void
9460+addgetnetgrentX_scratch_init (struct addgetnetgrentX_scratch *scratch)
9461+{
9462+ scratch->dataset = NULL;
9463+ scratch_buffer_init (&scratch->buffer);
9464+ scratch_buffer_init (&scratch->tmp);
9465+
9466+ /* Reserve space for the header. */
9467+ scratch->buffer_used = sizeof (struct dataset);
9468+ static_assert (sizeof (struct dataset) < sizeof (scratch->tmp.__space),
9469+ "initial buffer space");
9470+ memset (scratch->tmp.data, 0, sizeof (struct dataset));
9471+}
9472+
9473+static void
9474+addgetnetgrentX_scratch_free (struct addgetnetgrentX_scratch *scratch)
9475+{
9476+ scratch_buffer_free (&scratch->buffer);
9477+ scratch_buffer_free (&scratch->tmp);
9478+}
9479+
9480+/* Copy LENGTH bytes from S into SCRATCH. Returns NULL if SCRATCH
9481+ could not be resized, otherwise a pointer to the copy. */
9482+static char *
9483+addgetnetgrentX_append_n (struct addgetnetgrentX_scratch *scratch,
9484+ const char *s, size_t length)
9485+{
9486+ while (true)
9487+ {
9488+ size_t remaining = scratch->buffer.length - scratch->buffer_used;
9489+ if (remaining >= length)
9490+ break;
9491+ if (!scratch_buffer_grow_preserve (&scratch->buffer))
9492+ return NULL;
9493+ }
9494+ char *copy = scratch->buffer.data + scratch->buffer_used;
9495+ memcpy (copy, s, length);
9496+ scratch->buffer_used += length;
9497+ return copy;
9498+}
9499+
9500+/* Copy S into SCRATCH, including its null terminator. Returns false
9501+ if SCRATCH could not be resized. */
9502+static bool
9503+addgetnetgrentX_append (struct addgetnetgrentX_scratch *scratch, const char *s)
9504+{
9505+ if (s == NULL)
9506+ s = "";
9507+ return addgetnetgrentX_append_n (scratch, s, strlen (s) + 1) != NULL;
9508+}
9509+
9510+/* Caller must initialize and free *SCRATCH. If the return value is
9511+ negative, this function has sent a notfound response. */
9512 static time_t
9513 addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9514 const char *key, uid_t uid, struct hashentry *he,
9515- struct datahead *dh, struct dataset **resultp,
9516- void **tofreep)
9517+ struct datahead *dh, struct addgetnetgrentX_scratch *scratch)
9518 {
9519 if (__glibc_unlikely (debug_level > 0))
9520 {
9521@@ -132,14 +209,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9522
9523 char *key_copy = NULL;
9524 struct __netgrent data;
9525- size_t buflen = MAX (1024, sizeof (*dataset) + req->key_len);
9526- size_t buffilled = sizeof (*dataset);
9527- char *buffer = NULL;
9528 size_t nentries = 0;
9529 size_t group_len = strlen (key) + 1;
9530 struct name_list *first_needed
9531 = alloca (sizeof (struct name_list) + group_len);
9532- *tofreep = NULL;
9533
9534 if (netgroup_database == NULL
9535 && !__nss_database_get (nss_database_netgroup, &netgroup_database))
9536@@ -151,8 +224,6 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9537 }
9538
9539 memset (&data, '\0', sizeof (data));
9540- buffer = xmalloc (buflen);
9541- *tofreep = buffer;
9542 first_needed->next = first_needed;
9543 memcpy (first_needed->name, key, group_len);
9544 data.needed_groups = first_needed;
9545@@ -195,8 +266,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9546 while (1)
9547 {
9548 int e;
9549- status = getfct.f (&data, buffer + buffilled,
9550- buflen - buffilled - req->key_len, &e);
9551+ status = getfct.f (&data, scratch->tmp.data,
9552+ scratch->tmp.length, &e);
9553 if (status == NSS_STATUS_SUCCESS)
9554 {
9555 if (data.type == triple_val)
9556@@ -204,68 +275,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9557 const char *nhost = data.val.triple.host;
9558 const char *nuser = data.val.triple.user;
9559 const char *ndomain = data.val.triple.domain;
9560-
9561- size_t hostlen = strlen (nhost ?: "") + 1;
9562- size_t userlen = strlen (nuser ?: "") + 1;
9563- size_t domainlen = strlen (ndomain ?: "") + 1;
9564-
9565- if (nhost == NULL || nuser == NULL || ndomain == NULL
9566- || nhost > nuser || nuser > ndomain)
9567- {
9568- const char *last = nhost;
9569- if (last == NULL
9570- || (nuser != NULL && nuser > last))
9571- last = nuser;
9572- if (last == NULL
9573- || (ndomain != NULL && ndomain > last))
9574- last = ndomain;
9575-
9576- size_t bufused
9577- = (last == NULL
9578- ? buffilled
9579- : last + strlen (last) + 1 - buffer);
9580-
9581- /* We have to make temporary copies. */
9582- size_t needed = hostlen + userlen + domainlen;
9583-
9584- if (buflen - req->key_len - bufused < needed)
9585- {
9586- buflen += MAX (buflen, 2 * needed);
9587- /* Save offset in the old buffer. We don't
9588- bother with the NULL check here since
9589- we'll do that later anyway. */
9590- size_t nhostdiff = nhost - buffer;
9591- size_t nuserdiff = nuser - buffer;
9592- size_t ndomaindiff = ndomain - buffer;
9593-
9594- char *newbuf = xrealloc (buffer, buflen);
9595- /* Fix up the triplet pointers into the new
9596- buffer. */
9597- nhost = (nhost ? newbuf + nhostdiff
9598- : NULL);
9599- nuser = (nuser ? newbuf + nuserdiff
9600- : NULL);
9601- ndomain = (ndomain ? newbuf + ndomaindiff
9602- : NULL);
9603- *tofreep = buffer = newbuf;
9604- }
9605-
9606- nhost = memcpy (buffer + bufused,
9607- nhost ?: "", hostlen);
9608- nuser = memcpy ((char *) nhost + hostlen,
9609- nuser ?: "", userlen);
9610- ndomain = memcpy ((char *) nuser + userlen,
9611- ndomain ?: "", domainlen);
9612- }
9613-
9614- char *wp = buffer + buffilled;
9615- wp = memmove (wp, nhost ?: "", hostlen);
9616- wp += hostlen;
9617- wp = memmove (wp, nuser ?: "", userlen);
9618- wp += userlen;
9619- wp = memmove (wp, ndomain ?: "", domainlen);
9620- wp += domainlen;
9621- buffilled = wp - buffer;
9622+ if (!(addgetnetgrentX_append (scratch, nhost)
9623+ && addgetnetgrentX_append (scratch, nuser)
9624+ && addgetnetgrentX_append (scratch, ndomain)))
9625+ return send_notfound (fd);
9626 ++nentries;
9627 }
9628 else
9629@@ -317,8 +330,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9630 }
9631 else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE)
9632 {
9633- buflen *= 2;
9634- *tofreep = buffer = xrealloc (buffer, buflen);
9635+ if (!scratch_buffer_grow (&scratch->tmp))
9636+ return send_notfound (fd);
9637 }
9638 else if (status == NSS_STATUS_RETURN
9639 || status == NSS_STATUS_NOTFOUND
9640@@ -351,10 +364,17 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9641 goto maybe_cache_add;
9642 }
9643
9644- total = buffilled;
9645+ /* Capture the result size without the key appended. */
9646+ total = scratch->buffer_used;
9647+
9648+ /* Make a copy of the key. The scratch buffer must not move after
9649+ this point. */
9650+ key_copy = addgetnetgrentX_append_n (scratch, key, req->key_len);
9651+ if (key_copy == NULL)
9652+ return send_notfound (fd);
9653
9654 /* Fill in the dataset. */
9655- dataset = (struct dataset *) buffer;
9656+ dataset = scratch->buffer.data;
9657 timeout = datahead_init_pos (&dataset->head, total + req->key_len,
9658 total - offsetof (struct dataset, resp),
9659 he == NULL ? 0 : dh->nreloads + 1,
9660@@ -363,11 +383,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9661 dataset->resp.version = NSCD_VERSION;
9662 dataset->resp.found = 1;
9663 dataset->resp.nresults = nentries;
9664- dataset->resp.result_len = buffilled - sizeof (*dataset);
9665-
9666- assert (buflen - buffilled >= req->key_len);
9667- key_copy = memcpy (buffer + buffilled, key, req->key_len);
9668- buffilled += req->key_len;
9669+ dataset->resp.result_len = total - sizeof (*dataset);
9670
9671 /* Now we can determine whether on refill we have to create a new
9672 record or not. */
9673@@ -398,7 +414,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9674 if (__glibc_likely (newp != NULL))
9675 {
9676 /* Adjust pointer into the memory block. */
9677- key_copy = (char *) newp + (key_copy - buffer);
9678+ key_copy = (char *) newp + (key_copy - (char *) dataset);
9679
9680 dataset = memcpy (newp, dataset, total + req->key_len);
9681 cacheable = true;
9682@@ -439,7 +455,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
9683 }
9684
9685 out:
9686- *resultp = dataset;
9687+ scratch->dataset = dataset;
9688
9689 return timeout;
9690 }
9691@@ -460,6 +476,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
9692 if (user != NULL)
9693 key = strchr (key, '\0') + 1;
9694 const char *domain = *key++ ? key : NULL;
9695+ struct addgetnetgrentX_scratch scratch;
9696+
9697+ addgetnetgrentX_scratch_init (&scratch);
9698
9699 if (__glibc_unlikely (debug_level > 0))
9700 {
9701@@ -475,12 +494,8 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
9702 group, group_len,
9703 db, uid);
9704 time_t timeout;
9705- void *tofree;
9706 if (result != NULL)
9707- {
9708- timeout = result->head.timeout;
9709- tofree = NULL;
9710- }
9711+ timeout = result->head.timeout;
9712 else
9713 {
9714 request_header req_get =
9715@@ -489,7 +504,10 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
9716 .key_len = group_len
9717 };
9718 timeout = addgetnetgrentX (db, -1, &req_get, group, uid, NULL, NULL,
9719- &result, &tofree);
9720+ &scratch);
9721+ result = scratch.dataset;
9722+ if (timeout < 0)
9723+ goto out;
9724 }
9725
9726 struct indataset
9727@@ -603,7 +621,7 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
9728 }
9729
9730 out:
9731- free (tofree);
9732+ addgetnetgrentX_scratch_free (&scratch);
9733 return timeout;
9734 }
9735
9736@@ -613,11 +631,12 @@ addgetnetgrentX_ignore (struct database_dyn *db, int fd, request_header *req,
9737 const char *key, uid_t uid, struct hashentry *he,
9738 struct datahead *dh)
9739 {
9740- struct dataset *ignore;
9741- void *tofree;
9742- time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh,
9743- &ignore, &tofree);
9744- free (tofree);
9745+ struct addgetnetgrentX_scratch scratch;
9746+ addgetnetgrentX_scratch_init (&scratch);
9747+ time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, &scratch);
9748+ addgetnetgrentX_scratch_free (&scratch);
9749+ if (timeout < 0)
9750+ timeout = 0;
9751 return timeout;
9752 }
9753
9754@@ -661,5 +680,9 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he,
9755 .key_len = he->len
9756 };
9757
9758- return addinnetgrX (db, -1, &req, db->data + he->key, he->owner, he, dh);
9759+ int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
9760+ he, dh);
9761+ if (timeout < 0)
9762+ timeout = 0;
9763+ return timeout;
9764 }
9765
9766commit fd658f026f25cf59e8db243bc3b3e09cd5a20ba0
9767Author: H.J. Lu <hjl.tools@gmail.com>
9768Date: Thu Apr 25 08:06:52 2024 -0700
9769
9770 elf: Also compile dl-misc.os with $(rtld-early-cflags)
9771
9772 Also compile dl-misc.os with $(rtld-early-cflags) to avoid
9773
9774 Program received signal SIGILL, Illegal instruction.
9775 0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2",
9776 endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156
9777 156 bool positive = true;
9778 (gdb) bt
9779 #0 0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2",
9780 endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156
9781 #1 0x00007ffff7fdb1a9 in tunable_initialize (
9782 cur=cur@entry=0x7ffff7ffbc00 <tunable_list+2176>,
9783 strval=strval@entry=0x7fffffffe2c9 "2", len=len@entry=1)
9784 at dl-tunables.c:131
9785 #2 0x00007ffff7fdb3a2 in parse_tunables (valstring=<optimized out>)
9786 at dl-tunables.c:258
9787 #3 0x00007ffff7fdb5d9 in __GI___tunables_init (envp=0x7fffffffdd58)
9788 at dl-tunables.c:288
9789 #4 0x00007ffff7fe44c3 in _dl_sysdep_start (
9790 start_argptr=start_argptr@entry=0x7fffffffdcb0,
9791 dl_main=dl_main@entry=0x7ffff7fe5f80 <dl_main>)
9792 at ../sysdeps/unix/sysv/linux/dl-sysdep.c:110
9793 #5 0x00007ffff7fe5cae in _dl_start_final (arg=0x7fffffffdcb0) at rtld.c:494
9794 #6 _dl_start (arg=0x7fffffffdcb0) at rtld.c:581
9795 #7 0x00007ffff7fe4b38 in _start ()
9796 (gdb)
9797
9798 when setting GLIBC_TUNABLES in glibc compiled with APX.
9799 Reviewed-by: Florian Weimer <fweimer@redhat.com>
9800
9801 (cherry picked from commit 049b7684c912dd32b67b1b15b0f43bf07d5f512e)
9802
9803diff --git a/elf/Makefile b/elf/Makefile
9804index 69aa423c4b..a50a988e73 100644
9805--- a/elf/Makefile
9806+++ b/elf/Makefile
9807@@ -170,6 +170,7 @@ CFLAGS-.op += $(call elide-stack-protector,.op,$(elide-routines.os))
9808 CFLAGS-.os += $(call elide-stack-protector,.os,$(all-rtld-routines))
9809
9810 # Add the requested compiler flags to the early startup code.
9811+CFLAGS-dl-misc.os += $(rtld-early-cflags)
9812 CFLAGS-dl-printf.os += $(rtld-early-cflags)
9813 CFLAGS-dl-setup_hash.os += $(rtld-early-cflags)
9814 CFLAGS-dl-sysdep.os += $(rtld-early-cflags)
9815
9816commit 9831f98c266a8d56d1bf729b709c08e40375540c
9817Author: Florian Weimer <fweimer@redhat.com>
9818Date: Fri Apr 19 14:38:17 2024 +0200
9819
9820 login: Check default sizes of structs utmp, utmpx, lastlog
9821
9822 The default <utmp-size.h> is for ports with a 64-bit time_t.
9823 Ports with a 32-bit time_t or with __WORDSIZE_TIME64_COMPAT32=1
9824 need to override it.
9825
9826 Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
9827 (cherry picked from commit 4d4da5aab936504b2d3eca3146e109630d9093c4)
9828
9829diff --git a/login/Makefile b/login/Makefile
9830index 1e22008a61..b26ac42bfc 100644
9831--- a/login/Makefile
9832+++ b/login/Makefile
9833@@ -44,7 +44,7 @@ subdir-dirs = programs
9834 vpath %.c programs
9835
9836 tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \
9837- tst-pututxline-lockfail tst-pututxline-cache
9838+ tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size
9839
9840 # Empty compatibility library for old binaries.
9841 extra-libs := libutil
9842diff --git a/login/tst-utmp-size.c b/login/tst-utmp-size.c
9843new file mode 100644
9844index 0000000000..1b7f7ff042
9845--- /dev/null
9846+++ b/login/tst-utmp-size.c
9847@@ -0,0 +1,33 @@
9848+/* Check expected sizes of struct utmp, struct utmpx, struct lastlog.
9849+ Copyright (C) 2024 Free Software Foundation, Inc.
9850+ This file is part of the GNU C Library.
9851+
9852+ The GNU C Library is free software; you can redistribute it and/or
9853+ modify it under the terms of the GNU Lesser General Public
9854+ License as published by the Free Software Foundation; either
9855+ version 2.1 of the License, or (at your option) any later version.
9856+
9857+ The GNU C Library is distributed in the hope that it will be useful,
9858+ but WITHOUT ANY WARRANTY; without even the implied warranty of
9859+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9860+ Lesser General Public License for more details.
9861+
9862+ You should have received a copy of the GNU Lesser General Public
9863+ License along with the GNU C Library; if not, see
9864+ <https://www.gnu.org/licenses/>. */
9865+
9866+#include <utmp.h>
9867+#include <utmpx.h>
9868+#include <utmp-size.h>
9869+
9870+static int
9871+do_test (void)
9872+{
9873+ _Static_assert (sizeof (struct utmp) == UTMP_SIZE, "struct utmp size");
9874+ _Static_assert (sizeof (struct utmpx) == UTMP_SIZE, "struct utmpx size");
9875+ _Static_assert (sizeof (struct lastlog) == LASTLOG_SIZE,
9876+ "struct lastlog size");
9877+ return 0;
9878+}
9879+
9880+#include <support/test-driver.c>
9881diff --git a/sysdeps/arc/utmp-size.h b/sysdeps/arc/utmp-size.h
9882new file mode 100644
9883index 0000000000..a247fcd3da
9884--- /dev/null
9885+++ b/sysdeps/arc/utmp-size.h
9886@@ -0,0 +1,3 @@
9887+/* arc has less padding than other architectures with 64-bit time_t. */
9888+#define UTMP_SIZE 392
9889+#define LASTLOG_SIZE 296
9890diff --git a/sysdeps/arm/utmp-size.h b/sysdeps/arm/utmp-size.h
9891new file mode 100644
9892index 0000000000..8f21ebe1b6
9893--- /dev/null
9894+++ b/sysdeps/arm/utmp-size.h
9895@@ -0,0 +1,2 @@
9896+#define UTMP_SIZE 384
9897+#define LASTLOG_SIZE 292
9898diff --git a/sysdeps/csky/utmp-size.h b/sysdeps/csky/utmp-size.h
9899new file mode 100644
9900index 0000000000..8f21ebe1b6
9901--- /dev/null
9902+++ b/sysdeps/csky/utmp-size.h
9903@@ -0,0 +1,2 @@
9904+#define UTMP_SIZE 384
9905+#define LASTLOG_SIZE 292
9906diff --git a/sysdeps/generic/utmp-size.h b/sysdeps/generic/utmp-size.h
9907new file mode 100644
9908index 0000000000..89dbe878b0
9909--- /dev/null
9910+++ b/sysdeps/generic/utmp-size.h
9911@@ -0,0 +1,23 @@
9912+/* Expected sizes of utmp-related structures stored in files. 64-bit version.
9913+ Copyright (C) 2024 Free Software Foundation, Inc.
9914+ This file is part of the GNU C Library.
9915+
9916+ The GNU C Library is free software; you can redistribute it and/or
9917+ modify it under the terms of the GNU Lesser General Public
9918+ License as published by the Free Software Foundation; either
9919+ version 2.1 of the License, or (at your option) any later version.
9920+
9921+ The GNU C Library is distributed in the hope that it will be useful,
9922+ but WITHOUT ANY WARRANTY; without even the implied warranty of
9923+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9924+ Lesser General Public License for more details.
9925+
9926+ You should have received a copy of the GNU Lesser General Public
9927+ License along with the GNU C Library; if not, see
9928+ <https://www.gnu.org/licenses/>. */
9929+
9930+/* Expected size, in bytes, of struct utmp and struct utmpx. */
9931+#define UTMP_SIZE 400
9932+
9933+/* Expected size, in bytes, of struct lastlog. */
9934+#define LASTLOG_SIZE 296
9935diff --git a/sysdeps/hppa/utmp-size.h b/sysdeps/hppa/utmp-size.h
9936new file mode 100644
9937index 0000000000..8f21ebe1b6
9938--- /dev/null
9939+++ b/sysdeps/hppa/utmp-size.h
9940@@ -0,0 +1,2 @@
9941+#define UTMP_SIZE 384
9942+#define LASTLOG_SIZE 292
9943diff --git a/sysdeps/m68k/utmp-size.h b/sysdeps/m68k/utmp-size.h
9944new file mode 100644
9945index 0000000000..5946685819
9946--- /dev/null
9947+++ b/sysdeps/m68k/utmp-size.h
9948@@ -0,0 +1,3 @@
9949+/* m68k has 2-byte alignment. */
9950+#define UTMP_SIZE 382
9951+#define LASTLOG_SIZE 292
9952diff --git a/sysdeps/microblaze/utmp-size.h b/sysdeps/microblaze/utmp-size.h
9953new file mode 100644
9954index 0000000000..8f21ebe1b6
9955--- /dev/null
9956+++ b/sysdeps/microblaze/utmp-size.h
9957@@ -0,0 +1,2 @@
9958+#define UTMP_SIZE 384
9959+#define LASTLOG_SIZE 292
9960diff --git a/sysdeps/mips/utmp-size.h b/sysdeps/mips/utmp-size.h
9961new file mode 100644
9962index 0000000000..8f21ebe1b6
9963--- /dev/null
9964+++ b/sysdeps/mips/utmp-size.h
9965@@ -0,0 +1,2 @@
9966+#define UTMP_SIZE 384
9967+#define LASTLOG_SIZE 292
9968diff --git a/sysdeps/nios2/utmp-size.h b/sysdeps/nios2/utmp-size.h
9969new file mode 100644
9970index 0000000000..8f21ebe1b6
9971--- /dev/null
9972+++ b/sysdeps/nios2/utmp-size.h
9973@@ -0,0 +1,2 @@
9974+#define UTMP_SIZE 384
9975+#define LASTLOG_SIZE 292
9976diff --git a/sysdeps/or1k/utmp-size.h b/sysdeps/or1k/utmp-size.h
9977new file mode 100644
9978index 0000000000..6b3653aa4d
9979--- /dev/null
9980+++ b/sysdeps/or1k/utmp-size.h
9981@@ -0,0 +1,3 @@
9982+/* or1k has less padding than other architectures with 64-bit time_t. */
9983+#define UTMP_SIZE 392
9984+#define LASTLOG_SIZE 296
9985diff --git a/sysdeps/powerpc/utmp-size.h b/sysdeps/powerpc/utmp-size.h
9986new file mode 100644
9987index 0000000000..8f21ebe1b6
9988--- /dev/null
9989+++ b/sysdeps/powerpc/utmp-size.h
9990@@ -0,0 +1,2 @@
9991+#define UTMP_SIZE 384
9992+#define LASTLOG_SIZE 292
9993diff --git a/sysdeps/riscv/utmp-size.h b/sysdeps/riscv/utmp-size.h
9994new file mode 100644
9995index 0000000000..8f21ebe1b6
9996--- /dev/null
9997+++ b/sysdeps/riscv/utmp-size.h
9998@@ -0,0 +1,2 @@
9999+#define UTMP_SIZE 384
10000+#define LASTLOG_SIZE 292
10001diff --git a/sysdeps/sh/utmp-size.h b/sysdeps/sh/utmp-size.h
10002new file mode 100644
10003index 0000000000..8f21ebe1b6
10004--- /dev/null
10005+++ b/sysdeps/sh/utmp-size.h
10006@@ -0,0 +1,2 @@
10007+#define UTMP_SIZE 384
10008+#define LASTLOG_SIZE 292
10009diff --git a/sysdeps/sparc/utmp-size.h b/sysdeps/sparc/utmp-size.h
10010new file mode 100644
10011index 0000000000..8f21ebe1b6
10012--- /dev/null
10013+++ b/sysdeps/sparc/utmp-size.h
10014@@ -0,0 +1,2 @@
10015+#define UTMP_SIZE 384
10016+#define LASTLOG_SIZE 292
10017diff --git a/sysdeps/x86/utmp-size.h b/sysdeps/x86/utmp-size.h
10018new file mode 100644
10019index 0000000000..8f21ebe1b6
10020--- /dev/null
10021+++ b/sysdeps/x86/utmp-size.h
10022@@ -0,0 +1,2 @@
10023+#define UTMP_SIZE 384
10024+#define LASTLOG_SIZE 292
10025
10026commit 836d43b98973e0845b739ff5d3aad3af09dc7d0f
10027Author: Florian Weimer <fweimer@redhat.com>
10028Date: Fri Apr 19 14:38:17 2024 +0200
10029
10030 login: structs utmp, utmpx, lastlog _TIME_BITS independence (bug 30701)
10031
10032 These structs describe file formats under /var/log, and should not
10033 depend on the definition of _TIME_BITS. This is achieved by
10034 defining __WORDSIZE_TIME64_COMPAT32 to 1 on 32-bit ports that
10035 support 32-bit time_t values (where __time_t is 32 bits).
10036
10037 Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
10038 (cherry picked from commit 9abdae94c7454c45e02e97e4ed1eb1b1915d13d8)
10039
10040diff --git a/bits/wordsize.h b/bits/wordsize.h
10041index 14edae3a11..53013a9275 100644
10042--- a/bits/wordsize.h
10043+++ b/bits/wordsize.h
10044@@ -21,7 +21,9 @@
10045 #define __WORDSIZE32_PTRDIFF_LONG
10046
10047 /* Set to 1 in order to force time types to be 32 bits instead of 64 bits in
10048- struct lastlog and struct utmp{,x} on 64-bit ports. This may be done in
10049+ struct lastlog and struct utmp{,x}. This may be done in
10050 order to make 64-bit ports compatible with 32-bit ports. Set to 0 for
10051- 64-bit ports where the time types are 64-bits or for any 32-bit ports. */
10052+ 64-bit ports where the time types are 64-bits and new 32-bit ports
10053+ where time_t is 64 bits, and there is no companion architecture with
10054+ 32-bit time_t. */
10055 #define __WORDSIZE_TIME64_COMPAT32
10056diff --git a/login/Makefile b/login/Makefile
10057index b26ac42bfc..f91190e3dc 100644
10058--- a/login/Makefile
10059+++ b/login/Makefile
10060@@ -44,7 +44,9 @@ subdir-dirs = programs
10061 vpath %.c programs
10062
10063 tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \
10064- tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size
10065+ tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size tst-utmp-size-64
10066+
10067+CFLAGS-tst-utmp-size-64.c += -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64
10068
10069 # Empty compatibility library for old binaries.
10070 extra-libs := libutil
10071diff --git a/login/tst-utmp-size-64.c b/login/tst-utmp-size-64.c
10072new file mode 100644
10073index 0000000000..7a581a4c12
10074--- /dev/null
10075+++ b/login/tst-utmp-size-64.c
10076@@ -0,0 +1,2 @@
10077+/* The on-disk layout must not change in time64 mode. */
10078+#include "tst-utmp-size.c"
10079diff --git a/sysdeps/arm/bits/wordsize.h b/sysdeps/arm/bits/wordsize.h
10080new file mode 100644
10081index 0000000000..6ecbfe7c86
10082--- /dev/null
10083+++ b/sysdeps/arm/bits/wordsize.h
10084@@ -0,0 +1,21 @@
10085+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
10086+ This file is part of the GNU C Library.
10087+
10088+ The GNU C Library is free software; you can redistribute it and/or
10089+ modify it under the terms of the GNU Lesser General Public
10090+ License as published by the Free Software Foundation; either
10091+ version 2.1 of the License, or (at your option) any later version.
10092+
10093+ The GNU C Library is distributed in the hope that it will be useful,
10094+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10095+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10096+ Lesser General Public License for more details.
10097+
10098+ You should have received a copy of the GNU Lesser General Public
10099+ License along with the GNU C Library; if not, see
10100+ <https://www.gnu.org/licenses/>. */
10101+
10102+#define __WORDSIZE 32
10103+#define __WORDSIZE_TIME64_COMPAT32 1
10104+#define __WORDSIZE32_SIZE_ULONG 0
10105+#define __WORDSIZE32_PTRDIFF_LONG 0
10106diff --git a/sysdeps/csky/bits/wordsize.h b/sysdeps/csky/bits/wordsize.h
10107new file mode 100644
10108index 0000000000..6ecbfe7c86
10109--- /dev/null
10110+++ b/sysdeps/csky/bits/wordsize.h
10111@@ -0,0 +1,21 @@
10112+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
10113+ This file is part of the GNU C Library.
10114+
10115+ The GNU C Library is free software; you can redistribute it and/or
10116+ modify it under the terms of the GNU Lesser General Public
10117+ License as published by the Free Software Foundation; either
10118+ version 2.1 of the License, or (at your option) any later version.
10119+
10120+ The GNU C Library is distributed in the hope that it will be useful,
10121+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10122+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10123+ Lesser General Public License for more details.
10124+
10125+ You should have received a copy of the GNU Lesser General Public
10126+ License along with the GNU C Library; if not, see
10127+ <https://www.gnu.org/licenses/>. */
10128+
10129+#define __WORDSIZE 32
10130+#define __WORDSIZE_TIME64_COMPAT32 1
10131+#define __WORDSIZE32_SIZE_ULONG 0
10132+#define __WORDSIZE32_PTRDIFF_LONG 0
10133diff --git a/sysdeps/m68k/bits/wordsize.h b/sysdeps/m68k/bits/wordsize.h
10134new file mode 100644
10135index 0000000000..6ecbfe7c86
10136--- /dev/null
10137+++ b/sysdeps/m68k/bits/wordsize.h
10138@@ -0,0 +1,21 @@
10139+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
10140+ This file is part of the GNU C Library.
10141+
10142+ The GNU C Library is free software; you can redistribute it and/or
10143+ modify it under the terms of the GNU Lesser General Public
10144+ License as published by the Free Software Foundation; either
10145+ version 2.1 of the License, or (at your option) any later version.
10146+
10147+ The GNU C Library is distributed in the hope that it will be useful,
10148+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10149+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10150+ Lesser General Public License for more details.
10151+
10152+ You should have received a copy of the GNU Lesser General Public
10153+ License along with the GNU C Library; if not, see
10154+ <https://www.gnu.org/licenses/>. */
10155+
10156+#define __WORDSIZE 32
10157+#define __WORDSIZE_TIME64_COMPAT32 1
10158+#define __WORDSIZE32_SIZE_ULONG 0
10159+#define __WORDSIZE32_PTRDIFF_LONG 0
10160diff --git a/sysdeps/microblaze/bits/wordsize.h b/sysdeps/microblaze/bits/wordsize.h
10161new file mode 100644
10162index 0000000000..6ecbfe7c86
10163--- /dev/null
10164+++ b/sysdeps/microblaze/bits/wordsize.h
10165@@ -0,0 +1,21 @@
10166+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
10167+ This file is part of the GNU C Library.
10168+
10169+ The GNU C Library is free software; you can redistribute it and/or
10170+ modify it under the terms of the GNU Lesser General Public
10171+ License as published by the Free Software Foundation; either
10172+ version 2.1 of the License, or (at your option) any later version.
10173+
10174+ The GNU C Library is distributed in the hope that it will be useful,
10175+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10176+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10177+ Lesser General Public License for more details.
10178+
10179+ You should have received a copy of the GNU Lesser General Public
10180+ License along with the GNU C Library; if not, see
10181+ <https://www.gnu.org/licenses/>. */
10182+
10183+#define __WORDSIZE 32
10184+#define __WORDSIZE_TIME64_COMPAT32 1
10185+#define __WORDSIZE32_SIZE_ULONG 0
10186+#define __WORDSIZE32_PTRDIFF_LONG 0
10187diff --git a/sysdeps/mips/bits/wordsize.h b/sysdeps/mips/bits/wordsize.h
10188index 57f0f2a22f..30dd3fd85d 100644
10189--- a/sysdeps/mips/bits/wordsize.h
10190+++ b/sysdeps/mips/bits/wordsize.h
10191@@ -19,11 +19,7 @@
10192
10193 #define __WORDSIZE _MIPS_SZPTR
10194
10195-#if _MIPS_SIM == _ABI64
10196-# define __WORDSIZE_TIME64_COMPAT32 1
10197-#else
10198-# define __WORDSIZE_TIME64_COMPAT32 0
10199-#endif
10200+#define __WORDSIZE_TIME64_COMPAT32 1
10201
10202 #if __WORDSIZE == 32
10203 #define __WORDSIZE32_SIZE_ULONG 0
10204diff --git a/sysdeps/nios2/bits/wordsize.h b/sysdeps/nios2/bits/wordsize.h
10205new file mode 100644
10206index 0000000000..6ecbfe7c86
10207--- /dev/null
10208+++ b/sysdeps/nios2/bits/wordsize.h
10209@@ -0,0 +1,21 @@
10210+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
10211+ This file is part of the GNU C Library.
10212+
10213+ The GNU C Library is free software; you can redistribute it and/or
10214+ modify it under the terms of the GNU Lesser General Public
10215+ License as published by the Free Software Foundation; either
10216+ version 2.1 of the License, or (at your option) any later version.
10217+
10218+ The GNU C Library is distributed in the hope that it will be useful,
10219+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10220+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10221+ Lesser General Public License for more details.
10222+
10223+ You should have received a copy of the GNU Lesser General Public
10224+ License along with the GNU C Library; if not, see
10225+ <https://www.gnu.org/licenses/>. */
10226+
10227+#define __WORDSIZE 32
10228+#define __WORDSIZE_TIME64_COMPAT32 1
10229+#define __WORDSIZE32_SIZE_ULONG 0
10230+#define __WORDSIZE32_PTRDIFF_LONG 0
10231diff --git a/sysdeps/powerpc/powerpc32/bits/wordsize.h b/sysdeps/powerpc/powerpc32/bits/wordsize.h
10232index 04ca9debf0..6993fb6b29 100644
10233--- a/sysdeps/powerpc/powerpc32/bits/wordsize.h
10234+++ b/sysdeps/powerpc/powerpc32/bits/wordsize.h
10235@@ -2,10 +2,9 @@
10236
10237 #if defined __powerpc64__
10238 # define __WORDSIZE 64
10239-# define __WORDSIZE_TIME64_COMPAT32 1
10240 #else
10241 # define __WORDSIZE 32
10242-# define __WORDSIZE_TIME64_COMPAT32 0
10243 # define __WORDSIZE32_SIZE_ULONG 0
10244 # define __WORDSIZE32_PTRDIFF_LONG 0
10245 #endif
10246+#define __WORDSIZE_TIME64_COMPAT32 1
10247diff --git a/sysdeps/powerpc/powerpc64/bits/wordsize.h b/sysdeps/powerpc/powerpc64/bits/wordsize.h
10248index 04ca9debf0..6993fb6b29 100644
10249--- a/sysdeps/powerpc/powerpc64/bits/wordsize.h
10250+++ b/sysdeps/powerpc/powerpc64/bits/wordsize.h
10251@@ -2,10 +2,9 @@
10252
10253 #if defined __powerpc64__
10254 # define __WORDSIZE 64
10255-# define __WORDSIZE_TIME64_COMPAT32 1
10256 #else
10257 # define __WORDSIZE 32
10258-# define __WORDSIZE_TIME64_COMPAT32 0
10259 # define __WORDSIZE32_SIZE_ULONG 0
10260 # define __WORDSIZE32_PTRDIFF_LONG 0
10261 #endif
10262+#define __WORDSIZE_TIME64_COMPAT32 1
10263diff --git a/sysdeps/sh/bits/wordsize.h b/sysdeps/sh/bits/wordsize.h
10264new file mode 100644
10265index 0000000000..6ecbfe7c86
10266--- /dev/null
10267+++ b/sysdeps/sh/bits/wordsize.h
10268@@ -0,0 +1,21 @@
10269+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
10270+ This file is part of the GNU C Library.
10271+
10272+ The GNU C Library is free software; you can redistribute it and/or
10273+ modify it under the terms of the GNU Lesser General Public
10274+ License as published by the Free Software Foundation; either
10275+ version 2.1 of the License, or (at your option) any later version.
10276+
10277+ The GNU C Library is distributed in the hope that it will be useful,
10278+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10279+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10280+ Lesser General Public License for more details.
10281+
10282+ You should have received a copy of the GNU Lesser General Public
10283+ License along with the GNU C Library; if not, see
10284+ <https://www.gnu.org/licenses/>. */
10285+
10286+#define __WORDSIZE 32
10287+#define __WORDSIZE_TIME64_COMPAT32 1
10288+#define __WORDSIZE32_SIZE_ULONG 0
10289+#define __WORDSIZE32_PTRDIFF_LONG 0
10290diff --git a/sysdeps/sparc/sparc32/bits/wordsize.h b/sysdeps/sparc/sparc32/bits/wordsize.h
10291index 4bbd2e63b4..a2e79e0fa9 100644
10292--- a/sysdeps/sparc/sparc32/bits/wordsize.h
10293+++ b/sysdeps/sparc/sparc32/bits/wordsize.h
10294@@ -1,6 +1,6 @@
10295 /* Determine the wordsize from the preprocessor defines. */
10296
10297 #define __WORDSIZE 32
10298-#define __WORDSIZE_TIME64_COMPAT32 0
10299+#define __WORDSIZE_TIME64_COMPAT32 1
10300 #define __WORDSIZE32_SIZE_ULONG 0
10301 #define __WORDSIZE32_PTRDIFF_LONG 0
10302diff --git a/sysdeps/sparc/sparc64/bits/wordsize.h b/sysdeps/sparc/sparc64/bits/wordsize.h
10303index 2f66f10d72..ea103e5970 100644
10304--- a/sysdeps/sparc/sparc64/bits/wordsize.h
10305+++ b/sysdeps/sparc/sparc64/bits/wordsize.h
10306@@ -2,10 +2,9 @@
10307
10308 #if defined __arch64__ || defined __sparcv9
10309 # define __WORDSIZE 64
10310-# define __WORDSIZE_TIME64_COMPAT32 1
10311 #else
10312 # define __WORDSIZE 32
10313-# define __WORDSIZE_TIME64_COMPAT32 0
10314 # define __WORDSIZE32_SIZE_ULONG 0
10315 # define __WORDSIZE32_PTRDIFF_LONG 0
10316 #endif
10317+#define __WORDSIZE_TIME64_COMPAT32 1
10318diff --git a/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h
10319new file mode 100644
10320index 0000000000..6ecbfe7c86
10321--- /dev/null
10322+++ b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h
10323@@ -0,0 +1,21 @@
10324+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
10325+ This file is part of the GNU C Library.
10326+
10327+ The GNU C Library is free software; you can redistribute it and/or
10328+ modify it under the terms of the GNU Lesser General Public
10329+ License as published by the Free Software Foundation; either
10330+ version 2.1 of the License, or (at your option) any later version.
10331+
10332+ The GNU C Library is distributed in the hope that it will be useful,
10333+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10334+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10335+ Lesser General Public License for more details.
10336+
10337+ You should have received a copy of the GNU Lesser General Public
10338+ License along with the GNU C Library; if not, see
10339+ <https://www.gnu.org/licenses/>. */
10340+
10341+#define __WORDSIZE 32
10342+#define __WORDSIZE_TIME64_COMPAT32 1
10343+#define __WORDSIZE32_SIZE_ULONG 0
10344+#define __WORDSIZE32_PTRDIFF_LONG 0
10345diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
10346index 04ca9debf0..6993fb6b29 100644
10347--- a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
10348+++ b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
10349@@ -2,10 +2,9 @@
10350
10351 #if defined __powerpc64__
10352 # define __WORDSIZE 64
10353-# define __WORDSIZE_TIME64_COMPAT32 1
10354 #else
10355 # define __WORDSIZE 32
10356-# define __WORDSIZE_TIME64_COMPAT32 0
10357 # define __WORDSIZE32_SIZE_ULONG 0
10358 # define __WORDSIZE32_PTRDIFF_LONG 0
10359 #endif
10360+#define __WORDSIZE_TIME64_COMPAT32 1
10361diff --git a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
10362index 7562875ee2..ea103e5970 100644
10363--- a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
10364+++ b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
10365@@ -2,10 +2,9 @@
10366
10367 #if defined __arch64__ || defined __sparcv9
10368 # define __WORDSIZE 64
10369-# define __WORDSIZE_TIME64_COMPAT32 1
10370 #else
10371 # define __WORDSIZE 32
10372 # define __WORDSIZE32_SIZE_ULONG 0
10373 # define __WORDSIZE32_PTRDIFF_LONG 0
10374-# define __WORDSIZE_TIME64_COMPAT32 0
10375 #endif
10376+#define __WORDSIZE_TIME64_COMPAT32 1
10377diff --git a/sysdeps/x86/bits/wordsize.h b/sysdeps/x86/bits/wordsize.h
10378index 70f652bca1..3f40aa76f9 100644
10379--- a/sysdeps/x86/bits/wordsize.h
10380+++ b/sysdeps/x86/bits/wordsize.h
10381@@ -8,10 +8,9 @@
10382 #define __WORDSIZE32_PTRDIFF_LONG 0
10383 #endif
10384
10385+#define __WORDSIZE_TIME64_COMPAT32 1
10386+
10387 #ifdef __x86_64__
10388-# define __WORDSIZE_TIME64_COMPAT32 1
10389 /* Both x86-64 and x32 use the 64-bit system call interface. */
10390 # define __SYSCALL_WORDSIZE 64
10391-#else
10392-# define __WORDSIZE_TIME64_COMPAT32 0
10393 #endif
10394
10395commit acc56074b0a5127631a64640aef1b7c5c103ebd8
10396Author: Florian Weimer <fweimer@redhat.com>
10397Date: Thu May 2 17:06:19 2024 +0200
10398
10399 nscd: Use time_t for return type of addgetnetgrentX
10400
10401 Using int may give false results for future dates (timeouts after the
10402 year 2038).
10403
10404 Fixes commit c04a21e050d64a1193a6daab872bca2528bda44b ("CVE-2024-33601,
10405 CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX
10406 (bug 31680)").
10407
10408 Reviewed-by: Carlos O'Donell <carlos@redhat.com>
10409 (cherry picked from commit 4bbca1a44691a6e9adcee5c6798a707b626bc331)
10410
10411diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
10412index e8fe041846..01d554af9c 100644
10413--- a/nscd/netgroupcache.c
10414+++ b/nscd/netgroupcache.c
10415@@ -680,8 +680,8 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he,
10416 .key_len = he->len
10417 };
10418
10419- int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
10420- he, dh);
10421+ time_t timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
10422+ he, dh);
10423 if (timeout < 0)
10424 timeout = 0;
10425 return timeout;
10426
10427commit 273a835fe7c685cc54266bb8b502787bad5e9bae
10428Author: Carlos O'Donell <carlos@redhat.com>
10429Date: Tue Apr 23 13:30:37 2024 -0400
10430
10431 time: Allow later version licensing.
10432
10433 The FSF's Licensing and Compliance Lab noted a discrepancy in the
10434 licensing of several files in the glibc package.
10435
10436 When timespec_get.c was implemented the license did not include
10437 the standard ", or (at your option) any later version." text.
10438
10439 Change the license in timespec_get.c and all copied files to match
10440 the expected license.
10441
10442 This change was previously approved in principle by the FSF in
10443 RT ticket #1316403. And a similar instance was fixed in
10444 commit 46703efa02f6ddebce5ee54c92f7c32598de0de6.
10445
10446 (cherry picked from commit 91695ee4598b39d181ab8df579b888a8863c4cab)
10447
10448diff --git a/sysdeps/unix/sysv/linux/timespec_get.c b/sysdeps/unix/sysv/linux/timespec_get.c
10449index c6e5e66289..778d1e3354 100644
10450--- a/sysdeps/unix/sysv/linux/timespec_get.c
10451+++ b/sysdeps/unix/sysv/linux/timespec_get.c
10452@@ -5,7 +5,7 @@
10453 The GNU C Library is free software; you can redistribute it and/or
10454 modify it under the terms of the GNU Lesser General Public
10455 License as published by the Free Software Foundation; either
10456- version 2.1 of the License.
10457+ version 2.1 of the License, or (at your option) any later version.
10458
10459 The GNU C Library is distributed in the hope that it will be useful,
10460 but WITHOUT ANY WARRANTY; without even the implied warranty of
10461diff --git a/sysdeps/unix/sysv/linux/timespec_getres.c b/sysdeps/unix/sysv/linux/timespec_getres.c
10462index 5acebe2a2c..2eef9e512c 100644
10463--- a/sysdeps/unix/sysv/linux/timespec_getres.c
10464+++ b/sysdeps/unix/sysv/linux/timespec_getres.c
10465@@ -5,7 +5,7 @@
10466 The GNU C Library is free software; you can redistribute it and/or
10467 modify it under the terms of the GNU Lesser General Public
10468 License as published by the Free Software Foundation; either
10469- version 2.1 of the License.
10470+ version 2.1 of the License, or (at your option) any later version.
10471
10472 The GNU C Library is distributed in the hope that it will be useful,
10473 but WITHOUT ANY WARRANTY; without even the implied warranty of
10474diff --git a/time/timespec_get.c b/time/timespec_get.c
10475index b031e42ca2..26a044bca6 100644
10476--- a/time/timespec_get.c
10477+++ b/time/timespec_get.c
10478@@ -4,7 +4,7 @@
10479 The GNU C Library is free software; you can redistribute it and/or
10480 modify it under the terms of the GNU Lesser General Public
10481 License as published by the Free Software Foundation; either
10482- version 2.1 of the License.
10483+ version 2.1 of the License, or (at your option) any later version.
10484
10485 The GNU C Library is distributed in the hope that it will be useful,
10486 but WITHOUT ANY WARRANTY; without even the implied warranty of
10487diff --git a/time/timespec_getres.c b/time/timespec_getres.c
10488index edb397507c..2e18b8bcac 100644
10489--- a/time/timespec_getres.c
10490+++ b/time/timespec_getres.c
10491@@ -5,7 +5,7 @@
10492 The GNU C Library is free software; you can redistribute it and/or
10493 modify it under the terms of the GNU Lesser General Public
10494 License as published by the Free Software Foundation; either
10495- version 2.1 of the License.
10496+ version 2.1 of the License, or (at your option) any later version.
10497
10498 The GNU C Library is distributed in the hope that it will be useful,
10499 but WITHOUT ANY WARRANTY; without even the implied warranty of
10500
10501commit 3148714ab61ad61281bae5a30f530d637034ac3b
10502Author: Gabi Falk <gabifalk@gmx.com>
10503Date: Tue Apr 30 20:05:02 2024 +0000
10504
10505 i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk
10506
10507 /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy_chk.o): in function `__memcpy_chk':
10508 /home/bmg/src/glibc/debug/../sysdeps/i386/memcpy_chk.S:29: multiple definition of `__memcpy_chk';/home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy_chk.o): in function `__mempcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/mempcpy_chk.S:28: multiple definition of `__mempcpy_chk'; /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here
10509
10510 After this change, the static library built for i586, regardless of PIC
10511 options, contains implementations of these functions respectively from
10512 sysdeps/i386/memcpy_chk.S and sysdeps/i386/mempcpy_chk.S. This ensures
10513 that memcpy and mempcpy won't pull in __chk_fail and the routines it
10514 calls.
10515
10516 Reported-by: Florian Weimer <fweimer@redhat.com>
10517 Signed-off-by: Gabi Falk <gabifalk@gmx.com>
10518 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
10519 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
10520 (cherry picked from commit 789894a2f554d4503ecb2f13b2b4e93e43414f33)
10521
10522diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S
10523index 3e26f112d6..79856d498a 100644
10524--- a/sysdeps/i386/i586/memcpy.S
10525+++ b/sysdeps/i386/i586/memcpy.S
10526@@ -26,7 +26,7 @@
10527 #define LEN SRC+4
10528
10529 .text
10530-#if defined PIC && IS_IN (libc)
10531+#if defined SHARED && IS_IN (libc)
10532 ENTRY (__memcpy_chk)
10533 movl 12(%esp), %eax
10534 cmpl %eax, 16(%esp)
10535
10536commit ad92c483a4bd34db1cfb3eb625212ea64848244f
10537Author: Gabi Falk <gabifalk@gmx.com>
10538Date: Tue Apr 30 20:05:03 2024 +0000
10539
10540 i686: Fix multiple definitions of __memmove_chk and __memset_chk
10541
10542 Commit c73c96a4a1af1326df7f96eec58209e1e04066d8 updated memcpy.S and
10543 mempcpy.S, but omitted memmove.S and memset.S. As a result, the static
10544 library built as PIC, whether with or without multiarch support,
10545 contains two definitions for each of the __memmove_chk and __memset_chk
10546 symbols.
10547
10548 /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../i686-pc-linux-gnu/bin/ld: /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset-ia32.o): in function `__memset_chk':
10549 /var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/string/../sysdeps/i386/i686/memset.S:32: multiple definition of `__memset_chk'; /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset_chk.o):/var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/debug/../sysdeps/i386/i686/multiarch/memset_chk.c:24: first defined here
10550
10551 After this change, regardless of PIC options, the static library, built
10552 for i686 with multiarch contains implementations of these functions
10553 respectively from debug/memmove_chk.c and debug/memset_chk.c, and
10554 without multiarch contains implementations of these functions
10555 respectively from sysdeps/i386/memmove_chk.S and
10556 sysdeps/i386/memset_chk.S. This ensures that memmove and memset won't
10557 pull in __chk_fail and the routines it calls.
10558
10559 Reported-by: Sam James <sam@gentoo.org>
10560 Tested-by: Sam James <sam@gentoo.org>
10561 Fixes: c73c96a4a1 ("i686: Fix build with --disable-multiarch")
10562 Signed-off-by: Gabi Falk <gabifalk@gmx.com>
10563 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
10564 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
10565 (cherry picked from commit 5a2cf833f5772d6c37c7adac388dd9af9cc1c4b9)
10566
10567diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
10568index f230359ad6..effd958120 100644
10569--- a/sysdeps/i386/i686/memmove.S
10570+++ b/sysdeps/i386/i686/memmove.S
10571@@ -29,7 +29,7 @@
10572 #define SRC DEST+4
10573 #define LEN SRC+4
10574
10575-#if defined PIC && IS_IN (libc)
10576+#if defined SHARED && IS_IN (libc)
10577 ENTRY_CHK (__memmove_chk)
10578 movl 12(%esp), %eax
10579 cmpl %eax, 16(%esp)
10580diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
10581index f02f5a6df7..ab06771ea0 100644
10582--- a/sysdeps/i386/i686/memset.S
10583+++ b/sysdeps/i386/i686/memset.S
10584@@ -27,7 +27,7 @@
10585 #define LEN CHR+4
10586
10587 .text
10588-#if defined PIC && IS_IN (libc)
10589+#if defined SHARED && IS_IN (libc)
10590 ENTRY_CHK (__memset_chk)
10591 movl 12(%esp), %eax
10592 cmpl %eax, 16(%esp)
10593
10594commit ff110b2591f0bdeccd121c3726af19c62d6fb184
10595Author: Gabi Falk <gabifalk@gmx.com>
10596Date: Tue Apr 30 20:05:04 2024 +0000
10597
10598 Add a test to check for duplicate definitions in the static library
10599
10600 This change follows two previous fixes addressing multiple definitions
10601 of __memcpy_chk and __mempcpy_chk functions on i586, and __memmove_chk
10602 and __memset_chk functions on i686. The test is intended to prevent
10603 such issues from occurring in the future.
10604
10605 Signed-off-by: Gabi Falk <gabifalk@gmx.com>
10606 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
10607 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
10608 (cherry picked from commit ded2e0753e9c46debeb2e0d26c5e560d2581d314)
10609
10610diff --git a/Makefile b/Makefile
10611index 7052b46df8..2e351c0321 100644
10612--- a/Makefile
10613+++ b/Makefile
10614@@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
10615 $(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \
10616 $(evaluate-test)
10617
10618+# Link libc.a as a whole to verify that it does not contain multiple
10619+# definitions of any symbols.
10620+tests-special += $(objpfx)link-static-libc.out
10621+$(objpfx)link-static-libc.out:
10622+ $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
10623+ $(evaluate-test)
10624+
10625 # Print test summary for tests in $1 .sum file;
10626 # $2 is optional test identifier.
10627 # Fail if there are unexpected failures in the test results.
10628
10629commit fa616ea3730cb42046d19f28d611be0bc390af7c
10630Author: Sam James <sam@gentoo.org>
10631Date: Sat May 4 13:28:13 2024 +0100
10632
10633 Revert "Add a test to check for duplicate definitions in the static library"
10634
10635 This reverts commit ff110b2591f0bdeccd121c3726af19c62d6fb184.
10636
10637 I had the wrong cherry-pick reference (the commit content is right; it's
10638 just referring to a base that isn't upstream), but let's revert and reapply
10639 for clarity.
10640
10641 Signed-off-by: Sam James <sam@gentoo.org>
10642
10643diff --git a/Makefile b/Makefile
10644index 2e351c0321..7052b46df8 100644
10645--- a/Makefile
10646+++ b/Makefile
10647@@ -577,13 +577,6 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
10648 $(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \
10649 $(evaluate-test)
10650
10651-# Link libc.a as a whole to verify that it does not contain multiple
10652-# definitions of any symbols.
10653-tests-special += $(objpfx)link-static-libc.out
10654-$(objpfx)link-static-libc.out:
10655- $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
10656- $(evaluate-test)
10657-
10658 # Print test summary for tests in $1 .sum file;
10659 # $2 is optional test identifier.
10660 # Fail if there are unexpected failures in the test results.
10661
10662commit c16871e662cd0f3370173d916864b19e69f1bc9a
10663Author: Sam James <sam@gentoo.org>
10664Date: Sat May 4 13:28:51 2024 +0100
10665
10666 Revert "i686: Fix multiple definitions of __memmove_chk and __memset_chk"
10667
10668 This reverts commit ad92c483a4bd34db1cfb3eb625212ea64848244f.
10669
10670 I had the wrong cherry-pick reference (the commit content is right; it's
10671 just referring to a base that isn't upstream), but let's revert and reapply
10672 for clarity.
10673
10674 Signed-off-by: Sam James <sam@gentoo.org>
10675
10676diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
10677index effd958120..f230359ad6 100644
10678--- a/sysdeps/i386/i686/memmove.S
10679+++ b/sysdeps/i386/i686/memmove.S
10680@@ -29,7 +29,7 @@
10681 #define SRC DEST+4
10682 #define LEN SRC+4
10683
10684-#if defined SHARED && IS_IN (libc)
10685+#if defined PIC && IS_IN (libc)
10686 ENTRY_CHK (__memmove_chk)
10687 movl 12(%esp), %eax
10688 cmpl %eax, 16(%esp)
10689diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
10690index ab06771ea0..f02f5a6df7 100644
10691--- a/sysdeps/i386/i686/memset.S
10692+++ b/sysdeps/i386/i686/memset.S
10693@@ -27,7 +27,7 @@
10694 #define LEN CHR+4
10695
10696 .text
10697-#if defined SHARED && IS_IN (libc)
10698+#if defined PIC && IS_IN (libc)
10699 ENTRY_CHK (__memset_chk)
10700 movl 12(%esp), %eax
10701 cmpl %eax, 16(%esp)
10702
10703commit 5141d4d83c17406f0eaea3e345ef2b52e10f386e
10704Author: Sam James <sam@gentoo.org>
10705Date: Sat May 4 13:28:54 2024 +0100
10706
10707 Revert "i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk"
10708
10709 This reverts commit 3148714ab61ad61281bae5a30f530d637034ac3b.
10710
10711 I had the wrong cherry-pick reference (the commit content is right; it's
10712 just referring to a base that isn't upstream), but let's revert and reapply
10713 for clarity.
10714
10715 Signed-off-by: Sam James <sam@gentoo.org>
10716
10717diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S
10718index 79856d498a..3e26f112d6 100644
10719--- a/sysdeps/i386/i586/memcpy.S
10720+++ b/sysdeps/i386/i586/memcpy.S
10721@@ -26,7 +26,7 @@
10722 #define LEN SRC+4
10723
10724 .text
10725-#if defined SHARED && IS_IN (libc)
10726+#if defined PIC && IS_IN (libc)
10727 ENTRY (__memcpy_chk)
10728 movl 12(%esp), %eax
10729 cmpl %eax, 16(%esp)
10730
10731commit 8323a83abd73446dc434aceff66219712c09140b
10732Author: Gabi Falk <gabifalk@gmx.com>
10733Date: Tue Apr 30 20:05:02 2024 +0000
10734
10735 i586: Fix multiple definitions of __memcpy_chk and __mempcpy_chk
10736
10737 /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy_chk.o): in function `__memcpy_chk':
10738 /home/bmg/src/glibc/debug/../sysdeps/i386/memcpy_chk.S:29: multiple definition of `__memcpy_chk';/home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(memcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here /home/bmg/install/compilers/x86_64-linux-gnu/lib/gcc/x86_64-glibc-linux-gnu/13.2.1/../../../../x86_64-glibc-linux-gnu/bin/ld: /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy_chk.o): in function `__mempcpy_chk': /home/bmg/src/glibc/debug/../sysdeps/i386/mempcpy_chk.S:28: multiple definition of `__mempcpy_chk'; /home/bmg/build/glibcs/i586-linux-gnu/glibc/libc.a(mempcpy.o):/home/bmg/src/glibc/string/../sysdeps/i386/i586/memcpy.S:31: first defined here
10739
10740 After this change, the static library built for i586, regardless of PIC
10741 options, contains implementations of these functions respectively from
10742 sysdeps/i386/memcpy_chk.S and sysdeps/i386/mempcpy_chk.S. This ensures
10743 that memcpy and mempcpy won't pull in __chk_fail and the routines it
10744 calls.
10745
10746 Reported-by: Florian Weimer <fweimer@redhat.com>
10747 Signed-off-by: Gabi Falk <gabifalk@gmx.com>
10748 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
10749 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
10750 (cherry picked from commit 0fdf4ba48ccce5abf567340b0ab8fa8ed8a9bc6e)
10751
10752diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S
10753index 3e26f112d6..79856d498a 100644
10754--- a/sysdeps/i386/i586/memcpy.S
10755+++ b/sysdeps/i386/i586/memcpy.S
10756@@ -26,7 +26,7 @@
10757 #define LEN SRC+4
10758
10759 .text
10760-#if defined PIC && IS_IN (libc)
10761+#if defined SHARED && IS_IN (libc)
10762 ENTRY (__memcpy_chk)
10763 movl 12(%esp), %eax
10764 cmpl %eax, 16(%esp)
10765
10766commit 8b005d7869debac4d5cd67f65e49a0fad89da9ad
10767Author: Gabi Falk <gabifalk@gmx.com>
10768Date: Tue Apr 30 20:05:03 2024 +0000
10769
10770 i686: Fix multiple definitions of __memmove_chk and __memset_chk
10771
10772 Commit c73c96a4a1af1326df7f96eec58209e1e04066d8 updated memcpy.S and
10773 mempcpy.S, but omitted memmove.S and memset.S. As a result, the static
10774 library built as PIC, whether with or without multiarch support,
10775 contains two definitions for each of the __memmove_chk and __memset_chk
10776 symbols.
10777
10778 /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../i686-pc-linux-gnu/bin/ld: /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset-ia32.o): in function `__memset_chk':
10779 /var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/string/../sysdeps/i386/i686/memset.S:32: multiple definition of `__memset_chk'; /usr/lib/gcc/i686-pc-linux-gnu/14/../../../../lib/libc.a(memset_chk.o):/var/tmp/portage/sys-libs/glibc-2.39-r3/work/glibc-2.39/debug/../sysdeps/i386/i686/multiarch/memset_chk.c:24: first defined here
10780
10781 After this change, regardless of PIC options, the static library built
10782 for i686 with multiarch contains the implementations of these functions
10783 from debug/memmove_chk.c and debug/memset_chk.c, respectively, and the
10784 one built without multiarch contains the implementations from
10785 sysdeps/i386/memmove_chk.S and sysdeps/i386/memset_chk.S, respectively.
10786 This ensures that memmove and memset won't pull in __chk_fail and the
10787 routines it calls.
10788
10789 Reported-by: Sam James <sam@gentoo.org>
10790 Tested-by: Sam James <sam@gentoo.org>
10791 Fixes: c73c96a4a1 ("i686: Fix build with --disable-multiarch")
10792 Signed-off-by: Gabi Falk <gabifalk@gmx.com>
10793 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
10794 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
10795 (cherry picked from commit 5a2cf833f5772d6c37c7adac388dd9af9cc1c4b9)
10796
10797diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
10798index f230359ad6..effd958120 100644
10799--- a/sysdeps/i386/i686/memmove.S
10800+++ b/sysdeps/i386/i686/memmove.S
10801@@ -29,7 +29,7 @@
10802 #define SRC DEST+4
10803 #define LEN SRC+4
10804
10805-#if defined PIC && IS_IN (libc)
10806+#if defined SHARED && IS_IN (libc)
10807 ENTRY_CHK (__memmove_chk)
10808 movl 12(%esp), %eax
10809 cmpl %eax, 16(%esp)
10810diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
10811index f02f5a6df7..ab06771ea0 100644
10812--- a/sysdeps/i386/i686/memset.S
10813+++ b/sysdeps/i386/i686/memset.S
10814@@ -27,7 +27,7 @@
10815 #define LEN CHR+4
10816
10817 .text
10818-#if defined PIC && IS_IN (libc)
10819+#if defined SHARED && IS_IN (libc)
10820 ENTRY_CHK (__memset_chk)
10821 movl 12(%esp), %eax
10822 cmpl %eax, 16(%esp)
10823
10824commit f8e462342189525e4605cf233b8f798d1c7f398d
10825Author: Gabi Falk <gabifalk@gmx.com>
10826Date: Tue Apr 30 20:05:04 2024 +0000
10827
10828 Add a test to check for duplicate definitions in the static library
10829
10830 This change follows two previous fixes addressing multiple definitions
10831 of __memcpy_chk and __mempcpy_chk functions on i586, and __memmove_chk
10832 and __memset_chk functions on i686. The test is intended to prevent
10833 such issues from occurring in the future.
10834
10835 Signed-off-by: Gabi Falk <gabifalk@gmx.com>
10836 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
10837 Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
10838 (cherry picked from commit ded2e0753e9c46debeb2e0d26c5e560d2581d314)
10839
10840diff --git a/Makefile b/Makefile
10841index 7052b46df8..2e351c0321 100644
10842--- a/Makefile
10843+++ b/Makefile
10844@@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
10845 $(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \
10846 $(evaluate-test)
10847
10848+# Link libc.a as a whole to verify that it does not contain multiple
10849+# definitions of any symbols.
10850+tests-special += $(objpfx)link-static-libc.out
10851+$(objpfx)link-static-libc.out:
10852+ $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
10853+ $(evaluate-test)
10854+
10855 # Print test summary for tests in $1 .sum file;
10856 # $2 is optional test identifier.
10857 # Fail if there are unexpected failures in the test results.