commit 6daa77104520ca992a9369bd01cccd4d98c82984 Author: Andreas K. Hüttel Date: Sun Jul 21 19:02:10 2024 +0200 Replace advisories directory Signed-off-by: Andreas K. Hüttel diff --git a/advisories/GLIBC-SA-2023-0001 b/advisories/GLIBC-SA-2023-0001 deleted file mode 100644 index 3d19c91b6a..0000000000 --- a/advisories/GLIBC-SA-2023-0001 +++ /dev/null @@ -1,14 +0,0 @@ -printf: incorrect output for integers with thousands separator and width field - -When the printf family of functions is called with a format specifier -that uses an (enable grouping) and a minimum width -specifier, the resulting output could be larger than reasonably expected -by a caller that computed a tight bound on the buffer size. The -resulting larger than expected output could result in a buffer overflow -in the printf family of functions. - -CVE-Id: CVE-2023-25139 -Public-Date: 2023-02-02 -Vulnerable-Commit: e88b9f0e5cc50cab57a299dc7efe1a4eb385161d (2.37) -Fix-Commit: c980549cc6a1c03c23cc2fe3e7b0fe626a0364b0 (2.38) -Fix-Commit: 07b9521fc6369d000216b96562ff7c0ed32a16c4 (2.37-4) diff --git a/advisories/GLIBC-SA-2023-0002 b/advisories/GLIBC-SA-2023-0002 deleted file mode 100644 index 5122669a64..0000000000 --- a/advisories/GLIBC-SA-2023-0002 +++ /dev/null @@ -1,15 +0,0 @@ -getaddrinfo: Stack read overflow in no-aaaa mode - -If the system is configured in no-aaaa mode via /etc/resolv.conf, -getaddrinfo is called for the AF_UNSPEC address family, and a DNS -response is received over TCP that is larger than 2048 bytes, -getaddrinfo may potentially disclose stack contents via the returned -address data, or crash. 
- -CVE-Id: CVE-2023-4527 -Public-Date: 2023-09-12 -Vulnerable-Commit: f282cdbe7f436c75864e5640a409a10485e9abb2 (2.36) -Fix-Commit: bd77dd7e73e3530203be1c52c8a29d08270cb25d (2.39) -Fix-Commit: 4ea972b7edd7e36610e8cde18bf7a8149d7bac4f (2.36-113) -Fix-Commit: b7529346025a130fee483d42178b5c118da971bb (2.37-38) -Fix-Commit: b25508dd774b617f99419bdc3cf2ace4560cd2d6 (2.38-19) diff --git a/advisories/GLIBC-SA-2023-0003 b/advisories/GLIBC-SA-2023-0003 deleted file mode 100644 index d3aef80348..0000000000 --- a/advisories/GLIBC-SA-2023-0003 +++ /dev/null @@ -1,15 +0,0 @@ -getaddrinfo: Potential use-after-free - -When an NSS plugin only implements the _gethostbyname2_r and -_getcanonname_r callbacks, getaddrinfo could use memory that was freed -during buffer resizing, potentially causing a crash or read or write to -arbitrary memory. - -CVE-Id: CVE-2023-4806 -Public-Date: 2023-09-12 -Fix-Commit: 973fe93a5675c42798b2161c6f29c01b0e243994 (2.39) -Fix-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) -Fix-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) -Fix-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) -Fix-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) -Fix-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) diff --git a/advisories/GLIBC-SA-2023-0004 b/advisories/GLIBC-SA-2023-0004 deleted file mode 100644 index 5286a7aa54..0000000000 --- a/advisories/GLIBC-SA-2023-0004 +++ /dev/null @@ -1,16 +0,0 @@ -tunables: local privilege escalation through buffer overflow - -If a tunable of the form NAME=NAME=VAL is passed in the environment of a -setuid program and NAME is valid, it may result in a buffer overflow, -which could be exploited to achieve escalated privileges. This flaw was -introduced in glibc 2.34. 
- -CVE-Id: CVE-2023-4911 -Public-Date: 2023-10-03 -Vulnerable-Commit: 2ed18c5b534d9e92fc006202a5af0df6b72e7aca (2.34) -Fix-Commit: 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa (2.39) -Fix-Commit: dcc367f148bc92e7f3778a125f7a416b093964d9 (2.34-423) -Fix-Commit: c84018a05aec80f5ee6f682db0da1130b0196aef (2.35-274) -Fix-Commit: 22955ad85186ee05834e47e665056148ca07699c (2.36-118) -Fix-Commit: b4e23c75aea756b4bddc4abcf27a1c6dca8b6bd3 (2.37-45) -Fix-Commit: 750a45a783906a19591fb8ff6b7841470f1f5701 (2.38-27) diff --git a/advisories/GLIBC-SA-2023-0005 b/advisories/GLIBC-SA-2023-0005 deleted file mode 100644 index cc4eb90b82..0000000000 --- a/advisories/GLIBC-SA-2023-0005 +++ /dev/null @@ -1,18 +0,0 @@ -getaddrinfo: DoS due to memory leak - -The fix for CVE-2023-4806 introduced a memory leak when an application -calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED -flags set. - -CVE-Id: CVE-2023-5156 -Public-Date: 2023-09-25 -Vulnerable-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) -Vulnerable-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) -Vulnerable-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) -Vulnerable-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) -Vulnerable-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) -Fix-Commit: 8006457ab7e1cd556b919f477348a96fe88f2e49 (2.34-421) -Fix-Commit: 17092c0311f954e6f3c010f73ce3a78c24ac279a (2.35-272) -Fix-Commit: 856bac55f98dc840e7c27cfa82262b933385de90 (2.36-116) -Fix-Commit: 4473d1b87d04b25cdd0e0354814eeaa421328268 (2.37-42) -Fix-Commit: 5ee59ca371b99984232d7584fe2b1a758b4421d3 (2.38-24) diff --git a/advisories/GLIBC-SA-2024-0001 b/advisories/GLIBC-SA-2024-0001 deleted file mode 100644 index 28931c75ae..0000000000 --- a/advisories/GLIBC-SA-2024-0001 +++ /dev/null @@ -1,15 +0,0 @@ -syslog: Heap buffer overflow in __vsyslog_internal - -__vsyslog_internal did not handle a case where printing a SYSLOG_HEADER -containing a long program name failed to update 
the required buffer -size, leading to the allocation and overflow of a too-small buffer on -the heap. - -CVE-Id: CVE-2023-6246 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: 6bd0e4efcc78f3c0115e5ea9739a1642807450da (2.39) -Fix-Commit: 23514c72b780f3da097ecf33a793b7ba9c2070d2 (2.38-42) -Fix-Commit: 97a4292aa4a2642e251472b878d0ec4c46a0e59a (2.37-57) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: d1a83b6767f68b3cb5b4b4ea2617254acd040c82 (2.36-126) diff --git a/advisories/GLIBC-SA-2024-0002 b/advisories/GLIBC-SA-2024-0002 deleted file mode 100644 index 940bfcf2fc..0000000000 --- a/advisories/GLIBC-SA-2024-0002 +++ /dev/null @@ -1,15 +0,0 @@ -syslog: Heap buffer overflow in __vsyslog_internal - -__vsyslog_internal used the return value of snprintf/vsnprintf to -calculate buffer sizes for memory allocation. If these functions (for -any reason) failed and returned -1, the resulting buffer would be too -small to hold output. - -CVE-Id: CVE-2023-6779 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: 7e5a0c286da33159d47d0122007aac016f3e02cd (2.39) -Fix-Commit: d0338312aace5bbfef85e03055e1212dd0e49578 (2.38-43) -Fix-Commit: 67062eccd9a65d7fda9976a56aeaaf6c25a80214 (2.37-58) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: 2bc9d7c002bdac38b5c2a3f11b78e309d7765b83 (2.36-127) diff --git a/advisories/GLIBC-SA-2024-0003 b/advisories/GLIBC-SA-2024-0003 deleted file mode 100644 index b43a5150ab..0000000000 --- a/advisories/GLIBC-SA-2024-0003 +++ /dev/null @@ -1,13 +0,0 @@ -syslog: Integer overflow in __vsyslog_internal - -__vsyslog_internal calculated a buffer size by adding two integers, but -did not first check if the addition would overflow. 
- -CVE-Id: CVE-2023-6780 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: ddf542da94caf97ff43cc2875c88749880b7259b (2.39) -Fix-Commit: d37c2b20a4787463d192b32041c3406c2bd91de0 (2.38-44) -Fix-Commit: 2b58cba076e912961ceaa5fa58588e4b10f791c0 (2.37-59) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: b9b7d6a27aa0632f334352fa400771115b3c69b7 (2.36-128) diff --git a/advisories/GLIBC-SA-2024-0004 b/advisories/GLIBC-SA-2024-0004 deleted file mode 100644 index 08df2b3118..0000000000 --- a/advisories/GLIBC-SA-2024-0004 +++ /dev/null @@ -1,28 +0,0 @@ -ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence - -The iconv() function in the GNU C Library versions 2.39 and older may -overflow the output buffer passed to it by up to 4 bytes when converting -strings to the ISO-2022-CN-EXT character set, which may be used to -crash an application or overwrite a neighbouring variable. - -ISO-2022-CN-EXT uses escape sequences to indicate character set changes -(as specified by RFC 1922). While the SOdesignation has the expected -bounds checks, neither SS2designation nor SS3designation have its; -allowing a write overflow of 1, 2, or 3 bytes with fixed values: -'$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'. 
- -CVE-Id: CVE-2024-2961 -Public-Date: 2024-04-17 -Vulnerable-Commit: 755104edc75c53f4a0e7440334e944ad3c6b32fc (2.1.93-169) -Fix-Commit: f9dc609e06b1136bb0408be9605ce7973a767ada (2.40) -Fix-Commit: 31da30f23cddd36db29d5b6a1c7619361b271fb4 (2.39-31) -Fix-Commit: e1135387deded5d73924f6ca20c72a35dc8e1bda (2.38-66) -Fix-Commit: 89ce64b269a897a7780e4c73a7412016381c6ecf (2.37-89) -Fix-Commit: 4ed98540a7fd19f458287e783ae59c41e64df7b5 (2.36-164) -Fix-Commit: 36280d1ce5e245aabefb877fe4d3c6cff95dabfa (2.35-315) -Fix-Commit: a8b0561db4b9847ebfbfec20075697d5492a363c (2.34-459) -Fix-Commit: ed4f16ff6bed3037266f1fa682ebd32a18fce29c (2.33-263) -Fix-Commit: 682ad4c8623e611a971839990ceef00346289cc9 (2.32-140) -Fix-Commit: 3703c32a8d304c1ee12126134ce69be965f38000 (2.31-154) - -Reported-By: Charles Fol diff --git a/advisories/GLIBC-SA-2024-0005 b/advisories/GLIBC-SA-2024-0005 deleted file mode 100644 index a59596610a..0000000000 --- a/advisories/GLIBC-SA-2024-0005 +++ /dev/null @@ -1,22 +0,0 @@ -nscd: Stack-based buffer overflow in netgroup cache - -If the Name Service Cache Daemon's (nscd) fixed size cache is exhausted -by client requests then a subsequent client request for netgroup data -may result in a stack-based buffer overflow. This flaw was introduced -in glibc 2.15 when the cache was added to nscd. - -This vulnerability is only present in the nscd binary. 
- -CVE-Id: CVE-2024-33599 -Public-Date: 2024-04-23 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: 69c58d5ef9f584ea198bd00f7964d364d0e6b921 (2.31-155) -Fix-Commit: a77064893bfe8a701770e2f53a4d33805bc47a5a (2.32-141) -Fix-Commit: 5c75001a96abcd50cbdb74df24c3f013188d076e (2.33-264) -Fix-Commit: 52f73e5c4e29b14e79167272297977f360ae1e97 (2.34-460) -Fix-Commit: 7a95873543ce225376faf13bb71c43dea6d24f86 (2.35-316) -Fix-Commit: caa3151ca460bdd9330adeedd68c3112d97bffe4 (2.36-165) -Fix-Commit: f75c298e747b2b8b41b1c2f551c011a52c41bfd1 (2.37-91) -Fix-Commit: 5968aebb86164034b8f8421b4abab2f837a5bdaf (2.38-72) -Fix-Commit: 1263d583d2e28afb8be53f8d6922f0842036f35d (2.39-35) -Fix-Commit: 87801a8fd06db1d654eea3e4f7626ff476a9bdaa (2.40) diff --git a/advisories/GLIBC-SA-2024-0006 b/advisories/GLIBC-SA-2024-0006 deleted file mode 100644 index d44148d3d9..0000000000 --- a/advisories/GLIBC-SA-2024-0006 +++ /dev/null @@ -1,32 +0,0 @@ -nscd: Null pointer crash after notfound response - -If the Name Service Cache Daemon's (nscd) cache fails to add a not-found -netgroup response to the cache, the client request can result in a null -pointer dereference. This flaw was introduced in glibc 2.15 when the -cache was added to nscd. - -This vulnerability is only present in the nscd binary. 
- -CVE-Id: CVE-2024-33600 -Public-Date: 2024-04-24 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: b048a482f088e53144d26a61c390bed0210f49f2 (2.40) -Fix-Commit: 7835b00dbce53c3c87bbbb1754a95fb5e58187aa (2.40) -Fix-Commit: c99f886de54446cd4447db6b44be93dabbdc2f8b (2.39-37) -Fix-Commit: 5a508e0b508c8ad53bd0d2fb48fd71b242626341 (2.39-36) -Fix-Commit: 2ae9446c1b7a3064743b4a51c0bbae668ee43e4c (2.38-74) -Fix-Commit: 541ea5172aa658c4bd5c6c6d6fd13903c3d5bb0a (2.38-73) -Fix-Commit: a8070b31043c7585c36ba68a74298c4f7af075c3 (2.37-93) -Fix-Commit: 5eea50c4402e39588de98aa1d4469a79774703d4 (2.37-92) -Fix-Commit: f205b3af56740e3b014915b1bd3b162afe3407ef (2.36-167) -Fix-Commit: c34f470a615b136170abd16142da5dd0c024f7d1 (2.36-166) -Fix-Commit: bafadc589fbe21ae330e8c2af74db9da44a17660 (2.35-318) -Fix-Commit: 4370bef52b0f3f3652c6aa13d7a9bb3ac079746d (2.35-317) -Fix-Commit: 1f94122289a9bf7dba573f5d60327aaa2b85cf2e (2.34-462) -Fix-Commit: 966d6ac9e40222b84bb21674cc4f83c8d72a5a26 (2.34-461) -Fix-Commit: e3eef1b8fbdd3a7917af466ca9c4b7477251ca79 (2.33-266) -Fix-Commit: f20a8d696b13c6261b52a6434899121f8b19d5a7 (2.33-265) -Fix-Commit: be602180146de37582a3da3a0caa4b719645de9c (2.32-143) -Fix-Commit: 394eae338199078b7961b051c191539870742d7b (2.32-142) -Fix-Commit: 8d7949183760170c61e55def723c1d8050187874 (2.31-157) -Fix-Commit: 304ce5fe466c4762b21b36c26926a4657b59b53e (2.31-156) diff --git a/advisories/GLIBC-SA-2024-0007 b/advisories/GLIBC-SA-2024-0007 deleted file mode 100644 index b6928fa27a..0000000000 --- a/advisories/GLIBC-SA-2024-0007 +++ /dev/null @@ -1,28 +0,0 @@ -nscd: netgroup cache may terminate daemon on memory allocation failure - -The Name Service Cache Daemon's (nscd) netgroup cache uses xmalloc or -xrealloc and these functions may terminate the process due to a memory -allocation failure resulting in a denial of service to the clients. The -flaw was introduced in glibc 2.15 when the cache was added to nscd. 
- -This vulnerability is only present in the nscd binary. - -Subsequent refactoring of the netgroup cache only added more uses of -xmalloc and xrealloc. Uses of xmalloc and xrealloc in other parts of -nscd only occur during startup of the daemon and so are not affected by -client requests that could trigger an out of memory followed by -termination. - -CVE-Id: CVE-2024-33601 -Public-Date: 2024-04-24 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: c04a21e050d64a1193a6daab872bca2528bda44b (2.40) -Fix-Commit: a9a8d3eebb145779a18d90e3966009a1daa63cd8 (2.39-38) -Fix-Commit: 71af8ca864345d39b746d5cee84b94b430fad5db (2.38-75) -Fix-Commit: 6e106dc214d6a033a4e945d1c6cf58061f1c5f1f (2.37-94) -Fix-Commit: b6742463694b1dfdd5120b91ee21cf05d15ec2e2 (2.36-168) -Fix-Commit: 7a5864cac60e06000394128a5a2817b03542f5a3 (2.35-319) -Fix-Commit: 86f1d5f4129c373ac6fb6df5bcf38273838843cb (2.34-463) -Fix-Commit: 4d27d4b9a188786fc6a56745506cec2acfc51f83 (2.33-267) -Fix-Commit: 3ed195a8ec89da281e3c4bf887a13d281b72d8f4 (2.32-144) -Fix-Commit: bbf5a58ccb55679217f94de706164d15372fbbc0 (2.31-158) diff --git a/advisories/GLIBC-SA-2024-0008 b/advisories/GLIBC-SA-2024-0008 deleted file mode 100644 index d93e2a6f0b..0000000000 --- a/advisories/GLIBC-SA-2024-0008 +++ /dev/null @@ -1,26 +0,0 @@ -nscd: netgroup cache assumes NSS callback uses in-buffer strings - -The Name Service Cache Daemon's (nscd) netgroup cache can corrupt memory -when the NSS callback does not store all strings in the provided buffer. -The flaw was introduced in glibc 2.15 when the cache was added to nscd. - -This vulnerability is only present in the nscd binary. - -There is no guarantee from the NSS callback API that the returned -strings are all within the buffer. However, the netgroup cache code -assumes that the NSS callback uses in-buffer strings and if it doesn't -the buffer resizing logic could lead to potential memory corruption. 
- -CVE-Id: CVE-2024-33602 -Public-Date: 2024-04-24 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: c04a21e050d64a1193a6daab872bca2528bda44b (2.40) -Fix-Commit: a9a8d3eebb145779a18d90e3966009a1daa63cd8 (2.39-38) -Fix-Commit: 71af8ca864345d39b746d5cee84b94b430fad5db (2.38-75) -Fix-Commit: 6e106dc214d6a033a4e945d1c6cf58061f1c5f1f (2.37-94) -Fix-Commit: b6742463694b1dfdd5120b91ee21cf05d15ec2e2 (2.36-168) -Fix-Commit: 7a5864cac60e06000394128a5a2817b03542f5a3 (2.35-319) -Fix-Commit: 86f1d5f4129c373ac6fb6df5bcf38273838843cb (2.34-463) -Fix-Commit: 4d27d4b9a188786fc6a56745506cec2acfc51f83 (2.33-267) -Fix-Commit: 3ed195a8ec89da281e3c4bf887a13d281b72d8f4 (2.32-144) -Fix-Commit: bbf5a58ccb55679217f94de706164d15372fbbc0 (2.31-158) diff --git a/advisories/README b/advisories/README deleted file mode 100644 index b8f8a829ca..0000000000 --- a/advisories/README +++ /dev/null @@ -1,77 +0,0 @@ -GNU C Library Security Advisory Format -====================================== - -Security advisories in this directory follow a simple git commit log -format, with a heading and free-format description augmented with tags -to allow parsing key information. References to code changes are -specific to the glibc repository and follow a specific format: - - Tag-name: (release-version) - -The indicates a specific commit in the repository. The -release-version indicates the publicly consumable release in which this -commit is known to exist. The release-version is derived from the -git-describe format, (i.e. stripped out from glibc-2.34.NNN-gxxxx) and -is of the form 2.34-NNN. If the -NNN suffix is absent, it means that -the change is in that release tarball, otherwise the change is on the -release/2.YY/master branch and not in any released tarball. - -The following tags are currently being used: - -CVE-Id: -This is the CVE-Id assigned under the CVE Program -(https://www.cve.org/). - -Public-Date: -The date this issue became publicly known. 
- -Vulnerable-Commit: -The commit that introduced this vulnerability. There could be multiple -entries, one for each release branch in the glibc repository; the -release-version portion of this tag should tell you which branch this is -on. - -Fix-Commit: -The commit that fixed this vulnerability. There could be multiple -entries for each release branch in the glibc repository, indicating that -all of those commits contributed to fixing that issue in each of those -branches. - -Reported-By: -The entity that reported this issue. There could be multiple entries, one for -each reporter. - -Adding an Advisory ------------------- - -An advisory for a CVE needs to be added on the master branch in two steps: - -1. Add the text of the advisory without any Fix-Commit tags along with - the fix for the CVE. Add the Vulnerable-Commit tag, if applicable. - The advisories directory does not exist in release branches, so keep - the advisory text commit distinct from the code changes, to ease - backports. Ask for the GLIBC-SA advisory number from the security - team. - -2. Finish all backports on release branches and then back on the msater - branch, add all commit refs to the advisory using the Fix-Commit - tags. Don't bother adding the release-version subscript since the - next step will overwrite it. - -3. Run the process-advisories.sh script in the scripts directory on the - advisory: - - scripts/process-advisories.sh update GLIBC-SA-YYYY-NNNN - - (replace YYYY-NNNN with the actual advisory number). - -4. Verify the updated advisory and push the result. - -Getting a NEWS snippet from advisories --------------------------------------- - -Run: - - scripts/process-advisories.sh news - -and copy the content into the NEWS file. 
commit 8bbb8d7b16cae777eed06bc9d2e059fc00be24e9 Author: Florian Weimer Date: Wed Jul 24 12:06:47 2024 +0200 resolv: Allow short error responses to match any query (bug 31890) Reviewed-by: DJ Delorie (cherry picked from commit 691a3b2e9bfaba842e46a5ccb7f5e6ea144c3ade) diff --git a/NEWS b/NEWS index 31281ac408..66b755ed34 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,12 @@ See the end for copying conditions. Please send GNU C library bug reports via using `glibc' in the "product" field. +Version 2.41 + +The following bugs are resolved with this release: + + [31890] resolv: Allow short error responses to match any DNS query + Version 2.40 Major new features: diff --git a/resolv/Makefile b/resolv/Makefile index 5f44f5896b..d927e337d9 100644 --- a/resolv/Makefile +++ b/resolv/Makefile @@ -106,6 +106,7 @@ tests += \ tst-resolv-nondecimal \ tst-resolv-res_init-multi \ tst-resolv-search \ + tst-resolv-short-response \ tst-resolv-trailing \ # This test calls __res_context_send directly, which is not exported @@ -299,6 +300,8 @@ $(objpfx)tst-resolv-nondecimal: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-qtypes: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-rotate: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library) +$(objpfx)tst-resolv-short-response: $(objpfx)libresolv.so \ + $(shared-thread-library) $(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-threads: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-txnid-collision: $(objpfx)libresolv.a \ diff --git a/resolv/res_send.c b/resolv/res_send.c index ea7cf192b2..572e72c32f 100644 --- a/resolv/res_send.c +++ b/resolv/res_send.c @@ -1199,19 +1199,30 @@ send_dg(res_state statp, } /* Check for the correct header layout and a matching - question. */ + question. 
Some recursive resolvers send REFUSED + without copying back the question section + (producing a response that is only HFIXEDSZ bytes + long). Skip query matching in this case. */ + bool thisansp_error = (anhp->rcode == SERVFAIL || + anhp->rcode == NOTIMP || + anhp->rcode == REFUSED); + bool skip_query_match = (*thisresplenp == HFIXEDSZ + && ntohs (anhp->qdcount) == 0 + && thisansp_error); int matching_query = 0; /* Default to no matching query. */ if (!recvresp1 && anhp->id == hp->id - && __libc_res_queriesmatch (buf, buf + buflen, - *thisansp, - *thisansp + *thisanssizp)) + && (skip_query_match + || __libc_res_queriesmatch (buf, buf + buflen, + *thisansp, + *thisansp + *thisanssizp))) matching_query = 1; if (!recvresp2 && anhp->id == hp2->id - && __libc_res_queriesmatch (buf2, buf2 + buflen2, - *thisansp, - *thisansp + *thisanssizp)) + && (skip_query_match + || __libc_res_queriesmatch (buf2, buf2 + buflen2, + *thisansp, + *thisansp + *thisanssizp))) matching_query = 2; if (matching_query == 0) /* Spurious UDP packet. Drop it and continue @@ -1221,9 +1232,7 @@ send_dg(res_state statp, goto wait; } - if (anhp->rcode == SERVFAIL || - anhp->rcode == NOTIMP || - anhp->rcode == REFUSED) { + if (thisansp_error) { next_ns: if (recvresp1 || (buf2 != NULL && recvresp2)) { *resplen2 = 0; diff --git a/resolv/tst-resolv-short-response.c b/resolv/tst-resolv-short-response.c new file mode 100644 index 0000000000..cf1e39876f --- /dev/null +++ b/resolv/tst-resolv-short-response.c @@ -0,0 +1,112 @@ +/* Test for spurious timeouts with short 12-byte responses (bug 31890). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* The rcode in the initial response. */ +static volatile int rcode; + +static void +response (const struct resolv_response_context *ctx, + struct resolv_response_builder *b, + const char *qname, uint16_t qclass, uint16_t qtype) +{ + switch (ctx->server_index) + { + case 0: + /* First server sends a short error response (no question). */ + struct resolv_response_flags flags = {.rcode = rcode}; + resolv_response_init (b, flags); + break; + case 1: + /* Second server sends reply. */ + resolv_response_init (b, (struct resolv_response_flags) {}); + resolv_response_add_question (b, qname, qclass, qtype); + resolv_response_section (b, ns_s_an); + resolv_response_open_record (b, qname, qclass, qtype, 0); + switch (qtype) + { + case T_A: + { + char ipv4[4] = {192, 0, 2, 17}; + resolv_response_add_data (b, &ipv4, sizeof (ipv4)); + } + break; + case T_AAAA: + { + char ipv6[16] + = {0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + resolv_response_add_data (b, &ipv6, sizeof (ipv6)); + } + break; + default: + FAIL_EXIT1 ("unexpected TYPE%d query", qtype); + } + resolv_response_close_record (b); + break; + default: + FAIL_EXIT1 ("unexpected query to server %d", ctx->server_index); + } +} + +static void +check_one (void) +{ + + /* The buggy 1-second query timeout results in 30 seconds of delay, + which triggers a test timeout failure. 
*/ + for (int i = 0; i < 10; ++i) + { + check_hostent ("www.example", gethostbyname ("www.example"), + "name: www.example\n" + "address: 192.0.2.17\n"); + check_hostent ("www.example", gethostbyname2 ("www.example", AF_INET6), + "name: www.example\n" + "address: 2001:db8::1\n"); + } +} + +static int +do_test (void) +{ + struct resolv_test *aux = resolv_test_start + ((struct resolv_redirect_config) + { + .response_callback = response, + }); + + _res.options |= RES_SNGLKUP; + + rcode = 2; /* SERVFAIL. */ + check_one (); + + rcode = 4; /* NOTIMP. */ + check_one (); + + rcode = 5; /* REFUSED. */ + check_one (); + + resolv_test_end (aux); + + return 0; +} + +#include commit ef141426630da0946f0af8732eddce9b9a52b2d3 Author: Florian Weimer Date: Wed Jul 24 12:06:47 2024 +0200 resolv: Do not wait for non-existing second DNS response after error (bug 30081) In single-request mode, there is no second response after an error because the second query has not been sent yet. Waiting for it introduces an unnecessary timeout. 
Reviewed-by: DJ Delorie (cherry picked from commit af625987d619388a100b153520d3ee308bda9889) diff --git a/NEWS b/NEWS index 66b755ed34..4d7100b21c 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,7 @@ Version 2.41 The following bugs are resolved with this release: + [30081] resolv: Do not wait for non-existing second DNS response after error [31890] resolv: Allow short error responses to match any DNS query Version 2.40 diff --git a/resolv/Makefile b/resolv/Makefile index d927e337d9..abff7fc007 100644 --- a/resolv/Makefile +++ b/resolv/Makefile @@ -106,6 +106,7 @@ tests += \ tst-resolv-nondecimal \ tst-resolv-res_init-multi \ tst-resolv-search \ + tst-resolv-semi-failure \ tst-resolv-short-response \ tst-resolv-trailing \ @@ -300,6 +301,8 @@ $(objpfx)tst-resolv-nondecimal: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-qtypes: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-rotate: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library) +$(objpfx)tst-resolv-semi-failure: $(objpfx)libresolv.so \ + $(shared-thread-library) $(objpfx)tst-resolv-short-response: $(objpfx)libresolv.so \ $(shared-thread-library) $(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library) diff --git a/resolv/res_send.c b/resolv/res_send.c index 572e72c32f..9c77613f37 100644 --- a/resolv/res_send.c +++ b/resolv/res_send.c @@ -1238,7 +1238,7 @@ send_dg(res_state statp, *resplen2 = 0; return resplen; } - if (buf2 != NULL) + if (buf2 != NULL && !single_request) { /* No data from the first reply. */ resplen = 0; diff --git a/resolv/tst-resolv-semi-failure.c b/resolv/tst-resolv-semi-failure.c new file mode 100644 index 0000000000..aa9798b5a7 --- /dev/null +++ b/resolv/tst-resolv-semi-failure.c @@ -0,0 +1,133 @@ +/* Test parallel failure/success responses (bug 30081). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* The rcode in the initial response. */ +static volatile int rcode; + +/* Whether to fail the initial A query (!fail_aaaa) or the initial + AAAA query (fail_aaaa). */ +static volatile bool fail_aaaa; + +static void +response (const struct resolv_response_context *ctx, + struct resolv_response_builder *b, + const char *qname, uint16_t qclass, uint16_t qtype) +{ + /* Handle the failing query. */ + if ((fail_aaaa && qtype == T_AAAA) && ctx->server_index == 0) + { + struct resolv_response_flags flags = {.rcode = rcode}; + resolv_response_init (b, flags); + return; + } + + /* Otherwise produce a response. 
*/ + resolv_response_init (b, (struct resolv_response_flags) {}); + resolv_response_add_question (b, qname, qclass, qtype); + resolv_response_section (b, ns_s_an); + resolv_response_open_record (b, qname, qclass, qtype, 0); + switch (qtype) + { + case T_A: + { + char ipv4[4] = {192, 0, 2, 17}; + resolv_response_add_data (b, &ipv4, sizeof (ipv4)); + } + break; + case T_AAAA: + { + char ipv6[16] + = {0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + resolv_response_add_data (b, &ipv6, sizeof (ipv6)); + } + break; + default: + FAIL_EXIT1 ("unexpected TYPE%d query", qtype); + } + resolv_response_close_record (b); +} + +static void +check_one (void) +{ + + /* The buggy 1-second query timeout results in 30 seconds of delay, + which triggers a test timeout failure. */ + for (int i = 0; i < 30; ++i) + { + static const struct addrinfo hints = + { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *ai; + int ret = getaddrinfo ("www.example", "80", &hints, &ai); + const char *expected; + if (ret == 0 && ai->ai_next != NULL) + expected = ("address: STREAM/TCP 192.0.2.17 80\n" + "address: STREAM/TCP 2001:db8::1 80\n"); + else + /* Only one response because the AAAA lookup failure is + treated as an ignorable error. */ + expected = "address: STREAM/TCP 192.0.2.17 80\n"; + check_addrinfo ("www.example", ai, ret, expected); + if (ret == 0) + freeaddrinfo (ai); + } +} + +static int +do_test (void) +{ + for (int do_single_lookup = 0; do_single_lookup < 2; ++do_single_lookup) + { + struct resolv_test *aux = resolv_test_start + ((struct resolv_redirect_config) + { + .response_callback = response, + }); + + if (do_single_lookup) + _res.options |= RES_SNGLKUP; + + for (int do_fail_aaaa = 0; do_fail_aaaa < 2; ++do_fail_aaaa) + { + fail_aaaa = do_fail_aaaa; + + rcode = 2; /* SERVFAIL. */ + check_one (); + + rcode = 4; /* NOTIMP. */ + check_one (); + + rcode = 5; /* REFUSED. 
*/ + check_one (); + } + + resolv_test_end (aux); + } + + return 0; +} + +#include diff --git a/resolv/tst-resolv-short-response.c b/resolv/tst-resolv-short-response.c index cf1e39876f..be354ae1c7 100644 --- a/resolv/tst-resolv-short-response.c +++ b/resolv/tst-resolv-short-response.c @@ -81,6 +81,18 @@ check_one (void) check_hostent ("www.example", gethostbyname2 ("www.example", AF_INET6), "name: www.example\n" "address: 2001:db8::1\n"); + static const struct addrinfo hints = + { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *ai; + int ret = getaddrinfo ("www.example", "80", &hints, &ai); + check_addrinfo ("www.example", ai, ret, + "address: STREAM/TCP 192.0.2.17 80\n" + "address: STREAM/TCP 2001:db8::1 80\n"); + if (ret == 0) + freeaddrinfo (ai); } } commit b6aeba2de157ba0cdc8fb0aed67b632b3490f383 Author: Florian Weimer Date: Wed Jul 24 12:50:17 2024 +0200 manual: Do not mention STATIC_TLS in dynamic linker hardening recommendations The current toolchain does not consistently generate it, and glibc does not use it. Reviewed-by: Szabolcs Nagy (cherry picked from commit 90842d3980064ef410b998b22170ad409b76b9fd) diff --git a/manual/dynlink.texi b/manual/dynlink.texi index 03565d4fb0..1500a53de6 100644 --- a/manual/dynlink.texi +++ b/manual/dynlink.texi @@ -993,21 +993,21 @@ The dynamic segment should also mention @code{BIND_NOW} on the enough). @item -For shared objects (not main programs), if the program header has a -@code{PT_TLS} segment, the dynamic segment (as shown by @samp{readelf --dW}) should contain the @code{STATIC_TLS} flag on the @code{FLAGS} -line. - -If @code{STATIC_TLS} is missing in shared objects, ensure that the -appropriate relocations for GNU2 TLS descriptors are used (for example, +Ensure that only static TLS relocations (thread-pointer relative offset +locations) are used, for example @code{R_AARCH64_TLS_TPREL} and +@code{R_X86_64_TPOFF64}. 
As the second-best option, and only if +compatibility with non-hardened applications using @code{dlopen} is +needed, GNU2 TLS descriptor relocations can be used (for example, @code{R_AARCH64_TLSDESC} or @code{R_X86_64_TLSDESC}). @item -There should not be a reference to the symbols @code{__tls_get_addr}, -@code{__tls_get_offset}, @code{__tls_get_addr_opt} in the dynamic symbol -table (in the @samp{readelf -sDW} output). Thread-local storage must be -accessed using the initial-exec (static) model, or using GNU2 TLS -descriptors. +There should not be references to the traditional TLS function symbols +@code{__tls_get_addr}, @code{__tls_get_offset}, +@code{__tls_get_addr_opt} in the dynamic symbol table (in the +@samp{readelf -sDW} output). Supporting global dynamic TLS relocations +(such as @code{R_AARCH64_TLS_DTPMOD}, @code{R_AARCH64_TLS_DTPREL}, +@code{R_X86_64_DTPMOD64}, @code{R_X86_64_DTPOFF64}) should not be used, +either. @item Likewise, the functions @code{dlopen}, @code{dlmopen}, @code{dlclose} commit 145b5886379c8de4f0a1bca3556a4c3d7b6c24b2 Author: Florian Weimer Date: Wed Jul 24 13:42:16 2024 +0200 Fix version number in NEWS file diff --git a/NEWS b/NEWS index 4d7100b21c..6b62f55658 100644 --- a/NEWS +++ b/NEWS @@ -5,7 +5,7 @@ See the end for copying conditions. Please send GNU C library bug reports via using `glibc' in the "product" field. -Version 2.41 +Version 2.40.1 The following bugs are resolved with this release: commit 2aebac5e158277d852b87b0cbd4af2b2d10ac387 Author: Miguel Martín Date: Tue Jul 16 17:14:56 2024 +0200 malloc: avoid global locks in tst-aligned_alloc-lib.c Make sure the DSO used by aligned_alloc/calloc/malloc tests does not get a global lock on multithreaded tests. 
Reviewed-by: Arjun Shankar (cherry picked from commit 9a27b566b2048f599048f2f4afe1cce06c4ef43d) diff --git a/malloc/tst-aligned_alloc-lib.c b/malloc/tst-aligned_alloc-lib.c index 0205df5acf..9ef1f839c1 100644 --- a/malloc/tst-aligned_alloc-lib.c +++ b/malloc/tst-aligned_alloc-lib.c @@ -17,37 +17,38 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, see . */ -#include #include #include +#include extern void *__libc_malloc (size_t size); extern void *__libc_calloc (size_t n, size_t size); +__thread unsigned int seed = 0; + int aligned_alloc_count = 0; int libc_malloc_count = 0; int libc_calloc_count = 0; -/* Get a random alignment value. Biased towards the smaller values. Must be - a power of 2. */ -static size_t get_random_alignment (void) -{ - size_t aligns[] = { - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384 - }; - - return aligns[random () % array_length (aligns)]; -} - -static void *get_random_alloc (size_t size) +static void * +get_random_alloc (size_t size) { void *retval; size_t align; + struct timespec tp; + + if (seed == 0) + { + clock_gettime (CLOCK_REALTIME, &tp); + seed = tp.tv_nsec; + } - switch (random() % 3) - { + switch (rand_r (&seed) % 3) + { case 1: - align = get_random_alignment (); + /* Get a random alignment value. Biased towards the smaller + * values up to 16384. Must be a power of 2. 
*/ + align = 1 << rand_r (&seed) % 15; retval = aligned_alloc (align, size); aligned_alloc_count++; break; @@ -59,13 +60,13 @@ static void *get_random_alloc (size_t size) retval = __libc_malloc (size); libc_malloc_count++; break; - } + } return retval; } - -void * __random_malloc (size_t size) +void * +__random_malloc (size_t size) { return get_random_alloc (size); } commit 5d2a931a8167a288374c3a38dc10fe0492ab5ffe Author: Miguel Martín Date: Tue Jul 16 17:14:57 2024 +0200 malloc: add multi-threaded tests for aligned_alloc/calloc/malloc Improve aligned_alloc/calloc/malloc test coverage by adding multi-threaded tests with random memory allocations and with/without cross-thread memory deallocations. Perform a number of memory allocation calls with random sizes limited to 0xffff. Use the existing DSO ('malloc/tst-aligned_alloc-lib.c') to randomize allocator selection. The multi-threaded allocation/deallocation is staged as described below: - Stage 1: Half of the threads will be allocating memory and the other half will be waiting for them to finish the allocation. - Stage 2: Half of the threads will be allocating memory and the other half will be deallocating memory. - Stage 3: Half of the threads will be deallocating memory and the second half waiting on them to finish. Add 'malloc/tst-aligned-alloc-random-thread.c' where each thread will deallocate only the memory that was previously allocated by itself. Add 'malloc/tst-aligned-alloc-random-thread-cross.c' where each thread will deallocate memory that was previously allocated by another thread. The intention is to be able to utilize existing malloc testing to ensure that similar allocation APIs are also exposed to the same rigors. 
Reviewed-by: Arjun Shankar (cherry picked from commit b0fbcb7d0051a68baf26b2aed51a8a31c34d68e5) diff --git a/malloc/Makefile b/malloc/Makefile index 02aff1bd1d..98d507a6eb 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -28,6 +28,8 @@ tests := \ mallocbug \ tst-aligned-alloc \ tst-aligned-alloc-random \ + tst-aligned-alloc-random-thread \ + tst-aligned-alloc-random-thread-cross \ tst-alloc_buffer \ tst-calloc \ tst-free-errno \ @@ -151,6 +153,8 @@ ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) # the tests expect specific internal behavior that is changed due to linking to # libmcheck.a. tests-exclude-mcheck = \ + tst-aligned-alloc-random-thread \ + tst-aligned-alloc-random-thread-cross \ tst-compathooks-off \ tst-compathooks-on \ tst-malloc-backtrace \ @@ -415,7 +419,11 @@ $(objpfx)tst-mallocstate: $(objpfx)libc_malloc_debug.so $(objpfx)tst-mallocstate-malloc-check: $(objpfx)libc_malloc_debug.so $(objpfx)tst-aligned-alloc-random.out: $(objpfx)tst-aligned_alloc-lib.so +$(objpfx)tst-aligned-alloc-random-thread.out: $(objpfx)tst-aligned_alloc-lib.so +$(objpfx)tst-aligned-alloc-random-thread-cross.out: $(objpfx)tst-aligned_alloc-lib.so $(objpfx)tst-malloc-random.out: $(objpfx)tst-aligned_alloc-lib.so tst-aligned-alloc-random-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so +tst-aligned-alloc-random-thread-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so +tst-aligned-alloc-random-thread-cross-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so tst-malloc-random-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so diff --git a/malloc/tst-aligned-alloc-random-thread-cross.c b/malloc/tst-aligned-alloc-random-thread-cross.c new file mode 100644 index 0000000000..360ecc56ee --- /dev/null +++ b/malloc/tst-aligned-alloc-random-thread-cross.c @@ -0,0 +1,19 @@ +/* multi-threaded memory allocation and cross-thread deallocation test. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ +#define CROSS_THREAD_DEALLOC +#include "tst-aligned-alloc-random-thread.c" diff --git a/malloc/tst-aligned-alloc-random-thread.c b/malloc/tst-aligned-alloc-random-thread.c new file mode 100644 index 0000000000..e95f79250a --- /dev/null +++ b/malloc/tst-aligned-alloc-random-thread.c @@ -0,0 +1,145 @@ +/* multi-threaded memory allocation/deallocation test. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef ITERATIONS +# define ITERATIONS 16 +#endif + +#ifndef NUM_THREADS +# define NUM_THREADS 8 +#endif + +#ifndef NUM_ALLOCATIONS +# define NUM_ALLOCATIONS 2048 +#endif + +static pthread_barrier_t barrier; + +__thread unsigned int seed; + +typedef struct +{ + int id; + pthread_t thread; +} thread; + +thread threads[NUM_THREADS]; + +void *allocations[NUM_THREADS][NUM_ALLOCATIONS]; + +void +run_thread_dealloc (int id) +{ + for (int i = 0; i < NUM_ALLOCATIONS; i++) + { + free (allocations[id][i]); + allocations[id][i] = NULL; + } +} + +void +run_thread_alloc (int id) +{ + size_t msb, size; + for (int i = 0; i < NUM_ALLOCATIONS; i++) + { + msb = 1 << rand_r (&seed) % 16; + size = msb + rand_r (&seed) % msb; + allocations[id][i] = malloc (size); + TEST_VERIFY_EXIT (allocations[id][i] != NULL); + } +} + +void * +run_allocations (void *arg) +{ + int id = *((int *) arg); + seed = time (NULL) + id; + + /* Stage 1: First half of the threads allocating memory and the second + * half waiting for them to finish + */ + if (id < NUM_THREADS / 2) + run_thread_alloc (id); + + xpthread_barrier_wait (&barrier); + + /* Stage 2: Half of the threads allocating memory and the other + * half deallocating: + * - In the non cross-thread dealloc scenario the first half will be + * deallocating the memory allocated by themselves in stage 1 and the + * second half will be allocating memory. + * - In the cross-thread dealloc scenario the first half will continue + * to allocate memory and the second half will deallocate the memory + * allocated by the first half in stage 1. 
+ */ + if (id < NUM_THREADS / 2) +#ifndef CROSS_THREAD_DEALLOC + run_thread_dealloc (id); +#else + run_thread_alloc (id + NUM_THREADS / 2); +#endif + else +#ifndef CROSS_THREAD_DEALLOC + run_thread_alloc (id); +#else + run_thread_dealloc (id - NUM_THREADS / 2); +#endif + + xpthread_barrier_wait (&barrier); + + // Stage 3: Second half of the threads deallocating and the first half + // waiting for them to finish. + if (id >= NUM_THREADS / 2) + run_thread_dealloc (id); + + return NULL; +} + +static int +do_test (void) +{ + xpthread_barrier_init (&barrier, NULL, NUM_THREADS); + + for (int i = 0; i < ITERATIONS; i++) + { + for (int t = 0; t < NUM_THREADS; t++) + { + threads[t].id = t; + threads[t].thread + = xpthread_create (NULL, run_allocations, &threads[t].id); + } + + for (int t = 0; t < NUM_THREADS; t++) + xpthread_join (threads[t].thread); + } + + return 0; +} + +#include commit 65fbcfe58991194301a7e4fb9c53ab936573e711 Author: Arjun Shankar Date: Mon Jul 29 14:30:59 2024 +0200 manual/stdio: Clarify putc and putwc The manual entry for `putc' described what "most systems" do instead of describing the glibc implementation and its guarantees. This commit fixes that by warning that putc may be implemented as a macro that double-evaluates `stream', and removing the performance claim. Even though the current `putc' implementation does not double-evaluate `stream', offering this obscure guarantee as an extension to what POSIX allows does not seem very useful. The entry for `putwc' is also edited to bring it in line with `putc'. Reviewed-by: Florian Weimer (cherry picked from commit 10de4a47ef3f481592e3c62eb07bcda23e9fde4d) diff --git a/manual/stdio.texi b/manual/stdio.texi index f5e289d58a..f9529a098d 100644 --- a/manual/stdio.texi +++ b/manual/stdio.texi @@ -903,21 +903,21 @@ This function is a GNU extension. 
@deftypefun int putc (int @var{c}, FILE *@var{stream}) @standards{ISO, stdio.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} -This is just like @code{fputc}, except that most systems implement it as +This is just like @code{fputc}, except that it may be implemented as a macro, making it faster. One consequence is that it may evaluate the @var{stream} argument more than once, which is an exception to the -general rule for macros. @code{putc} is usually the best function to -use for writing a single character. +general rule for macros. Therefore, @var{stream} should never be an +expression with side-effects. @end deftypefun @deftypefun wint_t putwc (wchar_t @var{wc}, FILE *@var{stream}) @standards{ISO, wchar.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} -This is just like @code{fputwc}, except that it can be implement as +This is just like @code{fputwc}, except that it may be implemented as a macro, making it faster. One consequence is that it may evaluate the @var{stream} argument more than once, which is an exception to the -general rule for macros. @code{putwc} is usually the best function to -use for writing a single wide character. +general rule for macros. Therefore, @var{stream} should never be an +expression with side-effects. @end deftypefun @deftypefun int putc_unlocked (int @var{c}, FILE *@var{stream}) commit 132a72f93cb4ad9f16b8469dc061de5f75f6a44e Author: Lukas Bulwahn Date: Mon Jul 29 11:08:17 2024 +0200 manual: make setrlimit() description less ambiguous The existing description for setrlimit() has some ambiguity. It could be understood to have the semantics of getrlimit(), i.e., the limits from the process are stored in the provided rlp pointer. Make the description more explicit that rlp are the input values, and that the limits of the process is changed with this function. 
Reviewed-by: Florian Weimer (cherry picked from commit aedbf08891069fc029ed021e4dba933eb877b394) diff --git a/manual/resource.texi b/manual/resource.texi index c9b21dedeb..25966bcb64 100644 --- a/manual/resource.texi +++ b/manual/resource.texi @@ -192,8 +192,8 @@ If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a @standards{BSD, sys/resource.h} @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} @c Direct syscall on most systems; lock-taking critical section on HURD. -Store the current and maximum limits for the resource @var{resource} -in @code{*@var{rlp}}. +Change the current and maximum limits of the process for the resource +@var{resource} to the values provided in @code{*@var{rlp}}. The return value is @code{0} on success and @code{-1} on failure. The following @code{errno} error condition is possible: commit 509166c9a53ad81217a28cc67ba2a688f02fc477 Author: Florian Weimer Date: Wed Jun 26 11:27:54 2024 +0200 Enhance test coverage for strnlen, wcsnlen This commit adds string/test-strnlen-nonarray and wcsmbs/test-wcsnlen-nonarray. Reviewed-by: Noah Goldstein (cherry picked from commit 783d4c0b81889c39a9ddf13b60d0fde4040fb1c0) diff --git a/string/Makefile b/string/Makefile index 8f31fa49e6..2e20fc00fd 100644 --- a/string/Makefile +++ b/string/Makefile @@ -184,6 +184,7 @@ tests := \ test-strncpy \ test-strndup \ test-strnlen \ + test-strnlen-nonarray \ test-strpbrk \ test-strrchr \ test-strspn \ diff --git a/string/test-Xnlen-nonarray.c b/string/test-Xnlen-nonarray.c new file mode 100644 index 0000000000..499bef2041 --- /dev/null +++ b/string/test-Xnlen-nonarray.c @@ -0,0 +1,133 @@ +/* Test non-array inputs to string length functions. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This skeleton file is included from string/test-strnlen-nonarray.c + and wcsmbs/test-wcsnlen-nonarray.c to test that reading of the array + stops at the first null character. + + TEST_IDENTIFIER must be the test function identifier. TEST_NAME is + the same as a string. + + CHAR must be defined as the character type. */ + +#include +#include +#include +#include +#include +#include +#include + +typedef __typeof (TEST_IDENTIFIER) *proto_t; + +#define TEST_MAIN +#include "test-string.h" + +IMPL (TEST_IDENTIFIER, 1) + +static int +test_main (void) +{ + enum { buffer_length = 256 }; + TEST_VERIFY_EXIT (sysconf (_SC_PAGESIZE) >= buffer_length); + + test_init (); + + /* Buffer layout: There are a_count 'A' characters followed by + zero_count null characters, for a total of buffer_length + characters: + + AAAAA...AAAAA 00000 ... 00000 (unmapped page follows) + \ / \ / + (a_count) (zero_count) + \___ (buffer_length) ___/ + ^ + | + start_offset + + The buffer length does not change, but a_count (and thus zero_count) + and start_offset vary. + + If start_offset == buffer_length, only 0 is a valid length + argument. The result is 0. + + Otherwise, if zero_count > 0 (if there are null characters in the + buffer), then any length argument is valid. 
If start_offset < + a_count (i.e., there is a non-null character at start_offset), the + result is the minimum of a_count - start_offset and the length + argument. Otherwise the result is 0. + + Otherwise, there are no null characters before the unmapped page. + The length argument must not be greater than buffer_length - + start_offset, and the result is the length argument. */ + + struct support_next_to_fault ntf + = support_next_to_fault_allocate (buffer_length * sizeof (CHAR)); + CHAR *buffer = (CHAR *) ntf.buffer; + + FOR_EACH_IMPL (impl, 0) + { + printf ("info: testing %s\n", impl->name); + for (size_t i = 0; i < buffer_length; ++i) + buffer[i] = 'A'; + + for (int zero_count = 0; zero_count <= buffer_length; ++zero_count) + { + if (zero_count > 0) + buffer[buffer_length - zero_count] = 0; + int a_count = buffer_length - zero_count; + for (int start_offset = 0; start_offset <= buffer_length; + ++start_offset) + { + CHAR *start_pointer = buffer + start_offset; + if (start_offset == buffer_length) + TEST_COMPARE (CALL (impl, buffer + start_offset, 0), 0); + else if (zero_count > 0) + for (int length_argument = 0; + length_argument <= 2 * buffer_length; + ++length_argument) + { + if (test_verbose) + printf ("zero_count=%d a_count=%d start_offset=%d" + " length_argument=%d\n", + zero_count, a_count, start_offset, + length_argument); + if (start_offset < a_count) + TEST_COMPARE (CALL (impl, start_pointer, length_argument), + MIN (a_count - start_offset, + length_argument)); + else + TEST_COMPARE (CALL (impl, start_pointer, length_argument), + 0); + } + else + for (int length_argument = 0; + length_argument <= buffer_length - start_offset; + ++length_argument) + TEST_COMPARE (CALL (impl, start_pointer, length_argument), + length_argument); + } + } + } + + support_next_to_fault_free (&ntf); + + return 0; +} + +#include diff --git a/string/test-strnlen-nonarray.c b/string/test-strnlen-nonarray.c new file mode 100644 index 0000000000..0ad05756d9 --- /dev/null +++ 
b/string/test-strnlen-nonarray.c @@ -0,0 +1,4 @@ +#define TEST_IDENTIFIER strnlen +#define TEST_NAME "strnlen" +typedef char CHAR; +#include "test-Xnlen-nonarray.c" diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile index 1cddd8cc6d..c51c9b4f1f 100644 --- a/wcsmbs/Makefile +++ b/wcsmbs/Makefile @@ -160,6 +160,7 @@ tests := \ test-wcsncmp \ test-wcsncpy \ test-wcsnlen \ + test-wcsnlen-nonarray \ test-wcspbrk \ test-wcsrchr \ test-wcsspn \ diff --git a/wcsmbs/test-wcsnlen-nonarray.c b/wcsmbs/test-wcsnlen-nonarray.c new file mode 100644 index 0000000000..a4b21fecd3 --- /dev/null +++ b/wcsmbs/test-wcsnlen-nonarray.c @@ -0,0 +1,5 @@ +#include +#define TEST_IDENTIFIER wcsnlen +#define TEST_NAME "wcsnlen" +typedef wchar_t CHAR; +#include "../string/test-Xnlen-nonarray.c" commit 46f19b234244f4654b9e3898ac1c27de86068222 Author: Florian Weimer Date: Thu Jun 27 16:26:56 2024 +0200 Enhanced test coverage for strncmp, wcsncmp Add string/test-strncmp-nonarray and wcsmbs/test-wcsncmp-nonarray. This is the test that uncovered bug 31934. Test run time is more than one minute on a fairly current system, so turn these into xtests that do not run automatically. Reviewed-by: Noah Goldstein (cherry picked from commit 54252394c25ddf0062e288d4a6ab7a885f8ae009) diff --git a/string/Makefile b/string/Makefile index 2e20fc00fd..1dff405c27 100644 --- a/string/Makefile +++ b/string/Makefile @@ -236,7 +236,10 @@ tests-unsupported += $(tests-translation) endif # This test allocates a lot of memory and can run for a long time. -xtests = tst-strcoll-overflow +xtests += tst-strcoll-overflow + +# This test runs for a long time. +xtests += test-strncmp-nonarray # This test needs libdl. ifeq (yes,$(build-shared)) diff --git a/string/test-Xncmp-nonarray.c b/string/test-Xncmp-nonarray.c new file mode 100644 index 0000000000..9f3a3ca75d --- /dev/null +++ b/string/test-Xncmp-nonarray.c @@ -0,0 +1,183 @@ +/* Test non-array inputs to string comparison functions. 
+ Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This skeleton file is included from string/test-strncmp-nonarray.c and + wcsmbs/test-wcsncmp-nonarray.c to test that reading of the arrays stops + at the first null character. + + TEST_IDENTIFIER must be the test function identifier. TEST_NAME is + the same as a string. + + CHAR must be defined as the character type. */ + +#include +#include +#include +#include +#include +#include +#include + +/* Much shorter than test-Xnlen-nonarray.c because of deeply nested loops. */ +enum { buffer_length = 80 }; + +/* The test buffer layout follows what is described in test-Xnlen-nonarray.c, + except that there are two buffers, left and right. The variables + a_count, zero_count, start_offset are all duplicated. */ + +/* Return the maximum string length for a string that starts at + start_offset. */ +static int +string_length (int a_count, int start_offset) +{ + if (start_offset == buffer_length || start_offset >= a_count) + return 0; + else + return a_count - start_offset; +} + +/* This is the valid maximum length argument computation for + strnlen/wcsnlen. See test-Xnlen-nonarray.c. 
*/ +static int +maximum_length (int start_offset, int zero_count) +{ + if (start_offset == buffer_length) + return 0; + else if (zero_count > 0) + /* Effectively unbounded, but we need to stop fairly low, + otherwise testing takes too long. */ + return buffer_length + 32; + else + return buffer_length - start_offset; +} + +typedef __typeof (TEST_IDENTIFIER) *proto_t; + +#define TEST_MAIN +#include "test-string.h" + +IMPL (TEST_IDENTIFIER, 1) + +static int +test_main (void) +{ + TEST_VERIFY_EXIT (sysconf (_SC_PAGESIZE) >= buffer_length); + test_init (); + + struct support_next_to_fault left_ntf + = support_next_to_fault_allocate (buffer_length * sizeof (CHAR)); + CHAR *left_buffer = (CHAR *) left_ntf.buffer; + struct support_next_to_fault right_ntf + = support_next_to_fault_allocate (buffer_length * sizeof (CHAR)); + CHAR *right_buffer = (CHAR *) right_ntf.buffer; + + FOR_EACH_IMPL (impl, 0) + { + printf ("info: testing %s\n", impl->name); + for (size_t i = 0; i < buffer_length; ++i) + left_buffer[i] = 'A'; + + for (int left_zero_count = 0; left_zero_count <= buffer_length; + ++left_zero_count) + { + if (left_zero_count > 0) + left_buffer[buffer_length - left_zero_count] = 0; + int left_a_count = buffer_length - left_zero_count; + for (size_t i = 0; i < buffer_length; ++i) + right_buffer[i] = 'A'; + for (int right_zero_count = 0; right_zero_count <= buffer_length; + ++right_zero_count) + { + if (right_zero_count > 0) + right_buffer[buffer_length - right_zero_count] = 0; + int right_a_count = buffer_length - right_zero_count; + for (int left_start_offset = 0; + left_start_offset <= buffer_length; + ++left_start_offset) + { + CHAR *left_start_pointer = left_buffer + left_start_offset; + int left_maxlen + = maximum_length (left_start_offset, left_zero_count); + int left_length + = string_length (left_a_count, left_start_offset); + for (int right_start_offset = 0; + right_start_offset <= buffer_length; + ++right_start_offset) + { + CHAR *right_start_pointer + = 
right_buffer + right_start_offset; + int right_maxlen + = maximum_length (right_start_offset, right_zero_count); + int right_length + = string_length (right_a_count, right_start_offset); + + /* Maximum length is modelled after strnlen/wcsnlen, + and must be valid for both pointer arguments at + the same time. */ + int maxlen = MIN (left_maxlen, right_maxlen); + + for (int length_argument = 0; length_argument <= maxlen; + ++length_argument) + { + if (test_verbose) + { + printf ("left: zero_count=%d" + " a_count=%d start_offset=%d\n", + left_zero_count, left_a_count, + left_start_offset); + printf ("right: zero_count=%d" + " a_count=%d start_offset=%d\n", + right_zero_count, right_a_count, + right_start_offset); + printf ("length argument: %d\n", + length_argument); + } + + /* Effective lengths bounded by length argument. + The effective length determines the + outcome of the comparison. */ + int left_effective + = MIN (left_length, length_argument); + int right_effective + = MIN (right_length, length_argument); + if (left_effective == right_effective) + TEST_COMPARE (CALL (impl, + left_start_pointer, + right_start_pointer, + length_argument), 0); + else if (left_effective < right_effective) + TEST_COMPARE (CALL (impl, + left_start_pointer, + right_start_pointer, + length_argument) < 0, 1); + else + TEST_COMPARE (CALL (impl, + left_start_pointer, + right_start_pointer, + length_argument) > 0, 1); + } + } + } + } + } + } + + return 0; +} + +#include diff --git a/string/test-strncmp-nonarray.c b/string/test-strncmp-nonarray.c new file mode 100644 index 0000000000..581e52d01b --- /dev/null +++ b/string/test-strncmp-nonarray.c @@ -0,0 +1,4 @@ +#define TEST_IDENTIFIER strncmp +#define TEST_NAME "strncmp" +typedef char CHAR; +#include "test-Xncmp-nonarray.c" diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile index c51c9b4f1f..63adf0e8ef 100644 --- a/wcsmbs/Makefile +++ b/wcsmbs/Makefile @@ -206,6 +206,10 @@ tests := \ wcsmbs-tst1 \ # tests +# This test runs for a long time. 
+xtests += test-wcsncmp-nonarray + + include ../Rules ifeq ($(run-built-tests),yes) diff --git a/wcsmbs/test-wcsncmp-nonarray.c b/wcsmbs/test-wcsncmp-nonarray.c new file mode 100644 index 0000000000..1ad9ebd8fd --- /dev/null +++ b/wcsmbs/test-wcsncmp-nonarray.c @@ -0,0 +1,5 @@ +#include +#define TEST_IDENTIFIER wcsncmp +#define TEST_NAME "wcsncmp" +typedef wchar_t CHAR; +#include "../string/test-Xncmp-nonarray.c" commit 3433a358428bcec2d203fa408b85f442c9a465ca Author: H.J. Lu Date: Wed Jul 24 14:05:13 2024 -0700 linux: Update the mremap C implementation [BZ #31968] Update the mremap C implementation to support the optional argument for MREMAP_DONTUNMAP added in Linux 5.7 since it may not always be correct to implement a variadic function as a non-variadic function on all Linux targets. Return MAP_FAILED and set errno to EINVAL for unknown flag bits. This fixes BZ #31968. Note: A test must be added when a new flag bit is introduced. Signed-off-by: H.J. Lu Reviewed-by: Adhemerval Zanella (cherry picked from commit 6c40cb0e9f893d49dc7caee580a055de53562206) diff --git a/NEWS b/NEWS index 6b62f55658..5948704ef9 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,7 @@ The following bugs are resolved with this release: [30081] resolv: Do not wait for non-existing second DNS response after error [31890] resolv: Allow short error responses to match any DNS query + [31968] mremap implementation in C does not handle arguments correctly Version 2.40 diff --git a/sysdeps/unix/sysv/linux/mremap.c b/sysdeps/unix/sysv/linux/mremap.c index 4f770799c4..1ada5c1f40 100644 --- a/sysdeps/unix/sysv/linux/mremap.c +++ b/sysdeps/unix/sysv/linux/mremap.c @@ -20,6 +20,12 @@ #include #include #include +#include + +#define MREMAP_KNOWN_BITS \ + (MREMAP_MAYMOVE \ + | MREMAP_FIXED \ + | MREMAP_DONTUNMAP) void * __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...) @@ -27,7 +33,13 @@ __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...) 
va_list va; void *new_addr = NULL; - if (flags & MREMAP_FIXED) + if (flags & ~(MREMAP_KNOWN_BITS)) + { + __set_errno (EINVAL); + return MAP_FAILED; + } + + if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) { va_start (va, flags); new_addr = va_arg (va, void *); commit 2eb2d78ca75d7432bd1d11d227f4c899554ea02e Author: H.J. Lu Date: Wed Jul 24 14:05:14 2024 -0700 mremap: Update manual entry Update mremap manual entry: 1. Change mremap to variadic. 2. Document MREMAP_FIXED and MREMAP_DONTUNMAP. Signed-off-by: H.J. Lu Reviewed-by: Adhemerval Zanella (cherry picked from commit cb2dee4eccf46642eef588bee64f9c875c408f1c) diff --git a/NEWS b/NEWS index 5948704ef9..8da0408ad9 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,7 @@ Version 2.40.1 The following bugs are resolved with this release: [30081] resolv: Do not wait for non-existing second DNS response after error + [31968] mremap implementation in C does not handle arguments correctly [31890] resolv: Allow short error responses to match any DNS query [31968] mremap implementation in C does not handle arguments correctly diff --git a/manual/llio.texi b/manual/llio.texi index 6f0a48609b..ea84196abd 100644 --- a/manual/llio.texi +++ b/manual/llio.texi @@ -1892,7 +1892,7 @@ There is no existing mapping in at least part of the given region. @end deftypefun -@deftypefun {void *} mremap (void *@var{address}, size_t @var{length}, size_t @var{new_length}, int @var{flag}) +@deftypefun {void *} mremap (void *@var{address}, size_t @var{length}, size_t @var{new_length}, int @var{flag}, ... /* void *@var{new_address} */) @standards{GNU, sys/mman.h} @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} @@ -1901,12 +1901,40 @@ area. @var{address} and @var{length} must cover a region entirely mapped in the same @code{mmap} statement. A new mapping with the same characteristics will be returned with the length @var{new_length}. -One option is possible, @code{MREMAP_MAYMOVE}. 
If it is given in -@var{flags}, the system may remove the existing mapping and create a new -one of the desired length in another location. +Possible flags are -The address of the resulting mapping is returned, or @math{-1}. Possible -error codes include: +@table @code + +@item MREMAP_MAYMOVE +If it is given in @var{flags}, the system may remove the existing mapping +and create a new one of the desired length in another location. + +@item MREMAP_FIXED +If it is given in @var{flags}, @code{mremap} accepts a fifth argument, +@code{void *new_address}, which specifies a page-aligned address to +which the mapping must be moved. Any previous mapping at the address +range specified by @var{new_address} and @var{new_size} is unmapped. + +@code{MREMAP_FIXED} must be used together with @code{MREMAP_MAYMOVE}. + +@item MREMAP_DONTUNMAP +If it is given in @var{flags}, @code{mremap} accepts a fifth argument, +@code{void *new_address}, which specifies a page-aligned address. Any +previous mapping at the address range specified by @var{new_address} and +@var{new_size} is unmapped. If @var{new_address} is @code{NULL}, the +kernel chooses the page-aligned address at which to create the mapping. +Otherwise, the kernel takes it as a hint about where to place the mapping. +The mapping at the address range specified by @var{old_address} and +@var{old_size} isn't unmapped. + +@code{MREMAP_DONTUNMAP} must be used together with @code{MREMAP_MAYMOVE}. +@var{old_size} must be the same as @var{new_size}. This flag bit is +Linux-specific. + +@end table + +The address of the resulting mapping is returned, or @code{MAP_FAILED}. +Possible error codes include: @table @code @@ -1915,7 +1943,7 @@ There is no existing mapping in at least part of the original region, or the region covers two or more distinct mappings. @item EINVAL -The address given is misaligned or inappropriate. +Any arguments are inappropriate, including unknown @var{flags} values. 
@item EAGAIN The region has pages locked, and if extended it would exceed the commit ca53bc68ab92ab413a7d53e951227d380a85ebd8 Author: H.J. Lu Date: Wed Jul 24 14:05:15 2024 -0700 Add mremap tests Add tests for MREMAP_MAYMOVE and MREMAP_FIXED. On Linux, also test MREMAP_DONTUNMAP. Signed-off-by: H.J. Lu Reviewed-by: Adhemerval Zanella (cherry picked from commit ff0320bec2810192d453c579623482fab87bfa01) diff --git a/misc/Makefile b/misc/Makefile index 5d17c562fe..7b7f8351bf 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -257,6 +257,8 @@ tests := \ tst-mntent-blank-passno \ tst-mntent-escape \ tst-mntent2 \ + tst-mremap1 \ + tst-mremap2 \ tst-preadvwritev \ tst-preadvwritev2 \ tst-preadvwritev64 \ diff --git a/misc/tst-mremap1.c b/misc/tst-mremap1.c new file mode 100644 index 0000000000..0469991a6c --- /dev/null +++ b/misc/tst-mremap1.c @@ -0,0 +1,46 @@ +/* Test mremap with MREMAP_MAYMOVE. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + size_t old_size = getpagesize (); + char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + old_addr[0] = 1; + old_addr[old_size - 1] = 2; + + /* Test MREMAP_MAYMOVE. 
*/ + size_t new_size = old_size + old_size; + char *new_addr = mremap (old_addr, old_size, new_size, MREMAP_MAYMOVE); + TEST_VERIFY_EXIT (new_addr != MAP_FAILED); + new_addr[0] = 1; + new_addr[new_size - 1] = 2; + xmunmap (new_addr, new_size); + + return 0; +} + +#include diff --git a/misc/tst-mremap2.c b/misc/tst-mremap2.c new file mode 100644 index 0000000000..45be7f0369 --- /dev/null +++ b/misc/tst-mremap2.c @@ -0,0 +1,54 @@ +/* Test mremap with MREMAP_FIXED. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + size_t old_size = getpagesize (); + size_t new_size = old_size + old_size; + char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + old_addr[0] = 1; + old_addr[old_size - 1] = 2; + + char *fixed_addr = xmmap (NULL, new_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + fixed_addr[0] = 1; + fixed_addr[new_size - 1] = 2; + + /* Test MREMAP_FIXED. 
*/ + char *new_addr = mremap (old_addr, old_size, new_size, + MREMAP_FIXED | MREMAP_MAYMOVE, + fixed_addr); + if (new_addr == MAP_FAILED) + return mremap_failure_exit (errno); + new_addr[0] = 1; + new_addr[new_size - 1] = 2; + xmunmap (new_addr, new_size); + + return 0; +} + +#include diff --git a/sysdeps/generic/mremap-failure.h b/sysdeps/generic/mremap-failure.h new file mode 100644 index 0000000000..bc0d476368 --- /dev/null +++ b/sysdeps/generic/mremap-failure.h @@ -0,0 +1,25 @@ +/* mremap failure handling. Generic version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Return exit value on mremap failure with errno ERR. 
*/ + +static int +mremap_failure_exit (int err) +{ + return EXIT_FAILURE; +} diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 097b5a26fc..59998c7af4 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -206,6 +206,7 @@ tests += \ tst-getauxval \ tst-gettid \ tst-gettid-kill \ + tst-linux-mremap1 \ tst-memfd_create \ tst-misalign-clone \ tst-mlock2 \ diff --git a/sysdeps/unix/sysv/linux/mremap-failure.h b/sysdeps/unix/sysv/linux/mremap-failure.h new file mode 100644 index 0000000000..c99ab30ca9 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mremap-failure.h @@ -0,0 +1,30 @@ +/* mremap failure handling. Linux version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* Return exit value on mremap failure with errno ERR. */ + +static int +mremap_failure_exit (int err) +{ + if (err != EINVAL) + return EXIT_FAILURE; + + return EXIT_UNSUPPORTED; +} diff --git a/sysdeps/unix/sysv/linux/tst-linux-mremap1.c b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c new file mode 100644 index 0000000000..408e8af2ab --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c @@ -0,0 +1,63 @@ +/* Test mremap with MREMAP_DONTUNMAP. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + size_t old_size = getpagesize (); + size_t new_size = old_size; + char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + old_addr[0] = 1; + old_addr[old_size - 1] = 2; + + /* Create an available 64-page mmap region. */ + size_t fixed_size = old_size * 64; + char *fixed_addr = xmmap (NULL, fixed_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + xmunmap (fixed_addr, fixed_size); + + /* Add 3 * pagesize. */ + fixed_size += 3 * old_size; + + /* Test MREMAP_DONTUNMAP. It should return FIXED_ADDR created above. 
*/ + char *new_addr = mremap (old_addr, old_size, new_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, + fixed_addr); + if (new_addr == MAP_FAILED) + return mremap_failure_exit (errno); + TEST_VERIFY_EXIT (fixed_addr == new_addr); + old_addr[0] = 3; + old_addr[old_size - 1] = 4; + new_addr[0] = 1; + new_addr[new_size - 1] = 2; + xmunmap (new_addr, new_size); + xmunmap (old_addr, old_size); + + return 0; +} + +#include commit 928769737c8f0cbb51dfa2a02de0594c2395d802 Author: Florian Weimer Date: Thu Aug 1 10:46:10 2024 +0200 resolv: Fix tst-resolv-short-response for older GCC (bug 32042) Previous GCC versions do not support the C23 change that allows labels on declarations. Reviewed-by: Adhemerval Zanella (cherry picked from commit ec119972cb2598c04ec7d4219e20506006836f64) diff --git a/resolv/tst-resolv-short-response.c b/resolv/tst-resolv-short-response.c index be354ae1c7..9b06b0c176 100644 --- a/resolv/tst-resolv-short-response.c +++ b/resolv/tst-resolv-short-response.c @@ -33,8 +33,10 @@ response (const struct resolv_response_context *ctx, { case 0: /* First server times out. */ - struct resolv_response_flags flags = {.rcode = rcode}; - resolv_response_init (b, flags); + { + struct resolv_response_flags flags = {.rcode = rcode}; + resolv_response_init (b, flags); + } break; case 1: /* Second server sends reply. */ commit aa533d58ff12e27771d9c960a727d74992a3f2a3 Author: Florian Weimer Date: Fri Aug 2 15:22:14 2024 +0200 x86: Tunables may incorrectly set Prefer_PMINUB_for_stringop (bug 32047) Fixes commit 5bcf6265f215326d14dfacdce8532792c2c7f8f8 ("x86: Disable non-temporal memset on Skylake Server"). 
Reviewed-by: Noah Goldstein (cherry picked from commit 7a630f7d3392ca391a399486ce2846f9e4b4ee63) diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c index ae9dcd6180..ccc6b64dc2 100644 --- a/sysdeps/x86/cpu-tunables.c +++ b/sysdeps/x86/cpu-tunables.c @@ -248,6 +248,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Avoid_Non_Temporal_Memset, 25); } + break; case 26: { CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH commit 6a97e2ba144a554809161d488f25e4bae07c9405 Author: Andreas Schwab Date: Mon Aug 5 10:55:51 2024 +0200 Fix name space violation in fortify wrappers (bug 32052) Rename the identifier sz to __sz everywhere. Fixes: a643f60c53 ("Make sure that the fortified function conditionals are constant") (cherry picked from commit 39ca997ab378990d5ac1aadbaa52aaf1db6d526f) diff --git a/NEWS b/NEWS index 8da0408ad9..b9eb4a558b 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,7 @@ The following bugs are resolved with this release: [31968] mremap implementation in C does not handle arguments correctly [31890] resolv: Allow short error responses to match any DNS query [31968] mremap implementation in C does not handle arguments correctly + [32052] Name space violation in fortify wrappers Version 2.40 diff --git a/libio/bits/stdio2.h b/libio/bits/stdio2.h index 91a80dd7c6..4b8bc35bdf 100644 --- a/libio/bits/stdio2.h +++ b/libio/bits/stdio2.h @@ -308,14 +308,14 @@ fgets (__fortify_clang_overload_arg (char *, __restrict, __s), int __n, "fgets called with bigger size than length of " "destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __fgets_alias (__s, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __fgets_chk_warn (__s, sz, __n, __stream); + if (__glibc_unsafe_len (__n, sizeof (char), 
__sz)) + return __fgets_chk_warn (__s, __sz, __n, __stream); #endif - return __fgets_chk (__s, sz, __n, __stream); + return __fgets_chk (__s, __sz, __n, __stream); } __fortify_function __wur __nonnull ((4)) __attribute_overloadable__ size_t @@ -326,14 +326,14 @@ fread (__fortify_clang_overload_arg (void *, __restrict, __ptr), "fread called with bigger size * n than length " "of destination buffer") { - size_t sz = __glibc_objsize0 (__ptr); - if (__glibc_safe_or_unknown_len (__n, __size, sz)) + size_t __sz = __glibc_objsize0 (__ptr); + if (__glibc_safe_or_unknown_len (__n, __size, __sz)) return __fread_alias (__ptr, __size, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, __size, sz)) - return __fread_chk_warn (__ptr, sz, __size, __n, __stream); + if (__glibc_unsafe_len (__n, __size, __sz)) + return __fread_chk_warn (__ptr, __sz, __size, __n, __stream); #endif - return __fread_chk (__ptr, sz, __size, __n, __stream); + return __fread_chk (__ptr, __sz, __size, __n, __stream); } #ifdef __USE_GNU @@ -345,14 +345,14 @@ fgets_unlocked (__fortify_clang_overload_arg (char *, __restrict, __s), "fgets called with bigger size than length of " "destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __fgets_unlocked_alias (__s, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __fgets_unlocked_chk_warn (__s, sz, __n, __stream); + if (__glibc_unsafe_len (__n, sizeof (char), __sz)) + return __fgets_unlocked_chk_warn (__s, __sz, __n, __stream); #endif - return __fgets_unlocked_chk (__s, sz, __n, __stream); + return __fgets_unlocked_chk (__s, __sz, __n, __stream); } #endif @@ -366,8 +366,8 @@ fread_unlocked (__fortify_clang_overload_arg0 (void *, __restrict, __ptr), "fread_unlocked called with bigger size * n than " "length of destination 
buffer") { - size_t sz = __glibc_objsize0 (__ptr); - if (__glibc_safe_or_unknown_len (__n, __size, sz)) + size_t __sz = __glibc_objsize0 (__ptr); + if (__glibc_safe_or_unknown_len (__n, __size, __sz)) { # ifdef __USE_EXTERN_INLINES if (__builtin_constant_p (__size) @@ -393,10 +393,10 @@ fread_unlocked (__fortify_clang_overload_arg0 (void *, __restrict, __ptr), return __fread_unlocked_alias (__ptr, __size, __n, __stream); } # if !__fortify_use_clang - if (__glibc_unsafe_len (__n, __size, sz)) - return __fread_unlocked_chk_warn (__ptr, sz, __size, __n, __stream); + if (__glibc_unsafe_len (__n, __size, __sz)) + return __fread_unlocked_chk_warn (__ptr, __sz, __size, __n, __stream); # endif - return __fread_unlocked_chk (__ptr, sz, __size, __n, __stream); + return __fread_unlocked_chk (__ptr, __sz, __size, __n, __stream); } #endif diff --git a/socket/bits/socket2.h b/socket/bits/socket2.h index 04780f320e..bd91647f37 100644 --- a/socket/bits/socket2.h +++ b/socket/bits/socket2.h @@ -37,14 +37,14 @@ recv (int __fd, __fortify_clang_overload_arg0 (void *, ,__buf), size_t __n, "recv called with bigger length than " "size of destination buffer") { - size_t sz = __glibc_objsize0 (__buf); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize0 (__buf); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __recv_alias (__fd, __buf, __n, __flags); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __recv_chk_warn (__fd, __buf, __n, sz, __flags); + if (__glibc_unsafe_len (__n, sizeof (char), __sz)) + return __recv_chk_warn (__fd, __buf, __n, __sz, __flags); #endif - return __recv_chk (__fd, __buf, __n, sz, __flags); + return __recv_chk (__fd, __buf, __n, __sz, __flags); } extern ssize_t __recvfrom_chk (int __fd, void *__restrict __buf, size_t __n, @@ -71,13 +71,13 @@ recvfrom (int __fd, __fortify_clang_overload_arg0 (void *, __restrict, __buf), "recvfrom called with bigger length " "than size 
of destination buffer") { - size_t sz = __glibc_objsize0 (__buf); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize0 (__buf); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __recvfrom_alias (__fd, __buf, __n, __flags, __addr, __addr_len); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __recvfrom_chk_warn (__fd, __buf, __n, sz, __flags, __addr, + if (__glibc_unsafe_len (__n, sizeof (char), __sz)) + return __recvfrom_chk_warn (__fd, __buf, __n, __sz, __flags, __addr, __addr_len); #endif - return __recvfrom_chk (__fd, __buf, __n, sz, __flags, __addr, __addr_len); + return __recvfrom_chk (__fd, __buf, __n, __sz, __flags, __addr, __addr_len); } diff --git a/stdlib/bits/stdlib.h b/stdlib/bits/stdlib.h index 1557b862b1..9c78ecf458 100644 --- a/stdlib/bits/stdlib.h +++ b/stdlib/bits/stdlib.h @@ -43,16 +43,16 @@ __NTH (realpath (const char *__restrict __name, "bytes long buffer") #endif { - size_t sz = __glibc_objsize (__resolved); + size_t __sz = __glibc_objsize (__resolved); - if (sz == (size_t) -1) + if (__sz == (size_t) -1) return __realpath_alias (__name, __resolved); #if !__fortify_use_clang && defined _LIBC_LIMITS_H_ && defined PATH_MAX - if (__glibc_unsafe_len (PATH_MAX, sizeof (char), sz)) - return __realpath_chk_warn (__name, __resolved, sz); + if (__glibc_unsafe_len (PATH_MAX, sizeof (char), __sz)) + return __realpath_chk_warn (__name, __resolved, __sz); #endif - return __realpath_chk (__name, __resolved, sz); + return __realpath_chk (__name, __resolved, __sz); } diff --git a/wcsmbs/bits/wchar2.h b/wcsmbs/bits/wchar2.h index 9fdff47ee2..43c6b63027 100644 --- a/wcsmbs/bits/wchar2.h +++ b/wcsmbs/bits/wchar2.h @@ -74,9 +74,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcscpy (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return 
__wcscpy_chk (__dest, __src, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcscpy_chk (__dest, __src, __sz / sizeof (wchar_t)); return __wcscpy_alias (__dest, __src); } @@ -84,9 +84,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcpcpy (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return __wcpcpy_chk (__dest, __src, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcpcpy_chk (__dest, __src, __sz / sizeof (wchar_t)); return __wcpcpy_alias (__dest, __src); } @@ -118,9 +118,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcscat (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return __wcscat_chk (__dest, __src, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcscat_chk (__dest, __src, __sz / sizeof (wchar_t)); return __wcscat_alias (__dest, __src); } @@ -128,9 +128,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcsncat (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src, size_t __n)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return __wcsncat_chk (__dest, __src, __n, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcsncat_chk (__dest, __src, __n, __sz / sizeof (wchar_t)); return __wcsncat_alias (__dest, __src, __n); } @@ -170,10 +170,10 @@ __fortify_function int __NTH (swprintf (wchar_t *__restrict __s, size_t __n, const wchar_t *__restrict __fmt, ...)) { - size_t sz = __glibc_objsize (__s); - if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) + size_t __sz = __glibc_objsize (__s); + if 
(__sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) return __swprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, - sz / sizeof (wchar_t), __fmt, __va_arg_pack ()); + __sz / sizeof (wchar_t), __fmt, __va_arg_pack ()); return __swprintf_alias (__s, __n, __fmt, __va_arg_pack ()); } #elif __fortify_use_clang @@ -206,10 +206,10 @@ __fortify_function int __NTH (vswprintf (wchar_t *__restrict __s, size_t __n, const wchar_t *__restrict __fmt, __gnuc_va_list __ap)) { - size_t sz = __glibc_objsize (__s); - if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) + size_t __sz = __glibc_objsize (__s); + if (__sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) return __vswprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, - sz / sizeof (wchar_t), __fmt, __ap); + __sz / sizeof (wchar_t), __fmt, __ap); return __vswprintf_alias (__s, __n, __fmt, __ap); } @@ -257,14 +257,14 @@ fgetws (__fortify_clang_overload_arg (wchar_t *, __restrict, __s), int __n, "fgetws called with length bigger " "than size of destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz)) + size_t __sz = __glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), __sz)) return __fgetws_alias (__s, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz)) - return __fgetws_chk_warn (__s, sz / sizeof (wchar_t), __n, __stream); + if (__glibc_unsafe_len (__n, sizeof (wchar_t), __sz)) + return __fgetws_chk_warn (__s, __sz / sizeof (wchar_t), __n, __stream); #endif - return __fgetws_chk (__s, sz / sizeof (wchar_t), __n, __stream); + return __fgetws_chk (__s, __sz / sizeof (wchar_t), __n, __stream); } #ifdef __USE_GNU @@ -275,15 +275,15 @@ fgetws_unlocked (__fortify_clang_overload_arg (wchar_t *, __restrict, __s), "fgetws_unlocked called with length bigger " "than size of destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz)) + size_t __sz = 
__glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), __sz)) return __fgetws_unlocked_alias (__s, __n, __stream); # if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz)) - return __fgetws_unlocked_chk_warn (__s, sz / sizeof (wchar_t), __n, + if (__glibc_unsafe_len (__n, sizeof (wchar_t), __sz)) + return __fgetws_unlocked_chk_warn (__s, __sz / sizeof (wchar_t), __n, __stream); # endif - return __fgetws_unlocked_chk (__s, sz / sizeof (wchar_t), __n, __stream); + return __fgetws_unlocked_chk (__s, __sz / sizeof (wchar_t), __n, __stream); } #endif commit 5641780762723156b0d20a0b9f7df1d76831bab0 Author: Arjun Shankar Date: Tue Jul 30 11:37:57 2024 +0200 manual/stdio: Further clarify putc, putwc, getc, and getwc This is a follow-up to 10de4a47ef3f481592e3c62eb07bcda23e9fde4d that reworded the manual entries for putc and putwc and removed any performance claims. This commit further clarifies these entries and brings getc and getwc in line with the descriptions of putc and putwc, removing any performance claims from them as well. Reviewed-by: Florian Weimer (cherry picked from commit 942670c81dc8071dd75d6213e771daa5d2084cb6) diff --git a/manual/stdio.texi b/manual/stdio.texi index f9529a098d..8517653507 100644 --- a/manual/stdio.texi +++ b/manual/stdio.texi @@ -904,20 +904,16 @@ This function is a GNU extension. @standards{ISO, stdio.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} This is just like @code{fputc}, except that it may be implemented as -a macro, making it faster. One consequence is that it may evaluate the -@var{stream} argument more than once, which is an exception to the -general rule for macros. Therefore, @var{stream} should never be an -expression with side-effects. +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. 
@end deftypefun @deftypefun wint_t putwc (wchar_t @var{wc}, FILE *@var{stream}) @standards{ISO, wchar.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} This is just like @code{fputwc}, except that it may be implemented as -a macro, making it faster. One consequence is that it may evaluate the -@var{stream} argument more than once, which is an exception to the -general rule for macros. Therefore, @var{stream} should never be an -expression with side-effects. +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. @end deftypefun @deftypefun int putc_unlocked (int @var{c}, FILE *@var{stream}) @@ -1110,20 +1106,17 @@ This function is a GNU extension. @deftypefun int getc (FILE *@var{stream}) @standards{ISO, stdio.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} -This is just like @code{fgetc}, except that it is permissible (and -typical) for it to be implemented as a macro that evaluates the -@var{stream} argument more than once. @code{getc} is often highly -optimized, so it is usually the best function to use to read a single -character. +This is just like @code{fgetc}, except that it may be implemented as +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. @end deftypefun @deftypefun wint_t getwc (FILE *@var{stream}) @standards{ISO, wchar.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} -This is just like @code{fgetwc}, except that it is permissible for it to -be implemented as a macro that evaluates the @var{stream} argument more -than once. @code{getwc} can be highly optimized, so it is usually the -best function to use to read a single wide character. 
+This is just like @code{fgetwc}, except that it may be implemented as +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. @end deftypefun @deftypefun int getc_unlocked (FILE *@var{stream}) commit 39ee60a719eb6ffde76ccc232d79273c473d7a93 Author: Florian Weimer Date: Fri Aug 9 17:01:17 2024 +0200 support: Add options list terminator to the test driver This avoids crashes if a test is passed unknown options. Reviewed-by: Adhemerval Zanella (cherry picked from commit c2a474f4617ede7a8bf56b7257acb37dc757b2d1) diff --git a/support/test-driver.c b/support/test-driver.c index f4c3e4d666..04ceebc08f 100644 --- a/support/test-driver.c +++ b/support/test-driver.c @@ -155,6 +155,7 @@ main (int argc, char **argv) { CMDLINE_OPTIONS TEST_DEFAULT_OPTIONS + { 0, } }; test_config.options = &options; #endif commit e3556937c2402b8acf2e586bf53168e9f482ba91 Author: H.J. Lu Date: Mon Jul 22 17:47:21 2024 -0700 x86-64: Remove sysdeps/x86_64/x32/dl-machine.h Remove sysdeps/x86_64/x32/dl-machine.h by folding x32 ARCH_LA_PLTENTER, ARCH_LA_PLTEXIT and RTLD_START into sysdeps/x86_64/dl-machine.h. There are no regressions on x86-64 nor x32. There are no changes in x86-64 _dl_start_user. On x32, _dl_start_user changes are <_dl_start_user>: mov %eax,%r12d + mov %esp,%r13d mov (%rsp),%edx mov %edx,%esi - mov %esp,%r13d and $0xfffffff0,%esp mov 0x0(%rip),%edi # <_dl_start_user+0x14> lea 0x8(%r13,%rdx,4),%ecx Signed-off-by: H.J. 
Lu Reviewed-by: Noah Goldstein (cherry picked from commit 652c6cf26927352fc0e37e4e60c6fc98ddf6d3b4) diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index a6de3793e4..4f12955875 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -139,37 +139,37 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], .globl _start\n\ .globl _dl_start_user\n\ _start:\n\ - movq %rsp, %rdi\n\ + mov %" RSP_LP ", %" RDI_LP "\n\ call _dl_start\n\ _dl_start_user:\n\ # Save the user entry point address in %r12.\n\ - movq %rax, %r12\n\ + mov %" RAX_LP ", %" R12_LP "\n\ # Save %rsp value in %r13.\n\ - movq %rsp, %r13\n\ + mov %" RSP_LP ", % " R13_LP "\n\ "\ RTLD_START_ENABLE_X86_FEATURES \ "\ # Read the original argument count.\n\ - movq (%rsp), %rdx\n\ + mov (%rsp), %" RDX_LP "\n\ # Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\ # argc -> rsi\n\ - movq %rdx, %rsi\n\ + mov %" RDX_LP ", %" RSI_LP "\n\ # And align stack for the _dl_init call. 
\n\ - andq $-16, %rsp\n\ + and $-16, %" RSP_LP "\n\ # _dl_loaded -> rdi\n\ - movq _rtld_local(%rip), %rdi\n\ + mov _rtld_local(%rip), %" RDI_LP "\n\ # env -> rcx\n\ - leaq 16(%r13,%rdx,8), %rcx\n\ + lea 2*" LP_SIZE "(%r13,%rdx," LP_SIZE "), %" RCX_LP "\n\ # argv -> rdx\n\ - leaq 8(%r13), %rdx\n\ + lea " LP_SIZE "(%r13), %" RDX_LP "\n\ # Clear %rbp to mark outermost frame obviously even for constructors.\n\ xorl %ebp, %ebp\n\ # Call the function to run the initializers.\n\ call _dl_init\n\ # Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\ - leaq _dl_fini(%rip), %rdx\n\ + lea _dl_fini(%rip), %" RDX_LP "\n\ # And make sure %rsp points to argc stored on the stack.\n\ - movq %r13, %rsp\n\ + mov %" R13_LP ", %" RSP_LP "\n\ # Jump to the user's entry point.\n\ jmp *%r12\n\ .previous\n\ @@ -234,8 +234,13 @@ elf_machine_plt_value (struct link_map *map, const ElfW(Rela) *reloc, /* Names of the architecture-specific auditing callback functions. */ +#ifdef __LP64__ #define ARCH_LA_PLTENTER x86_64_gnu_pltenter #define ARCH_LA_PLTEXIT x86_64_gnu_pltexit +#else +#define ARCH_LA_PLTENTER x32_gnu_pltenter +#define ARCH_LA_PLTEXIT x32_gnu_pltexit +#endif #endif /* !dl_machine_h */ diff --git a/sysdeps/x86_64/x32/dl-machine.h b/sysdeps/x86_64/x32/dl-machine.h deleted file mode 100644 index c35cee9261..0000000000 --- a/sysdeps/x86_64/x32/dl-machine.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Machine-dependent ELF dynamic relocation inline functions. x32 version. - Copyright (C) 2012-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Must allow to be included more than once. - See #ifdef RESOLVE_MAP in sysdeps/x86_64/dl-machine.h. */ -#include - -#ifndef _X32_DL_MACHINE_H -#define _X32_DL_MACHINE_H - -#undef ARCH_LA_PLTENTER -#undef ARCH_LA_PLTEXIT -#undef RTLD_START - -/* Names of the architecture-specific auditing callback functions. */ -#define ARCH_LA_PLTENTER x32_gnu_pltenter -#define ARCH_LA_PLTEXIT x32_gnu_pltexit - -/* Initial entry point code for the dynamic linker. - The C function `_dl_start' is the real entry point; - its return value is the user program's entry point. */ -#define RTLD_START asm ("\n\ -.text\n\ - .p2align 4\n\ -.globl _start\n\ -.globl _dl_start_user\n\ -_start:\n\ - movl %esp, %edi\n\ - call _dl_start\n\ -_dl_start_user:\n\ - # Save the user entry point address in %r12.\n\ - movl %eax, %r12d\n\ - # Read the original argument count.\n\ - movl (%rsp), %edx\n\ - # Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\ - # argc -> rsi\n\ - movl %edx, %esi\n\ - # Save %rsp value in %r13.\n\ - movl %esp, %r13d\n\ - # And align stack for the _dl_init call.\n\ - and $-16, %esp\n\ - # _dl_loaded -> rdi\n\ - movl _rtld_local(%rip), %edi\n\ - # env -> rcx\n\ - lea 8(%r13,%rdx,4), %ecx\n\ - # argv -> rdx\n\ - lea 4(%r13), %edx\n\ - # Clear %rbp to mark outermost frame obviously even for constructors.\n\ - xorl %ebp, %ebp\n\ - # Call the function to run the initializers.\n\ - call _dl_init\n\ - # Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\ - lea _dl_fini(%rip), %edx\n\ - # And make sure %rsp points to argc stored on the stack.\n\ - movl %r13d, 
%esp\n\ - # Jump to the user's entry point.\n\ - jmp *%r12\n\ -.previous\n\ -"); - -#endif /* !_X32_DL_MACHINE_H */ commit 898f25e0b1026be5701d05715600be646e9d0714 Author: H.J. Lu Date: Mon Jul 22 17:47:22 2024 -0700 x32/cet: Support shadow stack during startup for Linux 6.10 Use RXX_LP in RTLD_START_ENABLE_X86_FEATURES. Support shadow stack during startup for Linux 6.10: commit 2883f01ec37dd8668e7222dfdb5980c86fdfe277 Author: H.J. Lu Date: Fri Mar 15 07:04:33 2024 -0700 x86/shstk: Enable shadow stacks for x32 1. Add shadow stack support to x32 signal. 2. Use the 64-bit map_shadow_stack syscall for x32. 3. Set up shadow stack for x32. Add the map_shadow_stack system call to <fixup-asm-unistd.h> and regenerate arch-syscall.h. Tested on Intel Tiger Lake with CET enabled x32. There are no regressions with CET enabled x86-64. There are no changes in CET enabled x86-64 _dl_start_user. Signed-off-by: H.J. Lu Reviewed-by: Noah Goldstein (cherry picked from commit 8344c1f5514b1b5b1c8c6e48f4b802653bd23b71) diff --git a/sysdeps/unix/sysv/linux/x86_64/dl-cet.h b/sysdeps/unix/sysv/linux/x86_64/dl-cet.h index 1fe3133406..b4f7e6c9cd 100644 --- a/sysdeps/unix/sysv/linux/x86_64/dl-cet.h +++ b/sysdeps/unix/sysv/linux/x86_64/dl-cet.h @@ -92,9 +92,9 @@ dl_cet_ibt_enabled (void) # Pass GL(dl_x86_feature_1) to _dl_cet_setup_features.\n\ movl %edx, %edi\n\ # Align stack for the _dl_cet_setup_features call.\n\ - andq $-16, %rsp\n\ + and $-16, %" RSP_LP "\n\ call _dl_cet_setup_features\n\ # Restore %rax and %rsp from %r12 and %r13.\n\ - movq %r12, %rax\n\ - movq %r13, %rsp\n\ + mov %" R12_LP ", %" RAX_LP "\n\ + mov %" R13_LP ", %" RSP_LP "\n\ " diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h index 3040a47d72..df3e22236d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h @@ -155,6 +155,7 @@ #define __NR_lsm_set_self_attr 1073742284 #define __NR_lstat 1073741830 #define 
__NR_madvise 1073741852 +#define __NR_map_shadow_stack 1073742277 #define __NR_mbind 1073742061 #define __NR_membarrier 1073742148 #define __NR_memfd_create 1073742143 diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h b/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h index 98124169e6..47fa8af4ce 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h @@ -15,6 +15,10 @@ License along with the GNU C Library; if not, see . */ +#ifndef __NR_map_shadow_stack +# define __NR_map_shadow_stack 1073742277 +#endif + /* X32 uses the same 64-bit syscall interface for set_thread_area. */ #ifndef __NR_set_thread_area # define __NR_set_thread_area 1073742029 commit c0af0c2ba016030c778bc22c55f5f9498f96b8b9 Author: Noah Goldstein Date: Tue Aug 13 23:29:14 2024 +0800 x86: Fix bug in strchrnul-evex512 [BZ #32078] Issue was we were expecting no matches with CHAR before the start of the string in the page cross case. The check code in the page cross case: ``` and $0xffffffffffffffc0,%rax vmovdqa64 (%rax),%zmm17 vpcmpneqb %zmm17,%zmm16,%k1 vptestmb %zmm17,%zmm17,%k0{%k1} kmovq %k0,%rax inc %rax shr %cl,%rax je L(continue) ``` expects that all characters that neither match null nor CHAR will be 1s in `rax` prior to the `inc`. Then the `inc` will overflow all of the 1s where no relevant match was found. This is incorrect in the page-cross case, as the `vmovdqa64 (%rax),%zmm17` loads from before the start of the input string. If there are matches with CHAR before the start of the string, `rax` won't properly overflow. The fix is quite simple. Just replace: ``` inc %rax shr %cl,%rax ``` With: ``` sar %cl,%rax inc %rax ``` The arithmetic shift will clear any matches prior to the start of the string while maintaining the signbit so the 1s can properly overflow to zero in the case of no matches. Reviewed-by: H.J. 
Lu (cherry picked from commit 7da08862471dfec6fdae731c2a5f351ad485c71f) diff --git a/string/test-strchr.c b/string/test-strchr.c index c795eac6fa..72b17af687 100644 --- a/string/test-strchr.c +++ b/string/test-strchr.c @@ -255,6 +255,69 @@ check1 (void) check_result (impl, s, c, exp_result); } +static void +check2 (void) +{ + CHAR *s = (CHAR *) (buf1 + getpagesize () - 4 * sizeof (CHAR)); + CHAR *s_begin = (CHAR *) (buf1 + getpagesize () - 64); +#ifndef USE_FOR_STRCHRNUL + CHAR *exp_result = NULL; +#else + CHAR *exp_result = s + 1; +#endif + CHAR val = 0x12; + for (; s_begin != s; ++s_begin) + *s_begin = val; + + s[0] = val + 1; + s[1] = 0; + s[2] = val + 1; + s[3] = val + 1; + + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + s[3] = val; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + exp_result = s; + s[0] = val; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + + s[3] = val + 1; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + + s[0] = val + 1; + s[1] = val + 1; + s[2] = val + 1; + s[3] = val + 1; + s[4] = val; + exp_result = s + 4; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + s[4] = 0; +#ifndef USE_FOR_STRCHRNUL + exp_result = NULL; +#else + exp_result = s + 4; +#endif + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } +} + int test_main (void) { @@ -263,7 +326,7 @@ test_main (void) test_init (); check1 (); - + check2 (); printf ("%20s", ""); FOR_EACH_IMPL (impl, 0) printf ("\t%s", impl->name); diff --git a/sysdeps/x86_64/multiarch/strchr-evex-base.S b/sysdeps/x86_64/multiarch/strchr-evex-base.S index 04e2c0e79e..3a0b7c9d64 100644 --- a/sysdeps/x86_64/multiarch/strchr-evex-base.S +++ b/sysdeps/x86_64/multiarch/strchr-evex-base.S @@ -124,13 +124,13 @@ L(page_cross): VPCMPNE %VMM(1), %VMM(0), %k1 VPTEST %VMM(1), %VMM(1), %k0{%k1} KMOV %k0, %VRAX -# ifdef USE_AS_WCSCHR + sar %cl, %VRAX 
+#ifdef USE_AS_WCSCHR sub $VEC_MATCH_MASK, %VRAX -# else +#else inc %VRAX -# endif +#endif /* Ignore number of character for alignment adjustment. */ - shr %cl, %VRAX jz L(align_more) bsf %VRAX, %VRAX commit 586e4cd8c67df95ebb0bfecea3d007bff618d6d9 Author: Florian Weimer Date: Fri Aug 9 16:17:14 2024 +0200 Define __libc_initial for the static libc Reviewed-by: Adhemerval Zanella (cherry picked from commit eb0e50e9a1cf80a2ba6f33f990a08ef37a3267fb) diff --git a/include/libc-internal.h b/include/libc-internal.h index 87ac591835..1ef43ffe67 100644 --- a/include/libc-internal.h +++ b/include/libc-internal.h @@ -53,6 +53,9 @@ extern __typeof (__profile_frequency) __profile_frequency attribute_hidden; is not for an audit module, not loaded via dlmopen, and not loaded via static dlopen either). */ extern _Bool __libc_initial attribute_hidden; +#else +/* The static libc is always the initial namespace. */ +# define __libc_initial ((_Bool) 1) #endif #endif /* _LIBC_INTERNAL */ commit 709319f9de6f976f9feceb623cb2c96769e9d78b Author: Florian Weimer Date: Mon Aug 19 15:48:03 2024 +0200 string: strerror, strsignal cannot use buffer after dlmopen (bug 32026) Secondary namespaces have a different malloc. Allocating the buffer in one namespace and freeing it another results in heap corruption. Fix this by using a static string (potentially translated) in secondary namespaces. It would also be possible to use the malloc from the initial namespace to manage the buffer, but these functions would still not be safe to use in auditors etc. because a call to strerror could still free a buffer while it is used by the application. Another approach could use proper initial-exec TLS, duplicated in secondary namespaces, but that would need a callback interface for freeing libc resources in namespaces on thread exit, which does not exist today. 
Reviewed-by: Adhemerval Zanella (cherry picked from commit 25a5eb4010df94b412c67db9e346029de316d06b) diff --git a/NEWS b/NEWS index b9eb4a558b..10a125bc66 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,7 @@ The following bugs are resolved with this release: [31968] mremap implementation in C does not handle arguments correctly [31890] resolv: Allow short error responses to match any DNS query [31968] mremap implementation in C does not handle arguments correctly + [32026] strerror/strsignal TLS not handled correctly for secondary namespaces [32052] Name space violation in fortify wrappers Version 2.40 diff --git a/string/strerror_l.c b/string/strerror_l.c index 15cce261e6..70456e5bb4 100644 --- a/string/strerror_l.c +++ b/string/strerror_l.c @@ -20,7 +20,7 @@ #include #include #include - +#include static const char * translate (const char *str, locale_t loc) @@ -31,6 +31,12 @@ translate (const char *str, locale_t loc) return res; } +static char * +unknown_error (locale_t loc) +{ + return (char *) translate ("Unknown error", loc); +} + /* Return a string describing the errno code in ERRNUM. */ char * @@ -40,18 +46,25 @@ __strerror_l (int errnum, locale_t loc) char *err = (char *) __get_errlist (errnum); if (__glibc_unlikely (err == NULL)) { - struct tls_internal_t *tls_internal = __glibc_tls_internal (); - free (tls_internal->strerror_l_buf); - if (__asprintf (&tls_internal->strerror_l_buf, "%s%d", - translate ("Unknown error ", loc), errnum) > 0) - err = tls_internal->strerror_l_buf; - else + if (__libc_initial) { - /* The memory was freed above. */ - tls_internal->strerror_l_buf = NULL; - /* Provide a fallback translation. */ - err = (char *) translate ("Unknown error", loc); + struct tls_internal_t *tls_internal = __glibc_tls_internal (); + free (tls_internal->strerror_l_buf); + if (__asprintf (&tls_internal->strerror_l_buf, "%s%d", + translate ("Unknown error ", loc), errnum) > 0) + err = tls_internal->strerror_l_buf; + else + { + /* The memory was freed above. 
*/ + tls_internal->strerror_l_buf = NULL; + /* Provide a fallback translation. */ + err = unknown_error (loc); + } } + else + /* Secondary namespaces use a different malloc, so cannot + participate in the buffer management. */ + err = unknown_error (loc); } else err = (char *) translate (err, loc); diff --git a/string/strsignal.c b/string/strsignal.c index 3114601564..d9b0365468 100644 --- a/string/strsignal.c +++ b/string/strsignal.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Return a string describing the meaning of the signal number SIGNUM. */ char * @@ -30,21 +31,28 @@ strsignal (int signum) if (desc != NULL) return _(desc); - struct tls_internal_t *tls_internal = __glibc_tls_internal (); - free (tls_internal->strsignal_buf); + if (__libc_initial) + { + struct tls_internal_t *tls_internal = __glibc_tls_internal (); + free (tls_internal->strsignal_buf); - int r; + int r; #ifdef SIGRTMIN - if (signum >= SIGRTMIN && signum <= SIGRTMAX) - r = __asprintf (&tls_internal->strsignal_buf, _("Real-time signal %d"), - signum - SIGRTMIN); - else + if (signum >= SIGRTMIN && signum <= SIGRTMAX) + r = __asprintf (&tls_internal->strsignal_buf, _("Real-time signal %d"), + signum - SIGRTMIN); + else #endif - r = __asprintf (&tls_internal->strsignal_buf, _("Unknown signal %d"), - signum); - - if (r == -1) - tls_internal->strsignal_buf = NULL; - - return tls_internal->strsignal_buf; + r = __asprintf (&tls_internal->strsignal_buf, _("Unknown signal %d"), + signum); + + if (r >= 0) + return tls_internal->strsignal_buf; + else + tls_internal->strsignal_buf = NULL; + } + /* Fall through on asprintf error, and for !__libc_initial: + secondary namespaces use a different malloc and cannot + participate in the buffer management. */ + return _("Unknown signal"); } commit bc240ba7c86a74bc64d4234feadb49bb8e36e1c9 Author: Maciej W. 
Rozycki Date: Fri Jul 26 13:21:34 2024 +0100 support: Add FAIL test failure helper Add a FAIL test failure helper analogous to FAIL_RET, that does not cause the current function to return, providing a standardized way to report a test failure with a message supplied while permitting the caller to continue executing, for further reporting, cleaning up, etc. Update existing test cases that provide a conflicting definition of FAIL by removing the local FAIL definition and then as follows: - tst-fortify-syslog: provide a meaningful message in addition to the file name already added by <support/check.h>; 'support_record_failure' is already called by 'support_print_failure_impl' invoked by the new FAIL test failure helper. - tst-ctype: no update to FAIL calls required, with the name of the file and the line number within of the failure site additionally included by the new FAIL test failure helper, and error counting plus count reporting upon test program termination also already provided by 'support_record_failure' and 'support_report_failure' respectively, called by 'support_print_failure_impl' and 'adjust_exit_status' also respectively. However in a number of places 'printf' is called and the error count adjusted by hand, so update these places to make use of FAIL instead. And last but not least adjust the final summary just to report completion, with any error count following as reported by the test driver. - test-tgmath2: no update to FAIL calls required, with the name of the file of the failure site additionally included by the new FAIL test failure helper. Also there is no need to track the return status by hand as any call to FAIL will eventually cause the test case to return an unsuccessful exit status regardless of the return status from the test function, via a call to 'adjust_exit_status' made by the test driver. 
Reviewed-by: DJ Delorie (cherry picked from commit 1b97a9f23bf605ca608162089c94187573fb2a9e) diff --git a/debug/tst-fortify-syslog.c b/debug/tst-fortify-syslog.c index a7ddbf7c6b..2712acf689 100644 --- a/debug/tst-fortify-syslog.c +++ b/debug/tst-fortify-syslog.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -46,18 +45,13 @@ handler (int sig) _exit (127); } -#define FAIL() \ - do { \ - printf ("Failure on line %d\n", __LINE__); \ - support_record_failure (); \ - } while (0) #define CHK_FAIL_START \ chk_fail_ok = 1; \ if (! setjmp (chk_fail_buf)) \ { #define CHK_FAIL_END \ chk_fail_ok = 0; \ - FAIL (); \ + FAIL ("not supposed to reach here"); \ } static void diff --git a/localedata/tst-ctype.c b/localedata/tst-ctype.c index 9de979a2d7..a23689719c 100644 --- a/localedata/tst-ctype.c +++ b/localedata/tst-ctype.c @@ -21,6 +21,8 @@ #include #include +#include + static const char lower[] = "abcdefghijklmnopqrstuvwxyz"; static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; @@ -53,19 +55,11 @@ static struct classes #define nclasses (sizeof (classes) / sizeof (classes[0])) -#define FAIL(str, args...) \ - { \ - printf (" " str "\n", ##args); \ - ++errors; \ - } - - static int do_test (void) { const char *cp; const char *cp2; - int errors = 0; char *inpline = NULL; size_t inplinelen = 0; char *resline = NULL; @@ -394,11 +388,8 @@ punct = %04x alnum = %04x\n", { if (((__ctype_b[(unsigned int) *inp] & classes[n].mask) != 0) != (*resp != '0')) - { - printf (" is%s('%c' = '\\x%02x') %s true\n", inpline, - *inp, *inp, *resp == '1' ? "not" : "is"); - ++errors; - } + FAIL (" is%s('%c' = '\\x%02x') %s true\n", inpline, + *inp, *inp, *resp == '1' ? 
"not" : "is"); ++inp; ++resp; } @@ -408,11 +399,8 @@ punct = %04x alnum = %04x\n", while (*inp != '\0') { if (tolower (*inp) != *resp) - { - printf (" tolower('%c' = '\\x%02x') != '%c'\n", - *inp, *inp, *resp); - ++errors; - } + FAIL (" tolower('%c' = '\\x%02x') != '%c'\n", + *inp, *inp, *resp); ++inp; ++resp; } @@ -422,11 +410,8 @@ punct = %04x alnum = %04x\n", while (*inp != '\0') { if (toupper (*inp) != *resp) - { - printf (" toupper('%c' = '\\x%02x') != '%c'\n", - *inp, *inp, *resp); - ++errors; - } + FAIL (" toupper('%c' = '\\x%02x') != '%c'\n", + *inp, *inp, *resp); ++inp; ++resp; } @@ -436,14 +421,7 @@ punct = %04x alnum = %04x\n", } - if (errors != 0) - { - printf (" %d error%s for `%s' locale\n\n\n", errors, - errors == 1 ? "" : "s", setlocale (LC_ALL, NULL)); - return 1; - } - - printf (" No errors for `%s' locale\n\n\n", setlocale (LC_ALL, NULL)); + printf ("Completed testing for `%s' locale\n\n\n", setlocale (LC_ALL, NULL)); return 0; } diff --git a/math/test-tgmath2.c b/math/test-tgmath2.c index 37afa8a08a..4aeb877b8e 100644 --- a/math/test-tgmath2.c +++ b/math/test-tgmath2.c @@ -24,6 +24,8 @@ #include #include +#include + //#define DEBUG typedef complex float cfloat; @@ -87,13 +89,6 @@ enum int count; int counts[Tlast][C_last]; -#define FAIL(str) \ - do \ - { \ - printf ("%s failure on line %d\n", (str), __LINE__); \ - result = 1; \ - } \ - while (0) #define TEST_TYPE_ONLY(expr, rettype) \ do \ { \ @@ -133,8 +128,6 @@ int counts[Tlast][C_last]; int test_cos (const int Vint4, const long long int Vllong4) { - int result = 0; - TEST (cos (vfloat1), float, cos); TEST (cos (vdouble1), double, cos); TEST (cos (vldouble1), ldouble, cos); @@ -152,7 +145,7 @@ test_cos (const int Vint4, const long long int Vllong4) TEST (cos (Vcdouble1), cdouble, cos); TEST (cos (Vcldouble1), cldouble, cos); - return result; + return 0; } int diff --git a/support/check.h b/support/check.h index 711f34b83b..7ea22c7a2c 100644 --- a/support/check.h +++ b/support/check.h @@ -24,6 
+24,11 @@ __BEGIN_DECLS +/* Record a test failure, print the failure message to standard output + and pass the result of 1 through. */ +#define FAIL(...) \ + support_print_failure_impl (__FILE__, __LINE__, __VA_ARGS__) + /* Record a test failure, print the failure message to standard output and return 1. */ #define FAIL_RET(...) \ commit 27fb563bfee521239316142fa4968282bffa86a6 Author: Maciej W. Rozycki Date: Fri Jul 26 13:21:34 2024 +0100 stdio-common: Add test for vfscanf with matches longer than INT_MAX [BZ #27650] Complement commit b03e4d7bd25b ("stdio: fix vfscanf with matches longer than INT_MAX (bug 27650)") and add a test case for the issue, inspired by the reproducer provided with the bug report. This has been verified to succeed as from the commit referred and fail beforehand. As the test requires 2GiB of data to be passed around its performance has been evaluated using a choice of systems and the execution time determined to be respectively in the range of 9s for POWER9@2.166GHz, 24s for FU740@1.2GHz, and 40s for 74Kf@950MHz. As this is on the verge of and beyond the default timeout it has been increased by the factor of 8. Regardless, following recent practice the test has been added to the standard rather than extended set. 
Reviewed-by: DJ Delorie (cherry picked from commit 89cddc8a7096f3d9225868304d2bc0a1aaf07d63) diff --git a/stdio-common/Makefile b/stdio-common/Makefile index a63c05a120..e4f0146d2c 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -240,6 +240,7 @@ tests := \ tst-scanf-binary-c23 \ tst-scanf-binary-gnu11 \ tst-scanf-binary-gnu89 \ + tst-scanf-bz27650 \ tst-scanf-intn \ tst-scanf-round \ tst-scanf-to_inpunct \ @@ -328,6 +329,7 @@ generated += \ tst-printf-fp-free.mtrace \ tst-printf-fp-leak-mem.out \ tst-printf-fp-leak.mtrace \ + tst-scanf-bz27650.mtrace \ tst-vfprintf-width-prec-mem.out \ tst-vfprintf-width-prec.mtrace \ # generated @@ -419,6 +421,9 @@ tst-printf-fp-free-ENV = \ tst-printf-fp-leak-ENV = \ MALLOC_TRACE=$(objpfx)tst-printf-fp-leak.mtrace \ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so +tst-scanf-bz27650-ENV = \ + MALLOC_TRACE=$(objpfx)tst-scanf-bz27650.mtrace \ + LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so $(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc $(SHELL) $< $(common-objpfx) '$(test-program-prefix)'; \ diff --git a/stdio-common/tst-scanf-bz27650.c b/stdio-common/tst-scanf-bz27650.c new file mode 100644 index 0000000000..3a742bc865 --- /dev/null +++ b/stdio-common/tst-scanf-bz27650.c @@ -0,0 +1,108 @@ +/* Test for BZ #27650, formatted input matching beyond INT_MAX. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* Produce a stream of more than INT_MAX characters via buffer BUF of + size SIZE according to bookkeeping in COOKIE and then return EOF. */ + +static ssize_t +io_read (void *cookie, char *buf, size_t size) +{ + unsigned int *written = cookie; + unsigned int w = *written; + + if (w > INT_MAX) + return 0; + + memset (buf, 'a', size); + *written = w + size; + return size; +} + +/* Consume a stream of more than INT_MAX characters from an artificial + input stream of which none is the new line character. The call to + fscanf is supposed to complete upon the EOF condition of input, + however in the presence of BZ #27650 it will terminate prematurely + with characters still outstanding in input. Diagnose the condition + and return status accordingly. 
*/ + +int +do_test (void) +{ + static cookie_io_functions_t io_funcs = { .read = io_read }; + unsigned int written = 0; + FILE *in; + int v; + + mtrace (); + + in = fopencookie (&written, "r", io_funcs); + if (in == NULL) + { + FAIL ("fopencookie: %m"); + goto out; + } + + v = fscanf (in, "%*[^\n]"); + if (ferror (in)) + { + FAIL ("fscanf: input failure, at %u: %m", written); + goto out_close; + } + else if (v == EOF) + { + FAIL ("fscanf: unexpected end of file, at %u", written); + goto out_close; + } + + if (!feof (in)) + { + v = fgetc (in); + if (ferror (in)) + FAIL ("fgetc: input failure: %m"); + else if (v == EOF) + FAIL ("fgetc: unexpected end of file after missing end of file"); + else if (v == '\n') + FAIL ("unexpected new line character received"); + else + FAIL ("character received after end of file expected: \\x%02x", v); + } + +out_close: + if (fclose (in) != 0) + FAIL ("fclose: %m"); + +out: + return EXIT_SUCCESS; +} + +#define TIMEOUT (DEFAULT_TIMEOUT * 8) +#include commit 2f749d2b15cbc82268d7f8735f21ae1e3b68754f Author: Siddhesh Poyarekar Date: Wed Aug 14 19:20:04 2024 -0400 Make tst-ungetc use libsupport Signed-off-by: Siddhesh Poyarekar Reviewed-by: Carlos O'Donell (cherry picked from commit 3f7df7e757f4efec38e45d4068e5492efcac4856) diff --git a/stdio-common/tst-ungetc.c b/stdio-common/tst-ungetc.c index 1344b2b591..5c808f0734 100644 --- a/stdio-common/tst-ungetc.c +++ b/stdio-common/tst-ungetc.c @@ -1,70 +1,72 @@ -/* Test for ungetc bugs. */ +/* Test for ungetc bugs. + Copyright (C) 1996-2024 Free Software Foundation, Inc. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ #include #include -#include - -#undef assert -#define assert(x) \ - if (!(x)) \ - { \ - fputs ("test failed: " #x "\n", stderr); \ - retval = 1; \ - goto the_end; \ - } +#include +#include +#include +#include +#include -int -main (int argc, char *argv[]) +static int +do_test (void) { - char name[] = "/tmp/tst-ungetc.XXXXXX"; + char *name = NULL; FILE *fp = NULL; - int retval = 0; int c; char buffer[64]; - int fd = mkstemp (name); + int fd = create_temp_file ("tst-ungetc.", &name); if (fd == -1) - { - printf ("mkstemp failed: %m\n"); - return 1; - } - close (fd); - fp = fopen (name, "w"); - assert (fp != NULL) - fputs ("bla", fp); - fclose (fp); - fp = NULL; + FAIL_EXIT1 ("cannot create temporary file: %m"); + xclose (fd); - fp = fopen (name, "r"); - assert (fp != NULL); - assert (ungetc ('z', fp) == 'z'); - assert (getc (fp) == 'z'); - assert (getc (fp) == 'b'); - assert (getc (fp) == 'l'); - assert (ungetc ('m', fp) == 'm'); - assert (getc (fp) == 'm'); - assert ((c = getc (fp)) == 'a'); - assert (getc (fp) == EOF); - assert (ungetc (c, fp) == c); - assert (feof (fp) == 0); - assert (getc (fp) == c); - assert (getc (fp) == EOF); - fclose (fp); - fp = NULL; + fp = xfopen (name, "w"); + fputs ("bla", fp); + xfclose (fp); - fp = fopen (name, "r"); - assert (fp != NULL); - assert (getc (fp) == 'b'); - assert (getc (fp) == 'l'); - assert (ungetc ('b', fp) == 'b'); - assert (fread (buffer, 1, 64, fp) == 2); - assert (buffer[0] == 'b'); - assert (buffer[1] == 'a'); + fp = xfopen (name, "r"); + TEST_VERIFY_EXIT (ungetc ('z', fp) == 'z'); + TEST_VERIFY_EXIT (getc (fp) == 'z'); + TEST_VERIFY_EXIT 
(getc (fp) == 'b'); + TEST_VERIFY_EXIT (getc (fp) == 'l'); + TEST_VERIFY_EXIT (ungetc ('m', fp) == 'm'); + TEST_VERIFY_EXIT (getc (fp) == 'm'); + TEST_VERIFY_EXIT ((c = getc (fp)) == 'a'); + TEST_VERIFY_EXIT (getc (fp) == EOF); + TEST_VERIFY_EXIT (ungetc (c, fp) == c); + TEST_VERIFY_EXIT (feof (fp) == 0); + TEST_VERIFY_EXIT (getc (fp) == c); + TEST_VERIFY_EXIT (getc (fp) == EOF); + xfclose (fp); -the_end: - if (fp != NULL) - fclose (fp); - unlink (name); + fp = xfopen (name, "r"); + TEST_VERIFY_EXIT (getc (fp) == 'b'); + TEST_VERIFY_EXIT (getc (fp) == 'l'); + TEST_VERIFY_EXIT (ungetc ('b', fp) == 'b'); + TEST_VERIFY_EXIT (fread (buffer, 1, 64, fp) == 2); + TEST_VERIFY_EXIT (buffer[0] == 'b'); + TEST_VERIFY_EXIT (buffer[1] == 'a'); + xfclose (fp); - return retval; + return 0; } + +#include commit dac7a0694b5e853f08be518cd5a133ac5804666d Author: Siddhesh Poyarekar Date: Tue Aug 13 21:00:06 2024 -0400 ungetc: Fix uninitialized read when putting into unused streams [BZ #27821] When ungetc is called on an unused stream, the backup buffer is allocated without the main get area being present. This results in every subsequent ungetc (as the stream remains in the backup area) checking uninitialized memory in the backup buffer when trying to put a character back into the stream. Avoid comparing the input character with buffer contents when in backup to avoid this uninitialized read. The uninitialized read is harmless in this context since the location is promptly overwritten with the input character, thus fulfilling ungetc functionality. Also adjust wording in the manual to drop the paragraph that says glibc cannot do multiple ungetc back to back since with this change, ungetc can actually do this. 
Signed-off-by: Siddhesh Poyarekar Reviewed-by: Carlos O'Donell (cherry picked from commit cdf0f88f97b0aaceb894cc02b21159d148d7065c) diff --git a/libio/genops.c b/libio/genops.c index 99f5e80f20..b012fa33d2 100644 --- a/libio/genops.c +++ b/libio/genops.c @@ -662,7 +662,7 @@ _IO_sputbackc (FILE *fp, int c) { int result; - if (fp->_IO_read_ptr > fp->_IO_read_base + if (fp->_IO_read_ptr > fp->_IO_read_base && !_IO_in_backup (fp) && (unsigned char)fp->_IO_read_ptr[-1] == (unsigned char)c) { fp->_IO_read_ptr--; diff --git a/manual/stdio.texi b/manual/stdio.texi index 8517653507..92614775fa 100644 --- a/manual/stdio.texi +++ b/manual/stdio.texi @@ -1467,11 +1467,9 @@ program; usually @code{ungetc} is used only to unread a character that was just read from the same stream. @Theglibc{} supports this even on files opened in binary mode, but other systems might not. -@Theglibc{} only supports one character of pushback---in other -words, it does not work to call @code{ungetc} twice without doing input -in between. Other systems might let you push back multiple characters; -then reading from the stream retrieves the characters in the reverse -order that they were pushed. +@Theglibc{} supports pushing back multiple characters; subsequently +reading from the stream retrieves the characters in the reverse order +that they were pushed. Pushing back characters doesn't alter the file; only the internal buffering for the stream is affected. 
If a file positioning function diff --git a/stdio-common/tst-ungetc.c b/stdio-common/tst-ungetc.c index 5c808f0734..388b202493 100644 --- a/stdio-common/tst-ungetc.c +++ b/stdio-common/tst-ungetc.c @@ -48,6 +48,8 @@ do_test (void) TEST_VERIFY_EXIT (getc (fp) == 'b'); TEST_VERIFY_EXIT (getc (fp) == 'l'); TEST_VERIFY_EXIT (ungetc ('m', fp) == 'm'); + TEST_VERIFY_EXIT (ungetc ('n', fp) == 'n'); + TEST_VERIFY_EXIT (getc (fp) == 'n'); TEST_VERIFY_EXIT (getc (fp) == 'm'); TEST_VERIFY_EXIT ((c = getc (fp)) == 'a'); TEST_VERIFY_EXIT (getc (fp) == EOF); commit e24902f409994f226dbc6fde2476009df452a18f Author: Siddhesh Poyarekar Date: Tue Aug 13 21:08:49 2024 -0400 ungetc: Fix backup buffer leak on program exit [BZ #27821] If a file descriptor is left unclosed and is cleaned up by _IO_cleanup on exit, its backup buffer remains unfreed, registering as a leak in valgrind. This is not strictly an issue since (1) the program should ideally be closing the stream once it's not in use and (2) the program is about to exit anyway, so keeping the backup buffer around a wee bit longer isn't a real problem. Free it anyway to keep valgrind happy when the streams in question are the standard ones, i.e. stdout, stdin or stderr. Also, the _IO_have_backup macro checks for _IO_save_base, which is a roundabout way to check for a backup buffer instead of directly looking for _IO_backup_base. The roundabout check breaks when the main get area has not been used and user pushes a char into the backup buffer with ungetc. Fix this to use the _IO_backup_base directly. Signed-off-by: Siddhesh Poyarekar Reviewed-by: Carlos O'Donell (cherry picked from commit 3e1d8d1d1dca24ae90df2ea826a8916896fc7e77) diff --git a/libio/genops.c b/libio/genops.c index b012fa33d2..35d8b30710 100644 --- a/libio/genops.c +++ b/libio/genops.c @@ -816,6 +816,12 @@ _IO_unbuffer_all (void) legacy = 1; #endif + /* Free up the backup area if it was ever allocated. 
*/ + if (_IO_have_backup (fp)) + _IO_free_backup_area (fp); + if (fp->_mode > 0 && _IO_have_wbackup (fp)) + _IO_free_wbackup_area (fp); + if (! (fp->_flags & _IO_UNBUFFERED) /* Iff stream is un-orientated, it wasn't used. */ && (legacy || fp->_mode != 0)) diff --git a/libio/libioP.h b/libio/libioP.h index 1af287b19f..616253fcd0 100644 --- a/libio/libioP.h +++ b/libio/libioP.h @@ -577,8 +577,8 @@ extern void _IO_old_init (FILE *fp, int flags) __THROW; ((__fp)->_wide_data->_IO_write_base \ = (__fp)->_wide_data->_IO_write_ptr = __p, \ (__fp)->_wide_data->_IO_write_end = (__ep)) -#define _IO_have_backup(fp) ((fp)->_IO_save_base != NULL) -#define _IO_have_wbackup(fp) ((fp)->_wide_data->_IO_save_base != NULL) +#define _IO_have_backup(fp) ((fp)->_IO_backup_base != NULL) +#define _IO_have_wbackup(fp) ((fp)->_wide_data->_IO_backup_base != NULL) #define _IO_in_backup(fp) ((fp)->_flags & _IO_IN_BACKUP) #define _IO_have_markers(fp) ((fp)->_markers != NULL) #define _IO_blen(fp) ((fp)->_IO_buf_end - (fp)->_IO_buf_base) diff --git a/stdio-common/Makefile b/stdio-common/Makefile index e4f0146d2c..a91754f52d 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -254,6 +254,7 @@ tests := \ tst-swscanf \ tst-tmpnam \ tst-ungetc \ + tst-ungetc-leak \ tst-unlockedio \ tst-vfprintf-mbs-prec \ tst-vfprintf-user-type \ @@ -316,6 +317,7 @@ tests-special += \ $(objpfx)tst-printf-bz25691-mem.out \ $(objpfx)tst-printf-fp-free-mem.out \ $(objpfx)tst-printf-fp-leak-mem.out \ + $(objpfx)tst-ungetc-leak-mem.out \ $(objpfx)tst-vfprintf-width-prec-mem.out \ # tests-special @@ -330,6 +332,8 @@ generated += \ tst-printf-fp-leak-mem.out \ tst-printf-fp-leak.mtrace \ tst-scanf-bz27650.mtrace \ + tst-ungetc-leak-mem.out \ + tst-ungetc-leak.mtrace \ tst-vfprintf-width-prec-mem.out \ tst-vfprintf-width-prec.mtrace \ # generated @@ -424,6 +428,9 @@ tst-printf-fp-leak-ENV = \ tst-scanf-bz27650-ENV = \ MALLOC_TRACE=$(objpfx)tst-scanf-bz27650.mtrace \ 
LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so +tst-ungetc-leak-ENV = \ + MALLOC_TRACE=$(objpfx)tst-ungetc-leak.mtrace \ + LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so $(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc $(SHELL) $< $(common-objpfx) '$(test-program-prefix)'; \ diff --git a/stdio-common/tst-ungetc-leak.c b/stdio-common/tst-ungetc-leak.c new file mode 100644 index 0000000000..6c5152b43f --- /dev/null +++ b/stdio-common/tst-ungetc-leak.c @@ -0,0 +1,32 @@ +/* Test for memory leak with ungetc when stream is unused. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +static int +do_test (void) +{ + mtrace (); + TEST_COMPARE (ungetc('y', stdin), 'y'); + return 0; +} + +#include commit 3b3350d7baa8de70b71c02e964d5b8343749ebf8 Author: Maciej W. Rozycki Date: Fri Jul 26 13:21:34 2024 +0100 posix: Use <support/check.h> facilities in tst-truncate and tst-truncate64 Remove local FAIL macro in favor of FAIL_RET from <support/check.h>, which provides equivalent reporting, with the name of the file of the failure site additionally included, for the tst-truncate-common core shared between the tst-truncate and tst-truncate64 tests.
Reviewed-by: DJ Delorie (cherry picked from commit fe47595504a55e7bb992f8928533df154b510383) diff --git a/posix/tst-truncate-common.c b/posix/tst-truncate-common.c index b774fa46b8..b8c561ffdb 100644 --- a/posix/tst-truncate-common.c +++ b/posix/tst-truncate-common.c @@ -21,6 +21,8 @@ #include #include +#include + static void do_prepare (void); #define PREPARE(argc, argv) do_prepare () static int do_test (void); @@ -42,9 +44,6 @@ do_prepare (void) } } -#define FAIL(str) \ - do { printf ("error: %s (line %d)\n", str, __LINE__); return 1; } while (0) - static int do_test_with_offset (off_t offset) { @@ -54,35 +53,35 @@ do_test_with_offset (off_t offset) memset (buf, 0xcf, sizeof (buf)); if (pwrite (temp_fd, buf, sizeof (buf), offset) != sizeof (buf)) - FAIL ("write failed"); + FAIL_RET ("write failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + sizeof (buf))) - FAIL ("initial size wrong"); + FAIL_RET ("initial size wrong"); if (ftruncate (temp_fd, offset + 800) < 0) - FAIL ("size reduction with ftruncate failed"); + FAIL_RET ("size reduction with ftruncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 800)) - FAIL ("size after reduction with ftruncate is incorrect"); + FAIL_RET ("size after reduction with ftruncate is incorrect"); /* The following test covers more than POSIX. POSIX does not require that ftruncate() can increase the file size. But we are testing Unix systems. 
*/ if (ftruncate (temp_fd, offset + 1200) < 0) - FAIL ("size increate with ftruncate failed"); + FAIL_RET ("size increate with ftruncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 1200)) - FAIL ("size after increase is incorrect"); + FAIL_RET ("size after increase is incorrect"); if (truncate (temp_filename, offset + 800) < 0) - FAIL ("size reduction with truncate failed"); + FAIL_RET ("size reduction with truncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 800)) - FAIL ("size after reduction with truncate incorrect"); + FAIL_RET ("size after reduction with truncate incorrect"); /* The following test covers more than POSIX. POSIX does not require that truncate() can increase the file size. But we are testing Unix systems. */ if (truncate (temp_filename, (offset + 1200)) < 0) - FAIL ("size increase with truncate failed"); + FAIL_RET ("size increase with truncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 1200)) - FAIL ("size increase with truncate is incorrect"); + FAIL_RET ("size increase with truncate is incorrect"); return 0; } commit 3414b17e9db16c3bc0de5b49555c4f94f155ffc8 Author: Maciej W. Rozycki Date: Fri Jul 26 13:21:34 2024 +0100 nptl: Use <support/check.h> facilities in tst-setuid3 Remove local FAIL macro in favor of FAIL_EXIT1 from <support/check.h>, which provides equivalent reporting, with the name of the file and the line number within of the failure site additionally included. Remove FAIL_ERR altogether and include ": %m" explicitly with the format string supplied to FAIL_EXIT1 as there seems little value to have a separate macro just for this. Reviewed-by: DJ Delorie (cherry picked from commit 8c98195af6e6f1ce21743fc26c723e0f7e45bcf2) diff --git a/sysdeps/pthread/tst-setuid3.c b/sysdeps/pthread/tst-setuid3.c index 83f42a0ae5..3845ab03d3 100644 --- a/sysdeps/pthread/tst-setuid3.c +++ b/sysdeps/pthread/tst-setuid3.c @@ -15,24 +15,19 @@ License along with the GNU C Library; if not, see .
*/ -#include #include #include #include #include +#include + /* The test must run under a non-privileged user ID. */ static const uid_t test_uid = 1; static pthread_barrier_t barrier1; static pthread_barrier_t barrier2; -#define FAIL(fmt, ...) \ - do { printf ("FAIL: " fmt "\n", __VA_ARGS__); _exit (1); } while (0) - -#define FAIL_ERR(fmt, ...) \ - do { printf ("FAIL: " fmt ": %m\n", __VA_ARGS__); _exit (1); } while (0) - /* True if x is not a successful return code from pthread_barrier_wait. */ static inline bool is_invalid_barrier_ret (int x) @@ -45,10 +40,10 @@ thread_func (void *ctx __attribute__ ((unused))) { int ret = pthread_barrier_wait (&barrier1); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier1) (on thread): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier1) (on thread): %d", ret); ret = pthread_barrier_wait (&barrier2); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier2) (on thread): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier2) (on thread): %d", ret); return NULL; } @@ -59,13 +54,13 @@ setuid_failure (int phase) switch (ret) { case 0: - FAIL ("setuid succeeded unexpectedly in phase %d", phase); + FAIL_EXIT1 ("setuid succeeded unexpectedly in phase %d", phase); case -1: if (errno != EPERM) - FAIL_ERR ("setuid phase %d", phase); + FAIL_EXIT1 ("setuid phase %d: %m", phase); break; default: - FAIL ("invalid setuid return value in phase %d: %d", phase, ret); + FAIL_EXIT1 ("invalid setuid return value in phase %d: %d", phase, ret); } } @@ -74,42 +69,42 @@ do_test (void) { if (getuid () == 0) if (setuid (test_uid) != 0) - FAIL_ERR ("setuid (%u)", (unsigned) test_uid); + FAIL_EXIT1 ("setuid (%u): %m", (unsigned) test_uid); if (setuid (getuid ())) - FAIL_ERR ("setuid (%s)", "getuid ()"); + FAIL_EXIT1 ("setuid (%s): %m", "getuid ()"); setuid_failure (1); int ret = pthread_barrier_init (&barrier1, NULL, 2); if (ret != 0) - FAIL ("pthread_barrier_init (barrier1): %d", ret); + FAIL_EXIT1 
("pthread_barrier_init (barrier1): %d", ret); ret = pthread_barrier_init (&barrier2, NULL, 2); if (ret != 0) - FAIL ("pthread_barrier_init (barrier2): %d", ret); + FAIL_EXIT1 ("pthread_barrier_init (barrier2): %d", ret); pthread_t thread; ret = pthread_create (&thread, NULL, thread_func, NULL); if (ret != 0) - FAIL ("pthread_create: %d", ret); + FAIL_EXIT1 ("pthread_create: %d", ret); /* Ensure that the thread is running properly. */ ret = pthread_barrier_wait (&barrier1); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier1): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier1): %d", ret); setuid_failure (2); /* Check success case. */ if (setuid (getuid ()) != 0) - FAIL_ERR ("setuid (%s)", "getuid ()"); + FAIL_EXIT1 ("setuid (%s): %m", "getuid ()"); /* Shutdown. */ ret = pthread_barrier_wait (&barrier2); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier2): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier2): %d", ret); ret = pthread_join (thread, NULL); if (ret != 0) - FAIL ("pthread_join: %d", ret); + FAIL_EXIT1 ("pthread_join: %d", ret); return 0; } commit b7edcfa0f41ccfaeb665a661d3a6a594c8d95848 Author: Florian Weimer Date: Thu Aug 1 23:31:23 2024 +0200 elf: Clarify and invert second argument of _dl_allocate_tls_init Also remove an outdated comment: _dl_allocate_tls_init is called as part of pthread_create. Reviewed-by: Carlos O'Donell (cherry picked from commit fe06fb313bddf7e4530056897d4a706606e49377) diff --git a/elf/dl-tls.c b/elf/dl-tls.c index 3d221273f1..ecb966d282 100644 --- a/elf/dl-tls.c +++ b/elf/dl-tls.c @@ -552,9 +552,14 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) /* Allocate initial TLS. RESULT should be a non-NULL pointer to storage for the TLS space. The DTV may be resized, and so this function may call malloc to allocate that space. The loader's GL(dl_load_tls_lock) - is taken when manipulating global TLS-related data in the loader. 
*/ + is taken when manipulating global TLS-related data in the loader. + + If MAIN_THREAD, this is the first call during process + initialization. In this case, TLS initialization for secondary + (audit) namespaces is skipped because that has already been handled + by dlopen. */ void * -_dl_allocate_tls_init (void *result, bool init_tls) +_dl_allocate_tls_init (void *result, bool main_thread) { if (result == NULL) /* The memory allocation failed. */ @@ -633,7 +638,7 @@ _dl_allocate_tls_init (void *result, bool init_tls) because it would already be set by the audit setup. However, subsequent thread creation would need to follow the default behaviour. */ - if (map->l_ns != LM_ID_BASE && !init_tls) + if (map->l_ns != LM_ID_BASE && main_thread) continue; memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size), '\0', @@ -661,7 +666,7 @@ _dl_allocate_tls (void *mem) { return _dl_allocate_tls_init (mem == NULL ? _dl_allocate_tls_storage () - : allocate_dtv (mem), true); + : allocate_dtv (mem), false); } rtld_hidden_def (_dl_allocate_tls) diff --git a/elf/rtld.c b/elf/rtld.c index bfdf632e77..09b9c9993b 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -2338,7 +2338,7 @@ dl_main (const ElfW(Phdr) *phdr, into the main thread's TLS area, which we allocated above. Note: thread-local variables must only be accessed after completing the next step. */ - _dl_allocate_tls_init (tcbp, false); + _dl_allocate_tls_init (tcbp, true); /* And finally install it for the main thread. */ if (! __rtld_tls_init_tp_called) diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index f35a8369bd..2cb562f8ea 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -139,7 +139,7 @@ get_cached_stack (size_t *sizep, void **memp) memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t)); /* Re-initialize the TLS. 
*/ - _dl_allocate_tls_init (TLS_TPADJ (result), true); + _dl_allocate_tls_init (TLS_TPADJ (result), false); return result; } diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 656e8a3fa0..154efb0e19 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -1200,10 +1200,8 @@ extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp); extern void _dl_allocate_static_tls (struct link_map *map) attribute_hidden; -/* These are internal entry points to the two halves of _dl_allocate_tls, - only used within rtld.c itself at startup time. */ extern void *_dl_allocate_tls_storage (void) attribute_hidden; -extern void *_dl_allocate_tls_init (void *, bool); +extern void *_dl_allocate_tls_init (void *result, bool main_thread); rtld_hidden_proto (_dl_allocate_tls_init) /* True if the TCB has been set up. */ commit f496b750f135e57da921e975835c44bd199246dd Author: Florian Weimer Date: Thu Aug 1 23:31:30 2024 +0200 elf: Avoid re-initializing already allocated TLS in dlopen (bug 31717) The old code used l_init_called as an indicator for whether TLS initialization was complete. However, it is possible that TLS for an object is initialized, written to, and then dlopen for this object is called again, and l_init_called is not true at this point. Previously, this resulted in TLS being initialized twice, discarding any interim writes (technically introducing a use-after-free bug even). This commit introduces an explicit per-object flag, l_tls_in_slotinfo. It indicates whether _dl_add_to_slotinfo has been called for this object. This flag is used to avoid double-initialization of TLS. In update_tls_slotinfo, the first_static_tls micro-optimization is removed because preserving the initialization flag for subsequent use by the second loop for static TLS is a bit complicated, and another per-object flag does not seem to be worth it.
Furthermore, the l_init_called flag is dropped from the second loop (for static TLS initialization) because l_need_tls_init on its own prevents double-initialization. The remaining l_init_called usage in resize_scopes and update_scopes is just an optimization due to the use of scope_has_map, so it is not changed in this commit. The isupper check ensures that the libc.so.6 TLS is not reverted. Such a revert happens if l_need_tls_init is not cleared in _dl_allocate_tls_init for the main_thread case, now that l_init_called is not checked anymore in update_tls_slotinfo in elf/dl-open.c. Reported-by: Jonathon Anderson Reviewed-by: Carlos O'Donell (cherry picked from commit 5097cd344fd243fb8deb6dec96e8073753f962f9) diff --git a/NEWS b/NEWS index 10a125bc66..5b20efbf6c 100644 --- a/NEWS +++ b/NEWS @@ -10,7 +10,7 @@ Version 2.40.1 The following bugs are resolved with this release: [30081] resolv: Do not wait for non-existing second DNS response after error - [31968] mremap implementation in C does not handle arguments correctly + [31717] elf: Avoid re-initializing already allocated TLS in dlopen [31890] resolv: Allow short error responses to match any DNS query [31968] mremap implementation in C does not handle arguments correctly [32026] strerror/strsignal TLS not handled correctly for secondary namespaces diff --git a/elf/Makefile b/elf/Makefile index a3475f3fb5..a03c6520d8 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -416,6 +416,10 @@ tests += \ tst-dlmopen4 \ tst-dlopen-self \ tst-dlopen-tlsmodid \ + tst-dlopen-tlsreinit1 \ + tst-dlopen-tlsreinit2 \ + tst-dlopen-tlsreinit3 \ + tst-dlopen-tlsreinit4 \ tst-dlopenfail \ tst-dlopenfail-2 \ tst-dlopenrpath \ @@ -853,6 +857,9 @@ modules-names += \ tst-dlmopen-twice-mod1 \ tst-dlmopen-twice-mod2 \ tst-dlmopen1mod \ + tst-dlopen-tlsreinitmod1 \ + tst-dlopen-tlsreinitmod2 \ + tst-dlopen-tlsreinitmod3 \ tst-dlopenfaillinkmod \ tst-dlopenfailmod1 \ tst-dlopenfailmod2 \ @@ -3118,3 +3125,26 @@ $(objpfx)tst-recursive-tls.out: \ 0
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) $(objpfx)tst-recursive-tlsmod%.os: tst-recursive-tlsmodN.c $(compile-command.c) -DVAR=thread_$* -DFUNC=get_threadvar_$* + +# Order matters here. The test needs the constructor for +# tst-dlopen-tlsreinitmod2.so to be called first. +LDFLAGS-tst-dlopen-tlsreinitmod1.so = -Wl,--no-as-needed +$(objpfx)tst-dlopen-tlsreinitmod1.so: \ + $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so +LDFLAGS-tst-dlopen-tlsreinit2 = -Wl,--no-as-needed +$(objpfx)tst-dlopen-tlsreinit2: \ + $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so +LDFLAGS-tst-dlopen-tlsreinit4 = -Wl,--no-as-needed +$(objpfx)tst-dlopen-tlsreinit4: \ + $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so +# tst-dlopen-tlsreinitmod2.so is underlinked and refers to +# tst-dlopen-tlsreinitmod3.so. The dependency is provided via +# $(objpfx)tst-dlopen-tlsreinitmod1.so. +tst-dlopen-tlsreinitmod2.so-no-z-defs = yes +$(objpfx)tst-dlopen-tlsreinit.out: $(objpfx)tst-dlopen-tlsreinitmod1.so \ + $(objpfx)tst-dlopen-tlsreinitmod2.so $(objpfx)tst-dlopen-tlsreinitmod3.so +# Reuse an audit module which provides ample debug logging. +$(objpfx)tst-dlopen-tlsreinit3.out: $(objpfx)tst-auditmod1.so +tst-dlopen-tlsreinit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so +$(objpfx)tst-dlopen-tlsreinit4.out: $(objpfx)tst-auditmod1.so +tst-dlopen-tlsreinit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so diff --git a/elf/dl-open.c b/elf/dl-open.c index c378da16c0..8556e7bd2f 100644 --- a/elf/dl-open.c +++ b/elf/dl-open.c @@ -363,17 +363,8 @@ resize_tls_slotinfo (struct link_map *new) { bool any_tls = false; for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i) - { - struct link_map *imap = new->l_searchlist.r_list[i]; - - /* Only add TLS memory if this object is loaded now and - therefore is not yet initialized. */ - if (! 
imap->l_init_called && imap->l_tls_blocksize > 0) - { - _dl_add_to_slotinfo (imap, false); - any_tls = true; - } - } + if (_dl_add_to_slotinfo (new->l_searchlist.r_list[i], false)) + any_tls = true; return any_tls; } @@ -383,22 +374,8 @@ resize_tls_slotinfo (struct link_map *new) static void update_tls_slotinfo (struct link_map *new) { - unsigned int first_static_tls = new->l_searchlist.r_nlist; for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i) - { - struct link_map *imap = new->l_searchlist.r_list[i]; - - /* Only add TLS memory if this object is loaded now and - therefore is not yet initialized. */ - if (! imap->l_init_called && imap->l_tls_blocksize > 0) - { - _dl_add_to_slotinfo (imap, true); - - if (imap->l_need_tls_init - && first_static_tls == new->l_searchlist.r_nlist) - first_static_tls = i; - } - } + _dl_add_to_slotinfo (new->l_searchlist.r_list[i], true); size_t newgen = GL(dl_tls_generation) + 1; if (__glibc_unlikely (newgen == 0)) @@ -410,13 +387,11 @@ TLS generation counter wrapped! Please report this.")); /* We need a second pass for static tls data, because _dl_update_slotinfo must not be run while calls to _dl_add_to_slotinfo are still pending. */ - for (unsigned int i = first_static_tls; i < new->l_searchlist.r_nlist; ++i) + for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i) { struct link_map *imap = new->l_searchlist.r_list[i]; - if (imap->l_need_tls_init - && ! imap->l_init_called - && imap->l_tls_blocksize > 0) + if (imap->l_need_tls_init && imap->l_tls_blocksize > 0) { /* For static TLS we have to allocate the memory here and now, but we can delay updating the DTV. */ diff --git a/elf/dl-tls.c b/elf/dl-tls.c index ecb966d282..3d529b722c 100644 --- a/elf/dl-tls.c +++ b/elf/dl-tls.c @@ -632,17 +632,21 @@ _dl_allocate_tls_init (void *result, bool main_thread) some platforms use in static programs requires it. */ dtv[map->l_tls_modid].pointer.val = dest; - /* Copy the initialization image and clear the BSS part. 
For - audit modules or dependencies with initial-exec TLS, we can not - set the initial TLS image on default loader initialization - because it would already be set by the audit setup. However, - subsequent thread creation would need to follow the default - behaviour. */ + /* Copy the initialization image and clear the BSS part. + For audit modules or dependencies with initial-exec TLS, + we can not set the initial TLS image on default loader + initialization because it would already be set by the + audit setup, which uses the dlopen code and already + clears l_need_tls_init. Calls with !main_thread from + pthread_create need to initialze TLS for the current + thread regardless of namespace. */ if (map->l_ns != LM_ID_BASE && main_thread) continue; memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size), '\0', map->l_tls_blocksize - map->l_tls_initimage_size); + if (main_thread) + map->l_need_tls_init = 0; } total += cnt; @@ -1099,9 +1103,32 @@ _dl_tls_initial_modid_limit_setup (void) } -void +/* Add module to slot information data. If DO_ADD is false, only the + required memory is allocated. Must be called with + GL (dl_load_tls_lock) acquired. If the function has already been + called for the link map L with !DO_ADD, then this function will not + raise an exception, otherwise it is possible that it encounters a + memory allocation failure. + + Return false if L has already been added to the slotinfo data, or + if L has no TLS data. If the returned value is true, L has been + added with this call (DO_ADD), or has been added in a previous call + (!DO_ADD). + + The expected usage is as follows: Call _dl_add_to_slotinfo for + several link maps with DO_ADD set to false, and record if any calls + result in a true result. If there was a true result, call + _dl_add_to_slotinfo again, this time with DO_ADD set to true. (For + simplicity, it's possible to call the function for link maps where + the previous result was false.) 
The return value from the second + round of calls can be ignored. If there was true result initially, + call _dl_update_slotinfo to update the TLS generation counter. */ +bool _dl_add_to_slotinfo (struct link_map *l, bool do_add) { + if (l->l_tls_blocksize == 0 || l->l_tls_in_slotinfo) + return false; + /* Now that we know the object is loaded successfully add modules containing TLS data to the dtv info table. We might have to increase its size. */ @@ -1157,7 +1184,10 @@ cannot create TLS data structures")); atomic_store_relaxed (&listp->slotinfo[idx].map, l); atomic_store_relaxed (&listp->slotinfo[idx].gen, GL(dl_tls_generation) + 1); + l->l_tls_in_slotinfo = true; } + + return true; } #if PTHREAD_IN_LIBC diff --git a/elf/tst-dlopen-tlsreinit1.c b/elf/tst-dlopen-tlsreinit1.c new file mode 100644 index 0000000000..2016b9b0c6 --- /dev/null +++ b/elf/tst-dlopen-tlsreinit1.c @@ -0,0 +1,40 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include + +static int +do_test (void) +{ + void *handle = xdlopen ("tst-dlopen-tlsreinitmod1.so", RTLD_NOW); + + bool *tlsreinitmod3_tested = xdlsym (handle, "tlsreinitmod3_tested"); + TEST_VERIFY (*tlsreinitmod3_tested); + + xdlclose (handle); + + /* This crashes if the libc.so.6 TLS image has been reverted. */ + TEST_VERIFY (!isupper ('@')); + + return 0; +} + +#include diff --git a/elf/tst-dlopen-tlsreinit2.c b/elf/tst-dlopen-tlsreinit2.c new file mode 100644 index 0000000000..90ad2c7713 --- /dev/null +++ b/elf/tst-dlopen-tlsreinit2.c @@ -0,0 +1,39 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717). + Variant with initially-linked modules. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + + +static int +do_test (void) +{ + /* Defined in tst-dlopen-tlsreinitmod3.so. */ + extern bool tlsreinitmod3_tested; + TEST_VERIFY (tlsreinitmod3_tested); + + /* This crashes if the libc.so.6 TLS image has been reverted. 
*/ + TEST_VERIFY (!isupper ('@')); + + return 0; +} + +#include diff --git a/elf/tst-dlopen-tlsreinit3.c b/elf/tst-dlopen-tlsreinit3.c new file mode 100644 index 0000000000..79bd585aff --- /dev/null +++ b/elf/tst-dlopen-tlsreinit3.c @@ -0,0 +1,2 @@ +/* Same code, but run with LD_AUDIT=tst-auditmod1.so. */ +#include "tst-dlopen-tlsreinit1.c" diff --git a/elf/tst-dlopen-tlsreinit4.c b/elf/tst-dlopen-tlsreinit4.c new file mode 100644 index 0000000000..344c9211ab --- /dev/null +++ b/elf/tst-dlopen-tlsreinit4.c @@ -0,0 +1,2 @@ +/* Same code, but run with LD_AUDIT=tst-auditmod1.so. */ +#include "tst-dlopen-tlsreinit2.c" diff --git a/elf/tst-dlopen-tlsreinitmod1.c b/elf/tst-dlopen-tlsreinitmod1.c new file mode 100644 index 0000000000..354cc3de51 --- /dev/null +++ b/elf/tst-dlopen-tlsreinitmod1.c @@ -0,0 +1,20 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717), module 1. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This module triggers loading of tst-dlopen-tlsreinitmod2.so and + tst-dlopen-tlsreinitmod3.so. 
*/ diff --git a/elf/tst-dlopen-tlsreinitmod2.c b/elf/tst-dlopen-tlsreinitmod2.c new file mode 100644 index 0000000000..677e69bd35 --- /dev/null +++ b/elf/tst-dlopen-tlsreinitmod2.c @@ -0,0 +1,30 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717), module 2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* Defined in tst-dlopen-tlsreinitmod3.so. This an underlinked symbol + dependency. */ +extern void call_tlsreinitmod3 (void); + +static void __attribute__ ((constructor)) +tlsreinitmod2_init (void) +{ + puts ("info: constructor of tst-dlopen-tlsreinitmod2.so invoked"); + call_tlsreinitmod3 (); +} diff --git a/elf/tst-dlopen-tlsreinitmod3.c b/elf/tst-dlopen-tlsreinitmod3.c new file mode 100644 index 0000000000..ef769c5131 --- /dev/null +++ b/elf/tst-dlopen-tlsreinitmod3.c @@ -0,0 +1,102 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717), module 3. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* Used to verify from the main program that the test ran. */ +bool tlsreinitmod3_tested; + +/* This TLS variable must not revert back to the initial state after + dlopen. */ +static __thread int tlsreinitmod3_state = 1; + +/* Set from the ELF constructor during dlopen. */ +static bool tlsreinitmod3_constructed; + +/* Second half of test, behind a compiler barrier. The compiler + barrier is necessary to prevent carrying over TLS address + information from call_tlsreinitmod3 to call_tlsreinitmod3_tail. */ +void call_tlsreinitmod3_tail (void *self) __attribute__ ((weak)); + +/* Called from tst-dlopen-tlsreinitmod2.so. */ +void +call_tlsreinitmod3 (void) +{ + printf ("info: call_tlsreinitmod3 invoked (state=%d)\n", + tlsreinitmod3_state); + + if (tlsreinitmod3_constructed) + { + puts ("error: call_tlsreinitmod3 called after ELF constructor"); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. */ + _exit (1); + } + + tlsreinitmod3_state = 2; + + /* Self-dlopen. This will run the ELF constructor. */ + void *self = dlopen ("tst-dlopen-tlsreinitmod3.so", RTLD_NOW); + if (self == NULL) + { + printf ("error: dlopen: %s\n", dlerror ()); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. 
*/ + _exit (1); + } + + call_tlsreinitmod3_tail (self); +} + +void +call_tlsreinitmod3_tail (void *self) +{ + printf ("info: dlopen returned in tlsreinitmod3 (state=%d)\n", + tlsreinitmod3_state); + + if (!tlsreinitmod3_constructed) + { + puts ("error: dlopen did not call tlsreinitmod3 ELF constructor"); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. */ + _exit (1); + } + + if (tlsreinitmod3_state != 2) + { + puts ("error: TLS state reverted in tlsreinitmod3"); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. */ + _exit (1); + } + + dlclose (self); + + /* Signal test completion to the main program. */ + tlsreinitmod3_tested = true; +} + +static void __attribute__ ((constructor)) +tlsreinitmod3_init (void) +{ + puts ("info: constructor of tst-dlopen-tlsreinitmod3.so invoked"); + tlsreinitmod3_constructed = true; +} diff --git a/include/link.h b/include/link.h index cb0d7d8e2f..5ed445d5a6 100644 --- a/include/link.h +++ b/include/link.h @@ -212,6 +212,7 @@ struct link_map unsigned int l_find_object_processed:1; /* Zero if _dl_find_object_update needs to process this lt_library map. */ + unsigned int l_tls_in_slotinfo:1; /* TLS slotinfo updated in dlopen. */ /* NODELETE status of the map. Only valid for maps of type lt_loaded. Lazy binding sets l_nodelete_active directly, diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 154efb0e19..259ce2e7d6 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -1239,13 +1239,7 @@ extern void *_dl_open (const char *name, int mode, const void *caller, extern int _dl_scope_free (void *) attribute_hidden; -/* Add module to slot information data. If DO_ADD is false, only the - required memory is allocated. Must be called with GL - (dl_load_tls_lock) acquired. 
If the function has already been called - for the link map L with !do_add, then this function will not raise - an exception, otherwise it is possible that it encounters a memory - allocation failure. */ -extern void _dl_add_to_slotinfo (struct link_map *l, bool do_add) +extern bool _dl_add_to_slotinfo (struct link_map *l, bool do_add) attribute_hidden; /* Update slot information data for at least the generation of the commit f4a9b6e97bf05cf5a41907e55901f7e9afaafd4d Author: Florian Weimer Date: Mon Sep 9 21:10:23 2024 +0200 elf: Fix tst-dlopen-tlsreinit1.out test dependency Fixes commit 5097cd344fd243fb8deb6dec96e8073753f962f9 ("elf: Avoid re-initializing already allocated TLS in dlopen (bug 31717)"). Reported-by: Patsy Griffin Reviewed-by: Patsy Griffin (cherry picked from commit e82a7cb1622bff08d8e3a144d7c5516a088f1cbc) diff --git a/elf/Makefile b/elf/Makefile index a03c6520d8..dc686c3bff 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -3141,7 +3141,7 @@ $(objpfx)tst-dlopen-tlsreinit4: \ # tst-dlopen-tlsreinitmod3.so. The dependency is provided via # $(objpfx)tst-dlopen-tlsreinitmod1.so. tst-dlopen-tlsreinitmod2.so-no-z-defs = yes -$(objpfx)tst-dlopen-tlsreinit.out: $(objpfx)tst-dlopen-tlsreinitmod1.so \ +$(objpfx)tst-dlopen-tlsreinit1.out: $(objpfx)tst-dlopen-tlsreinitmod1.so \ $(objpfx)tst-dlopen-tlsreinitmod2.so $(objpfx)tst-dlopen-tlsreinitmod3.so # Reuse an audit module which provides ample debug logging. $(objpfx)tst-dlopen-tlsreinit3.out: $(objpfx)tst-auditmod1.so commit adfb14e71f240a6bc2a4cbd6e6c50cc3fa1bcc3b Author: Florian Weimer Date: Tue Sep 10 12:40:27 2024 +0200 debug: Fix read error handling in pcprofiledump The reading loops did not check for read failures. Addresses a static analysis report. Manually tested by compiling a program with the GCC's -finstrument-functions option, running it with “LD_PRELOAD=debug/libpcprofile.so PCPROFILE_OUTPUT=output-file”, and reviewing the output of “debug/pcprofiledump output-file”. 
(cherry picked from commit 89b088bf70c651c231bf27e644270d093b8f144a) diff --git a/debug/pcprofiledump.c b/debug/pcprofiledump.c index 049a9c2744..94530f0cf9 100644 --- a/debug/pcprofiledump.c +++ b/debug/pcprofiledump.c @@ -75,6 +75,44 @@ static struct argp argp = options, parse_opt, args_doc, doc, NULL, more_help }; +/* Try to read SIZE bytes from FD and store them on BUF. Terminate + the process upon read error. Also terminate the process if less + than SIZE bytes are remaining in the file. If !IN_HEADER, do not + terminate the process if the end of the file is encountered + immediately, before any bytes are read. + + Returns true if SIZE bytes have been read, and false if no bytes + have been read due to an end-of-file condition. */ +static bool +read_exactly (int fd, void *buffer, size_t size, bool in_header) +{ + char *p = buffer; + char *end = p + size; + while (p < end) + { + ssize_t ret = TEMP_FAILURE_RETRY (read (fd, p, end - p)); + if (ret < 0) + { + if (in_header) + error (EXIT_FAILURE, errno, _("cannot read header")); + else + error (EXIT_FAILURE, errno, _("cannot read pointer pair")); + } + if (ret == 0) + { + if (p == buffer && !in_header) + /* Nothing has been read. */ + return false; + if (in_header) + error (EXIT_FAILURE, 0, _("unexpected end of file in header")); + else + error (EXIT_FAILURE, 0, + _("unexpected end of file in pointer pair")); + } + p += ret; + } + return true; +} int main (int argc, char *argv[]) @@ -110,8 +148,7 @@ main (int argc, char *argv[]) /* Read the first 4-byte word. It contains the information about the word size and the endianness. */ uint32_t word; - if (TEMP_FAILURE_RETRY (read (fd, &word, 4)) != 4) - error (EXIT_FAILURE, errno, _("cannot read header")); + read_exactly (fd, &word, sizeof (word), true); /* Check whether we have to swap the byte order. 
*/ int must_swap = (word & 0x0fffffff) == bswap_32 (0xdeb00000); @@ -121,56 +158,30 @@ main (int argc, char *argv[]) /* We have two loops, one for 32 bit pointers, one for 64 bit pointers. */ if (word == 0xdeb00004) { - union - { - uint32_t ptrs[2]; - char bytes[8]; - } pair; + uint32_t ptrs[2]; while (1) { - size_t len = sizeof (pair); - size_t n; - - while (len > 0 - && (n = TEMP_FAILURE_RETRY (read (fd, &pair.bytes[8 - len], - len))) != 0) - len -= n; - - if (len != 0) - /* Nothing to read. */ + if (!read_exactly (fd, ptrs, sizeof (ptrs), false)) break; printf ("this = %#010" PRIx32 ", caller = %#010" PRIx32 "\n", - must_swap ? bswap_32 (pair.ptrs[0]) : pair.ptrs[0], - must_swap ? bswap_32 (pair.ptrs[1]) : pair.ptrs[1]); + must_swap ? bswap_32 (ptrs[0]) : ptrs[0], + must_swap ? bswap_32 (ptrs[1]) : ptrs[1]); } } else if (word == 0xdeb00008) { - union - { - uint64_t ptrs[2]; - char bytes[16]; - } pair; + uint64_t ptrs[2]; while (1) { - size_t len = sizeof (pair); - size_t n; - - while (len > 0 - && (n = TEMP_FAILURE_RETRY (read (fd, &pair.bytes[8 - len], - len))) != 0) - len -= n; - - if (len != 0) - /* Nothing to read. */ + if (!read_exactly (fd, ptrs, sizeof (ptrs), false)) break; printf ("this = %#018" PRIx64 ", caller = %#018" PRIx64 "\n", - must_swap ? bswap_64 (pair.ptrs[0]) : pair.ptrs[0], - must_swap ? bswap_64 (pair.ptrs[1]) : pair.ptrs[1]); + must_swap ? bswap_64 (ptrs[0]) : ptrs[0], + must_swap ? bswap_64 (ptrs[1]) : ptrs[1]); } } else commit 7073164add3b874cf0c19ca0fb84236f6bb0985d Author: Siddhesh Poyarekar Date: Tue Sep 3 14:58:33 2024 -0400 libio: Attempt wide backup free only for non-legacy code _wide_data and _mode are not available in legacy code, so do not attempt to free the wide backup buffer in legacy code. 
Resolves: BZ #32137 and BZ #27821 Signed-off-by: Siddhesh Poyarekar Reviewed-by: Florian Weimer (cherry picked from commit ae4d44b1d501421ad9a3af95279b8f4d1546f1ce) diff --git a/NEWS b/NEWS index 5b20efbf6c..9033335db1 100644 --- a/NEWS +++ b/NEWS @@ -9,12 +9,14 @@ Version 2.40.1 The following bugs are resolved with this release: + [27821] ungetc: Fix backup buffer leak on program exit [30081] resolv: Do not wait for non-existing second DNS response after error [31717] elf: Avoid re-initializing already allocated TLS in dlopen [31890] resolv: Allow short error responses to match any DNS query [31968] mremap implementation in C does not handle arguments correctly [32026] strerror/strsignal TLS not handled correctly for secondary namespaces [32052] Name space violation in fortify wrappers + [32137] libio: Attempt wide backup free only for non-legacy code Version 2.40 diff --git a/libio/genops.c b/libio/genops.c index 35d8b30710..6f20d49669 100644 --- a/libio/genops.c +++ b/libio/genops.c @@ -819,7 +819,7 @@ _IO_unbuffer_all (void) /* Free up the backup area if it was ever allocated. */ if (_IO_have_backup (fp)) _IO_free_backup_area (fp); - if (fp->_mode > 0 && _IO_have_wbackup (fp)) + if (!legacy && fp->_mode > 0 && _IO_have_wbackup (fp)) _IO_free_wbackup_area (fp); if (! (fp->_flags & _IO_UNBUFFERED) commit 77018fd9f99f86a354387219fdf099915857a527 Author: Sergey Kolosov Date: Wed Sep 25 15:51:23 2024 +0200 stdio-common: Add new test for fdopen This commit adds fdopen test with all modes. 
Reviewed-by: DJ Delorie (cherry picked from commit 1d72fa3cfa046f7293421a7e58f2a272474ea901) diff --git a/stdio-common/Makefile b/stdio-common/Makefile index a91754f52d..5af53d61fd 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -207,6 +207,7 @@ tests := \ tst-cookie \ tst-dprintf-length \ tst-fdopen \ + tst-fdopen2 \ tst-ferror \ tst-fgets \ tst-fileno \ diff --git a/stdio-common/tst-fdopen2.c b/stdio-common/tst-fdopen2.c new file mode 100644 index 0000000000..0c6625f258 --- /dev/null +++ b/stdio-common/tst-fdopen2.c @@ -0,0 +1,246 @@ +/* Test the fdopen function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include +#include +#include +#include +#include +#include +#include + +char *tmp_dir; +char *path_to_file; + +void +prepare_tmp_dir (void) +{ + tmp_dir = support_create_temp_directory ("tst-fdopen2"); + path_to_file = xasprintf ("%s/tst-fdopen2.txt", tmp_dir); +} + +/* open temp file descriptor with mode. */ +int +open_tmp_fd (int mode) +{ + int fd = xopen (path_to_file, mode, 0644); + return fd; +} + + +/* close and remove temp file with close. */ +void +close_tmp_fd (int fd) +{ + xclose (fd); + xunlink (path_to_file); +} + +/* close and remove temp file with fclose.
*/ +void +close_tmp_fp (FILE *fp) +{ + fclose (fp); + xunlink (path_to_file); +} + +/* test "w" fdopen mode. */ +void +do_test_fdopen_w (void) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (O_WRONLY | O_CREAT | O_TRUNC); + + /* test mode mismatch. */ + fp = fdopen (fd, "r"); + if (fp != NULL || errno != EINVAL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, r) should fail with EINVAL: %m", fd); + } + + fp = fdopen (fd, "w"); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w): %m", fd); + } + + const void *buf = "AAAA"; + ret = fwrite (buf, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite (): %m"); + } + + unsigned char buf2[4]; + rewind (fp); + clearerr (fp); + /* fread should fail in "w" mode */ + ret = fread (buf2, 1, 4, fp); + if (ret != 0 || ferror (fp) == 0) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread should fail in \"w\" mode"); + } + + fclose (fp); +} + +/* test "r" fdopen mode. */ +void +do_test_fdopen_r (void) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (O_RDONLY); + + /* test mode mismatch. */ + fp = fdopen (fd, "w"); + if (fp != NULL || errno != EINVAL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w) should fail with EINVAL: %m", fd); + } + + fp = fdopen (fd, "r"); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w): %m", fd); + } + + const void *buf = "BBBB"; + /* fwrite should fail in "r" mode. */ + ret = fwrite (buf, 1, 4, fp); + if (ret != 0 || ferror (fp) == 0) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite should fail in \"r\" mode"); + } + + unsigned char buf2[4]; + ret = fread (buf2, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread (): %m"); + } + + fclose (fp); +} + +/* test "a" fdopen mode. */ +void +do_test_fdopen_a (void) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (O_WRONLY | O_CREAT | O_APPEND); + + /* test mode mismatch. 
*/ + fp = fdopen (fd, "r+"); + if (fp != NULL || errno != EINVAL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, \"r+\") should fail with EINVAL: %m", fd); + } + + fp = fdopen (fd, "a"); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w): %m", fd); + } + + const void *buf = "CCCC"; + ret = fwrite (buf, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite (): %m"); + } + + /* fread should fail in "a" mode. */ + unsigned char buf2[4]; + clearerr (fp); + ret = fread (buf2, 1, 4, fp); + if (ret != 0 || ferror (fp) == 0) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread should fail \"a\" mode"); + } + + fclose (fp); +} + +void +do_test_fdopen_mode (int mode, const char *fmode) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (mode); + + fp = fdopen (fd, fmode); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, %s): %m", fd, fmode); + } + + const void *buf = "EEEE"; + ret = fwrite (buf, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite () in mode:%s returns %d: %m", fmode, ret); + } + + rewind (fp); + unsigned char buf2[4]; + ret = fread (buf2, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread () in mode:%s returns %d: %m", fmode, ret); + } + + fclose (fp); +} + +static int +do_test (void) +{ + + prepare_tmp_dir (); + + do_test_fdopen_w (); + do_test_fdopen_r (); + do_test_fdopen_a (); + + /* test r+ w+ a+ fdopen modes. */ + do_test_fdopen_mode (O_RDWR, "r+"); + do_test_fdopen_mode (O_RDWR | O_CREAT | O_TRUNC, "w+"); + do_test_fdopen_mode (O_RDWR | O_CREAT | O_APPEND, "a+"); + xunlink (path_to_file); + return 0; +} + +#include <support/test-driver.c> commit 61b6464f8d72aef520ee769a2ae317b4f68d5e1d Author: Joseph Myers Date: Tue Sep 24 14:06:22 2024 +0000 Add tests of fread There seem to be no glibc tests specifically for the fread function. Add basic tests of that function. Tested for x86_64.
(cherry picked from commit d14c977c65aac7db35bb59380ef99d6582c4f930) diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 5af53d61fd..3396090be1 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -217,6 +217,7 @@ tests := \ tst-fmemopen4 \ tst-fphex \ tst-fphex-wide \ + tst-fread \ tst-fseek \ tst-fwrite \ tst-gets \ diff --git a/stdio-common/tst-fread.c b/stdio-common/tst-fread.c new file mode 100644 index 0000000000..4d9a7895f6 --- /dev/null +++ b/stdio-common/tst-fread.c @@ -0,0 +1,134 @@ +/* Test fread. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>.
*/ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int +do_test (void) +{ + char *temp_dir = support_create_temp_directory ("tst-fread"); + char *file1 = xasprintf ("%s/file1", temp_dir); + support_write_file_string (file1, "file1"); + add_temp_file (file1); + FILE *fp; + size_t ret; + char buf[1024]; + + verbose_printf ("test single-byte reads\n"); + fp = xfopen (file1, "r"); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, 2, fp); + TEST_COMPARE (ret, 2); + TEST_COMPARE (buf[0], 'f'); + TEST_COMPARE (buf[1], 'i'); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ftell (fp), 2); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, 3, fp); + TEST_COMPARE (ret, 3); + TEST_COMPARE (buf[0], 'l'); + TEST_COMPARE (buf[1], 'e'); + TEST_COMPARE (buf[2], '1'); + TEST_COMPARE (ftell (fp), 5); + TEST_COMPARE (feof (fp), 0); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, 1, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (!!feof (fp), 1); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 5); + xfclose (fp); + + verbose_printf ("test single-byte reads, EOF part way through\n"); + fp = xfopen (file1, "r"); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, sizeof buf, fp); + TEST_COMPARE (ret, 5); + TEST_COMPARE (buf[0], 'f'); + TEST_COMPARE (buf[1], 'i'); + TEST_COMPARE (buf[2], 'l'); + TEST_COMPARE (buf[3], 'e'); + TEST_COMPARE (buf[4], '1'); + TEST_COMPARE (!!feof (fp), 1); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 5); + xfclose (fp); + + verbose_printf ("test multi-byte reads\n"); + fp = xfopen (file1, "r"); + memset (buf, 0, sizeof buf); + ret = fread (buf, 2, 2, fp); + TEST_COMPARE (ret, 2); + TEST_COMPARE (buf[0], 'f'); + TEST_COMPARE (buf[1], 'i'); + TEST_COMPARE (buf[2], 'l'); + TEST_COMPARE (buf[3], 'e'); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ftell (fp), 4); + memset (buf, 0, sizeof buf); + ret = fread (buf, 3, 3, fp); + TEST_COMPARE (ret, 0); + /* The bytes written for 
a partial element read are unspecified. */ + TEST_COMPARE (!!feof (fp), 1); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 5); + xfclose (fp); + + verbose_printf ("test read error\n"); + fp = xfopen (file1, "r"); + xclose (fileno (fp)); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, sizeof buf, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (!!ferror (fp), 1); + fclose (fp); + + verbose_printf ("test zero size\n"); + fp = xfopen (file1, "r"); + ret = fread (buf, 0, SIZE_MAX, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 0); + xfclose (fp); + + verbose_printf ("test zero items\n"); + fp = xfopen (file1, "r"); + ret = fread (buf, SIZE_MAX, 0, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 0); + xfclose (fp); + + free (temp_dir); + free (file1); + return 0; +} + +#include <support/test-driver.c> commit 9bc76c7ca4d6022fd588c274d139813f99e04f35 Author: Joseph Myers Date: Wed Aug 14 17:15:46 2024 +0000 Test errno setting on strtod overflow in tst-strtod-round We have no tests that errno is set to ERANGE on overflow of strtod-family functions (we do have some tests for underflow, in tst-strtod-underflow). Add such tests to tst-strtod-round. Tested for x86_64. (cherry picked from commit 207d64feb26279e152c50744e3c37e68491aca99) diff --git a/stdlib/tst-strtod-round-skeleton.c b/stdlib/tst-strtod-round-skeleton.c index 6fba4b5228..c3cc0201d4 100644 --- a/stdlib/tst-strtod-round-skeleton.c +++ b/stdlib/tst-strtod-round-skeleton.c @@ -21,6 +21,7 @@ declared in the headers.
*/ #define _LIBC_TEST 1 #define __STDC_WANT_IEC_60559_TYPES_EXT__ +#include #include #include #include @@ -205,7 +206,9 @@ struct test { #define GEN_ONE_TEST(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ { \ feclearexcept (FE_ALL_EXCEPT); \ + errno = 0; \ FTYPE f = STRTO (FSUF) (s, NULL); \ + int new_errno = errno; \ if (f != expected->FSUF \ || (copysign ## CSUF) (1.0 ## LSUF, f) \ != (copysign ## CSUF) (1.0 ## LSUF, expected->FSUF)) \ @@ -254,6 +257,14 @@ struct test { printf ("ignoring this exception error\n"); \ } \ } \ + if (overflow->FSUF && new_errno != ERANGE) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") left errno == %d," \ + " not %d (ERANGE)\n", \ + s, new_errno, ERANGE); \ + result = 1; \ + } \ } \ } commit e06153665fa931e4c7d2a3ecc14e5197e96143a7 Author: Joseph Myers Date: Tue Aug 27 12:38:01 2024 +0000 More thoroughly test underflow / errno in tst-strtod-round Add tests of underflow in tst-strtod-round, and thus also test for errno being unchanged when there is neither overflow nor underflow. The errno setting before the function call to test for being unchanged is adjusted to set errno to 12345 instead of 0, so that any bugs where strtod sets errno to 0 would be detected. This doesn't add any new test inputs for tst-strtod-round, and in particular doesn't cover the edge cases of underflow the way tst-strtod-underflow does (none of the existing test inputs for tst-strtod-round actually exercise cases that have underflow with before-rounding tininess detection but not with after-rounding tininess detection), but at least it provides some coverage (as per the recent discussions) that ordinary non-overflowing non-underflowing inputs to these functions do not set errno. Tested for x86_64. 
(cherry picked from commit d73ed2601b7c3c93c3529149a3d7f7b6177900a8) diff --git a/stdlib/gen-tst-strtod-round.c b/stdlib/gen-tst-strtod-round.c index e48bf4d6ea..7ce735f81d 100644 --- a/stdlib/gen-tst-strtod-round.c +++ b/stdlib/gen-tst-strtod-round.c @@ -46,6 +46,7 @@ static int string_to_fp (mpfr_t f, const char *s, mpfr_rnd_t rnd) { mpfr_clear_overflow (); + mpfr_clear_underflow (); #ifdef WORKAROUND mpfr_t f2; mpfr_init2 (f2, 100000); @@ -53,12 +54,16 @@ string_to_fp (mpfr_t f, const char *s, mpfr_rnd_t rnd) int r = mpfr_set (f, f2, rnd); r |= mpfr_subnormalize (f, r, rnd); mpfr_clear (f2); - return r0 | r; + r |= r0; #else int r = mpfr_strtofr (f, s, NULL, 0, rnd); r |= mpfr_subnormalize (f, r, rnd); - return r; #endif + if (r == 0) + /* The MPFR underflow flag is set for exact subnormal results, + which is not wanted here. */ + mpfr_clear_underflow (); + return r; } void @@ -70,6 +75,21 @@ print_fp (FILE *fout, mpfr_t f, const char *suffix) mpfr_fprintf (fout, "\t%Ra%s", f, suffix); } +static const char * +suffix_to_print (bool overflow, bool underflow, bool underflow_before_rounding, + bool with_comma) +{ + if (overflow) + return with_comma ? ", true, false,\n" : ", true, false"; + if (underflow) + return with_comma ? ", false, true,\n" : ", false, true"; + if (underflow_before_rounding) + return (with_comma + ? ", false, !TININESS_AFTER_ROUNDING,\n" + : ", false, !TININESS_AFTER_ROUNDING"); + return with_comma ? 
", false, false,\n" : ", false, false"; +} + static void round_str (FILE *fout, const char *s, int prec, int emin, int emax, bool ibm_ld) @@ -80,8 +100,11 @@ round_str (FILE *fout, const char *s, int prec, int emin, int emax, mpfr_set_emin (emin); mpfr_set_emax (emax); mpfr_init (f); + string_to_fp (f, s, MPFR_RNDZ); + bool underflow_before_rounding = mpfr_underflow_p () != 0; int r = string_to_fp (f, s, MPFR_RNDD); bool overflow = mpfr_overflow_p () != 0; + bool underflow = mpfr_underflow_p () != 0; if (ibm_ld) { assert (prec == 106 && emin == -1073 && emax == 1024); @@ -97,19 +120,27 @@ round_str (FILE *fout, const char *s, int prec, int emin, int emax, } } mpfr_fprintf (fout, "\t%s,\n", r ? "false" : "true"); - print_fp (fout, f, overflow ? ", true,\n" : ", false,\n"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + true)); string_to_fp (f, s, MPFR_RNDN); overflow = (mpfr_overflow_p () != 0 || (ibm_ld && mpfr_cmpabs (f, max_value) > 0)); - print_fp (fout, f, overflow ? ", true,\n" : ", false,\n"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + true)); string_to_fp (f, s, MPFR_RNDZ); overflow = (mpfr_overflow_p () != 0 || (ibm_ld && mpfr_cmpabs (f, max_value) > 0)); - print_fp (fout, f, overflow ? ", true,\n" : ", false,\n"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + true)); string_to_fp (f, s, MPFR_RNDU); overflow = (mpfr_overflow_p () != 0 || (ibm_ld && mpfr_cmpabs (f, max_value) > 0)); - print_fp (fout, f, overflow ? 
", true" : ", false"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + false)); mpfr_clear (f); if (ibm_ld) mpfr_clear (max_value); diff --git a/stdlib/tst-strtod-round-data.h b/stdlib/tst-strtod-round-data.h index 8899d15f9b..13e62dd2b0 100644 --- a/stdlib/tst-strtod-round-data.h +++ b/stdlib/tst-strtod-round-data.h @@ -2,1852 +2,1852 @@ static const struct test tests[] = { TEST ("3.518437208883201171875E+013", false, - 0x2p+44, false, - 0x2p+44, false, - 0x2p+44, false, - 0x2.000004p+44, false, - false, - 0x2.0000000000002p+44, false, - 0x2.0000000000004p+44, false, - 0x2.0000000000002p+44, false, - 0x2.0000000000004p+44, false, - true, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - true, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - true, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - true, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false), + 0x2p+44, false, false, + 0x2p+44, false, false, + 0x2p+44, false, false, + 0x2.000004p+44, false, false, + false, + 0x2.0000000000002p+44, false, false, + 0x2.0000000000004p+44, false, false, + 0x2.0000000000002p+44, false, false, + 0x2.0000000000004p+44, false, false, + true, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + true, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + true, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + true, + 
0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false), TEST ("1.00000005960464477550", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, false, - false, - 0x1.0000010000000002048242f2ff66p+0, false, - 0x1.0000010000000002048242f2ff67p+0, false, - 0x1.0000010000000002048242f2ff66p+0, false, - 0x1.0000010000000002048242f2ff67p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + false, + 0x1.0000010000000002048242f2ff66p+0, false, false, + 0x1.0000010000000002048242f2ff67p+0, false, false, + 
0x1.0000010000000002048242f2ff66p+0, false, false, + 0x1.0000010000000002048242f2ff67p+0, false, false), TEST ("1.0000000596046447755", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, false, - false, - 0x1.0000010000000002048242f2ff66p+0, false, - 0x1.0000010000000002048242f2ff67p+0, false, - 0x1.0000010000000002048242f2ff66p+0, false, - 0x1.0000010000000002048242f2ff67p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + false, + 0x1.0000010000000002048242f2ff66p+0, false, false, + 0x1.0000010000000002048242f2ff67p+0, false, false, + 0x1.0000010000000002048242f2ff66p+0, false, false, + 
0x1.0000010000000002048242f2ff67p+0, false, false), TEST ("1.000000059604644776", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - false, - 0x1.000001000000000b3db12bdc21p+0, false, - 0x1.000001000000000b3db12bdc21p+0, false, - 0x1.000001000000000b3db12bdc21p+0, false, - 0x1.000001000000000b3db12bdc218p+0, false, - false, - 0x1.000001000000000b3db12bdc213cp+0, false, - 0x1.000001000000000b3db12bdc213dp+0, false, - 0x1.000001000000000b3db12bdc213cp+0, false, - 0x1.000001000000000b3db12bdc213dp+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + false, + 0x1.000001000000000b3db12bdc21p+0, false, false, + 0x1.000001000000000b3db12bdc21p+0, false, false, + 0x1.000001000000000b3db12bdc21p+0, false, false, + 0x1.000001000000000b3db12bdc218p+0, false, false, + false, + 0x1.000001000000000b3db12bdc213cp+0, false, false, + 0x1.000001000000000b3db12bdc213dp+0, false, false, + 0x1.000001000000000b3db12bdc213cp+0, false, false, + 0x1.000001000000000b3db12bdc213dp+0, false, false), TEST 
("1.000000059604644775", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffffap+0, false, - false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffffap+0, false, - false, - 0x1.000000fffffffff8cb535a09dd8p+0, false, - 0x1.000000fffffffff8cb535a09dd8p+0, false, - 0x1.000000fffffffff8cb535a09dd8p+0, false, - 0x1.000000fffffffff8cb535a09dep+0, false, - false, - 0x1.000000fffffffff8cb535a09dd9p+0, false, - 0x1.000000fffffffff8cb535a09dd91p+0, false, - 0x1.000000fffffffff8cb535a09dd9p+0, false, - 0x1.000000fffffffff8cb535a09dd91p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffffap+0, false, false, + false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffffap+0, false, false, + false, + 0x1.000000fffffffff8cb535a09dd8p+0, false, false, + 0x1.000000fffffffff8cb535a09dd8p+0, false, false, + 0x1.000000fffffffff8cb535a09dd8p+0, false, false, + 0x1.000000fffffffff8cb535a09dep+0, false, false, + false, + 0x1.000000fffffffff8cb535a09dd9p+0, false, false, + 0x1.000000fffffffff8cb535a09dd91p+0, false, false, + 0x1.000000fffffffff8cb535a09dd9p+0, false, false, + 0x1.000000fffffffff8cb535a09dd91p+0, false, false), TEST ("1.00000005960464478", false, - 0x1p+0, false, - 0x1.000002p+0, 
false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - false, - 0x1.0000010000000055072873252f8p+0, false, - 0x1.0000010000000055072873253p+0, false, - 0x1.0000010000000055072873252f8p+0, false, - 0x1.0000010000000055072873253p+0, false, - false, - 0x1.0000010000000055072873252febp+0, false, - 0x1.0000010000000055072873252febp+0, false, - 0x1.0000010000000055072873252febp+0, false, - 0x1.0000010000000055072873252fecp+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + false, + 0x1.0000010000000055072873252f8p+0, false, false, + 0x1.0000010000000055072873253p+0, false, false, + 0x1.0000010000000055072873252f8p+0, false, false, + 0x1.0000010000000055072873253p+0, false, false, + false, + 0x1.0000010000000055072873252febp+0, false, false, + 0x1.0000010000000055072873252febp+0, false, false, + 0x1.0000010000000055072873252febp+0, false, false, + 0x1.0000010000000055072873252fecp+0, false, false), TEST ("1.0000000596046448", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 
0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - false, - 0x1.00000100000001c5f67cd79279p+0, false, - 0x1.00000100000001c5f67cd792798p+0, false, - 0x1.00000100000001c5f67cd79279p+0, false, - 0x1.00000100000001c5f67cd792798p+0, false, - false, - 0x1.00000100000001c5f67cd7927953p+0, false, - 0x1.00000100000001c5f67cd7927954p+0, false, - 0x1.00000100000001c5f67cd7927953p+0, false, - 0x1.00000100000001c5f67cd7927954p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + false, + 0x1.00000100000001c5f67cd79279p+0, false, false, + 0x1.00000100000001c5f67cd792798p+0, false, false, + 0x1.00000100000001c5f67cd79279p+0, false, false, + 0x1.00000100000001c5f67cd792798p+0, false, false, + false, + 0x1.00000100000001c5f67cd7927953p+0, false, false, + 0x1.00000100000001c5f67cd7927954p+0, false, false, + 0x1.00000100000001c5f67cd7927953p+0, false, false, + 0x1.00000100000001c5f67cd7927954p+0, false, false), TEST ("1.000000059604645", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.0000010000001p+0, false, - 0x1.0000010000001p+0, false, - 
0x1.0000010000001p+0, false, - 0x1.0000010000002p+0, false, - false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - false, - 0x1.000001000000102f4fc8c3d757p+0, false, - 0x1.000001000000102f4fc8c3d7578p+0, false, - 0x1.000001000000102f4fc8c3d757p+0, false, - 0x1.000001000000102f4fc8c3d7578p+0, false, - false, - 0x1.000001000000102f4fc8c3d75769p+0, false, - 0x1.000001000000102f4fc8c3d75769p+0, false, - 0x1.000001000000102f4fc8c3d75769p+0, false, - 0x1.000001000000102f4fc8c3d7576ap+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.0000010000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + 0x1.0000010000002p+0, false, false, + false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + false, + 0x1.000001000000102f4fc8c3d757p+0, false, false, + 0x1.000001000000102f4fc8c3d7578p+0, false, false, + 0x1.000001000000102f4fc8c3d757p+0, false, false, + 0x1.000001000000102f4fc8c3d7578p+0, false, false, + false, + 0x1.000001000000102f4fc8c3d75769p+0, false, false, + 0x1.000001000000102f4fc8c3d75769p+0, false, false, + 0x1.000001000000102f4fc8c3d75769p+0, false, false, + 0x1.000001000000102f4fc8c3d7576ap+0, false, false), TEST ("1.00000005960464", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffffeap+0, false, - 0x1.000000fffffeap+0, false, - 0x1.000000fffffeap+0, false, - 
0x1.000000fffffebp+0, false, - false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - false, - 0x1.000000fffffea7e5975eb11da7p+0, false, - 0x1.000000fffffea7e5975eb11da78p+0, false, - 0x1.000000fffffea7e5975eb11da7p+0, false, - 0x1.000000fffffea7e5975eb11da78p+0, false, - false, - 0x1.000000fffffea7e5975eb11da74ap+0, false, - 0x1.000000fffffea7e5975eb11da74bp+0, false, - 0x1.000000fffffea7e5975eb11da74ap+0, false, - 0x1.000000fffffea7e5975eb11da74bp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffffeap+0, false, false, + 0x1.000000fffffeap+0, false, false, + 0x1.000000fffffeap+0, false, false, + 0x1.000000fffffebp+0, false, false, + false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + false, + 0x1.000000fffffea7e5975eb11da7p+0, false, false, + 0x1.000000fffffea7e5975eb11da78p+0, false, false, + 0x1.000000fffffea7e5975eb11da7p+0, false, false, + 0x1.000000fffffea7e5975eb11da78p+0, false, false, + false, + 0x1.000000fffffea7e5975eb11da74ap+0, false, false, + 0x1.000000fffffea7e5975eb11da74bp+0, false, false, + 0x1.000000fffffea7e5975eb11da74ap+0, false, false, + 0x1.000000fffffea7e5975eb11da74bp+0, false, false), TEST ("1.0000000596046", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffff36p+0, false, - 0x1.000000fffff36p+0, false, - 0x1.000000fffff36p+0, false, - 0x1.000000fffff37p+0, false, - false, - 
0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - 0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - false, - 0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - 0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - false, - 0x1.000000fffff36597d40e1b5026p+0, false, - 0x1.000000fffff36597d40e1b50268p+0, false, - 0x1.000000fffff36597d40e1b5026p+0, false, - 0x1.000000fffff36597d40e1b50268p+0, false, - false, - 0x1.000000fffff36597d40e1b502655p+0, false, - 0x1.000000fffff36597d40e1b502656p+0, false, - 0x1.000000fffff36597d40e1b502655p+0, false, - 0x1.000000fffff36597d40e1b502656p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffff36p+0, false, false, + 0x1.000000fffff36p+0, false, false, + 0x1.000000fffff36p+0, false, false, + 0x1.000000fffff37p+0, false, false, + false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + false, + 0x1.000000fffff36597d40e1b5026p+0, false, false, + 0x1.000000fffff36597d40e1b50268p+0, false, false, + 0x1.000000fffff36597d40e1b5026p+0, false, false, + 0x1.000000fffff36597d40e1b50268p+0, false, false, + false, + 0x1.000000fffff36597d40e1b502655p+0, false, false, + 0x1.000000fffff36597d40e1b502656p+0, false, false, + 0x1.000000fffff36597d40e1b502655p+0, false, false, + 0x1.000000fffff36597d40e1b502656p+0, false, false), TEST ("1.000000059605", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001000063fp+0, false, - 0x1.000001000064p+0, false, - 0x1.000001000063fp+0, false, - 0x1.000001000064p+0, false, - false, - 0x1.000001000063fcap+0, false, - 
0x1.000001000063fca2p+0, false, - 0x1.000001000063fcap+0, false, - 0x1.000001000063fca2p+0, false, - false, - 0x1.000001000063fcap+0, false, - 0x1.000001000063fca2p+0, false, - 0x1.000001000063fcap+0, false, - 0x1.000001000063fca2p+0, false, - false, - 0x1.000001000063fca17533f5572f8p+0, false, - 0x1.000001000063fca17533f5573p+0, false, - 0x1.000001000063fca17533f5572f8p+0, false, - 0x1.000001000063fca17533f5573p+0, false, - false, - 0x1.000001000063fca17533f5572fe9p+0, false, - 0x1.000001000063fca17533f5572feap+0, false, - 0x1.000001000063fca17533f5572fe9p+0, false, - 0x1.000001000063fca17533f5572feap+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001000063fp+0, false, false, + 0x1.000001000064p+0, false, false, + 0x1.000001000063fp+0, false, false, + 0x1.000001000064p+0, false, false, + false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + false, + 0x1.000001000063fca17533f5572f8p+0, false, false, + 0x1.000001000063fca17533f5573p+0, false, false, + 0x1.000001000063fca17533f5572f8p+0, false, false, + 0x1.000001000063fca17533f5573p+0, false, false, + false, + 0x1.000001000063fca17533f5572fe9p+0, false, false, + 0x1.000001000063fca17533f5572feap+0, false, false, + 0x1.000001000063fca17533f5572fe9p+0, false, false, + 0x1.000001000063fca17533f5572feap+0, false, false), TEST ("1.00000005960", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 
0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff388p+0, false, - false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382ep+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff388p+0, false, false, + false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382ep+0, false, false), TEST ("1.0000000596", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 0x1.000000fffae49ca8p+0, false, - 
0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff388p+0, false, - false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382ep+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff388p+0, false, false, + false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382ep+0, false, false), TEST ("1.000000060", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, 
- 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca38p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca38p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false), TEST ("1.00000006", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, - 0x1.00000101b2b29a46p+0, false, 
- 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca38p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca38p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false), TEST ("1.0000001", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001ad7f29ap+0, false, - 0x1.000001ad7f29bp+0, false, - 0x1.000001ad7f29ap+0, false, - 0x1.000001ad7f29bp+0, false, - false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abccp+0, false, - false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abcap+0, false, - 
0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abccp+0, false, - false, - 0x1.000001ad7f29abcaf485787a65p+0, false, - 0x1.000001ad7f29abcaf485787a65p+0, false, - 0x1.000001ad7f29abcaf485787a65p+0, false, - 0x1.000001ad7f29abcaf485787a658p+0, false, - false, - 0x1.000001ad7f29abcaf485787a652p+0, false, - 0x1.000001ad7f29abcaf485787a6521p+0, false, - 0x1.000001ad7f29abcaf485787a652p+0, false, - 0x1.000001ad7f29abcaf485787a6521p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001ad7f29ap+0, false, false, + 0x1.000001ad7f29bp+0, false, false, + 0x1.000001ad7f29ap+0, false, false, + 0x1.000001ad7f29bp+0, false, false, + false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abccp+0, false, false, + false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abccp+0, false, false, + false, + 0x1.000001ad7f29abcaf485787a65p+0, false, false, + 0x1.000001ad7f29abcaf485787a65p+0, false, false, + 0x1.000001ad7f29abcaf485787a65p+0, false, false, + 0x1.000001ad7f29abcaf485787a658p+0, false, false, + false, + 0x1.000001ad7f29abcaf485787a652p+0, false, false, + 0x1.000001ad7f29abcaf485787a6521p+0, false, false, + 0x1.000001ad7f29abcaf485787a652p+0, false, false, + 0x1.000001ad7f29abcaf485787a6521p+0, false, false), TEST ("1.000000", true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false), + 0x1p+0, false, false, + 
0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false), TEST ("1.00000000000000011113", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.0000000000000801fc96557232p+0, false, - 0x1.0000000000000801fc96557232p+0, false, - 0x1.0000000000000801fc96557232p+0, false, - 0x1.0000000000000801fc965572328p+0, false, - false, - 0x1.0000000000000801fc9655723222p+0, false, - 0x1.0000000000000801fc9655723222p+0, false, - 0x1.0000000000000801fc9655723222p+0, false, - 0x1.0000000000000801fc9655723223p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, 
false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.0000000000000801fc96557232p+0, false, false, + 0x1.0000000000000801fc96557232p+0, false, false, + 0x1.0000000000000801fc96557232p+0, false, false, + 0x1.0000000000000801fc965572328p+0, false, false, + false, + 0x1.0000000000000801fc9655723222p+0, false, false, + 0x1.0000000000000801fc9655723222p+0, false, false, + 0x1.0000000000000801fc9655723222p+0, false, false, + 0x1.0000000000000801fc9655723223p+0, false, false), TEST ("1.00000000000000011103", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008002459c076c48p+0, false, - 0x1.00000000000008002459c076c5p+0, false, - 0x1.00000000000008002459c076c48p+0, false, - 0x1.00000000000008002459c076c5p+0, false, - false, - 0x1.00000000000008002459c076c4f7p+0, false, - 0x1.00000000000008002459c076c4f8p+0, false, - 0x1.00000000000008002459c076c4f7p+0, false, - 0x1.00000000000008002459c076c4f8p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008002459c076c48p+0, false, false, + 
0x1.00000000000008002459c076c5p+0, false, false, + 0x1.00000000000008002459c076c48p+0, false, false, + 0x1.00000000000008002459c076c5p+0, false, false, + false, + 0x1.00000000000008002459c076c4f7p+0, false, false, + 0x1.00000000000008002459c076c4f8p+0, false, false, + 0x1.00000000000008002459c076c4f7p+0, false, false, + 0x1.00000000000008002459c076c4f8p+0, false, false), TEST ("1.00000000000000011102", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007fff5207e5dap+0, false, - 0x1.00000000000007fff5207e5da08p+0, false, - 0x1.00000000000007fff5207e5dap+0, false, - 0x1.00000000000007fff5207e5da08p+0, false, - false, - 0x1.00000000000007fff5207e5da073p+0, false, - 0x1.00000000000007fff5207e5da073p+0, false, - 0x1.00000000000007fff5207e5da073p+0, false, - 0x1.00000000000007fff5207e5da074p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fff5207e5dap+0, false, false, + 0x1.00000000000007fff5207e5da08p+0, false, false, + 0x1.00000000000007fff5207e5dap+0, false, false, + 0x1.00000000000007fff5207e5da08p+0, 
false, false, + false, + 0x1.00000000000007fff5207e5da073p+0, false, false, + 0x1.00000000000007fff5207e5da073p+0, false, false, + 0x1.00000000000007fff5207e5da073p+0, false, false, + 0x1.00000000000007fff5207e5da074p+0, false, false), TEST ("1.00000000000000011101", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007ffc5e73c447b8p+0, false, - 0x1.00000000000007ffc5e73c447cp+0, false, - 0x1.00000000000007ffc5e73c447b8p+0, false, - 0x1.00000000000007ffc5e73c447cp+0, false, - false, - 0x1.00000000000007ffc5e73c447befp+0, false, - 0x1.00000000000007ffc5e73c447befp+0, false, - 0x1.00000000000007ffc5e73c447befp+0, false, - 0x1.00000000000007ffc5e73c447bfp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007ffc5e73c447b8p+0, false, false, + 0x1.00000000000007ffc5e73c447cp+0, false, false, + 0x1.00000000000007ffc5e73c447b8p+0, false, false, + 0x1.00000000000007ffc5e73c447cp+0, false, false, + false, + 0x1.00000000000007ffc5e73c447befp+0, false, false, + 0x1.00000000000007ffc5e73c447befp+0, false, false, + 
0x1.00000000000007ffc5e73c447befp+0, false, false, + 0x1.00000000000007ffc5e73c447bfp+0, false, false), TEST ("1.0000000000000001111", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008016eea8f26c48p+0, false, - 0x1.00000000000008016eea8f26c48p+0, false, - 0x1.00000000000008016eea8f26c48p+0, false, - 0x1.00000000000008016eea8f26c5p+0, false, - false, - 0x1.00000000000008016eea8f26c495p+0, false, - 0x1.00000000000008016eea8f26c496p+0, false, - 0x1.00000000000008016eea8f26c495p+0, false, - 0x1.00000000000008016eea8f26c496p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008016eea8f26c48p+0, false, false, + 0x1.00000000000008016eea8f26c48p+0, false, false, + 0x1.00000000000008016eea8f26c48p+0, false, false, + 0x1.00000000000008016eea8f26c5p+0, false, false, + false, + 0x1.00000000000008016eea8f26c495p+0, false, false, + 0x1.00000000000008016eea8f26c496p+0, false, false, + 0x1.00000000000008016eea8f26c495p+0, false, false, + 0x1.00000000000008016eea8f26c496p+0, false, false), 
TEST ("1.000000000000000111", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007ff96adfa2b57p+0, false, - 0x1.00000000000007ff96adfa2b578p+0, false, - 0x1.00000000000007ff96adfa2b57p+0, false, - 0x1.00000000000007ff96adfa2b578p+0, false, - false, - 0x1.00000000000007ff96adfa2b576ap+0, false, - 0x1.00000000000007ff96adfa2b576bp+0, false, - 0x1.00000000000007ff96adfa2b576ap+0, false, - 0x1.00000000000007ff96adfa2b576bp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007ff96adfa2b57p+0, false, false, + 0x1.00000000000007ff96adfa2b578p+0, false, false, + 0x1.00000000000007ff96adfa2b57p+0, false, false, + 0x1.00000000000007ff96adfa2b578p+0, false, false, + false, + 0x1.00000000000007ff96adfa2b576ap+0, false, false, + 0x1.00000000000007ff96adfa2b576bp+0, false, false, + 0x1.00000000000007ff96adfa2b576ap+0, false, false, + 0x1.00000000000007ff96adfa2b576bp+0, false, false), TEST ("1.00000000000000011", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, 
- 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - false, - 0x1.00000000000007ed24502859138p+0, false, - 0x1.00000000000007ed24502859138p+0, false, - 0x1.00000000000007ed24502859138p+0, false, - 0x1.00000000000007ed2450285914p+0, false, - false, - 0x1.00000000000007ed2450285913bfp+0, false, - 0x1.00000000000007ed2450285913bfp+0, false, - 0x1.00000000000007ed2450285913bfp+0, false, - 0x1.00000000000007ed2450285913cp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + false, + 0x1.00000000000007ed24502859138p+0, false, false, + 0x1.00000000000007ed24502859138p+0, false, false, + 0x1.00000000000007ed24502859138p+0, false, false, + 0x1.00000000000007ed2450285914p+0, false, false, + false, + 0x1.00000000000007ed2450285913bfp+0, false, false, + 0x1.00000000000007ed2450285913bfp+0, false, false, + 0x1.00000000000007ed2450285913bfp+0, false, false, + 0x1.00000000000007ed2450285913cp+0, false, false), TEST ("1.0000000000000001", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.0000000000000734p+0, false, - 
0x1.0000000000000734p+0, false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000736p+0, false, - false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000736p+0, false, - false, - 0x1.0000000000000734aca5f6226fp+0, false, - 0x1.0000000000000734aca5f6226fp+0, false, - 0x1.0000000000000734aca5f6226fp+0, false, - 0x1.0000000000000734aca5f6226f8p+0, false, - false, - 0x1.0000000000000734aca5f6226f0ap+0, false, - 0x1.0000000000000734aca5f6226f0bp+0, false, - 0x1.0000000000000734aca5f6226f0ap+0, false, - 0x1.0000000000000734aca5f6226f0bp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000736p+0, false, false, + false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000736p+0, false, false, + false, + 0x1.0000000000000734aca5f6226fp+0, false, false, + 0x1.0000000000000734aca5f6226fp+0, false, false, + 0x1.0000000000000734aca5f6226fp+0, false, false, + 0x1.0000000000000734aca5f6226f8p+0, false, false, + false, + 0x1.0000000000000734aca5f6226f0ap+0, false, false, + 0x1.0000000000000734aca5f6226f0bp+0, false, false, + 0x1.0000000000000734aca5f6226f0ap+0, false, false, + 0x1.0000000000000734aca5f6226f0bp+0, false, false), TEST ("3929201589819414e-25", false, - 0x1.b0053p-32, false, - 0x1.b00532p-32, false, - 0x1.b0053p-32, false, - 0x1.b00532p-32, false, - false, - 0x1.b005314e2421ep-32, false, - 0x1.b005314e2421ep-32, false, - 0x1.b005314e2421ep-32, false, - 0x1.b005314e2421fp-32, false, - false, - 0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - 
0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - false, - 0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - 0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - false, - 0x1.b005314e2421e7ffb472840c5ap-32, false, - 0x1.b005314e2421e7ffb472840c5a8p-32, false, - 0x1.b005314e2421e7ffb472840c5ap-32, false, - 0x1.b005314e2421e7ffb472840c5a8p-32, false, - false, - 0x1.b005314e2421e7ffb472840c5a6ep-32, false, - 0x1.b005314e2421e7ffb472840c5a6fp-32, false, - 0x1.b005314e2421e7ffb472840c5a6ep-32, false, - 0x1.b005314e2421e7ffb472840c5a6fp-32, false), + 0x1.b0053p-32, false, false, + 0x1.b00532p-32, false, false, + 0x1.b0053p-32, false, false, + 0x1.b00532p-32, false, false, + false, + 0x1.b005314e2421ep-32, false, false, + 0x1.b005314e2421ep-32, false, false, + 0x1.b005314e2421ep-32, false, false, + 0x1.b005314e2421fp-32, false, false, + false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + false, + 0x1.b005314e2421e7ffb472840c5ap-32, false, false, + 0x1.b005314e2421e7ffb472840c5a8p-32, false, false, + 0x1.b005314e2421e7ffb472840c5ap-32, false, false, + 0x1.b005314e2421e7ffb472840c5a8p-32, false, false, + false, + 0x1.b005314e2421e7ffb472840c5a6ep-32, false, false, + 0x1.b005314e2421e7ffb472840c5a6fp-32, false, false, + 0x1.b005314e2421e7ffb472840c5a6ep-32, false, false, + 0x1.b005314e2421e7ffb472840c5a6fp-32, false, false), TEST ("0.0000000000000000000000000000000000000000000021019476964872" "256063855943749348741969203929128147736576356024258346866240" "28790902229957282543182373046875", false, - 0x8p-152, false, - 0x1p-148, false, - 0x8p-152, false, - 0x1p-148, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 
0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false), + 0x8p-152, false, true, + 0x1p-148, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false), TEST ("1.00000005960464477539062499", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffffffffffce7b78p+0, false, - 0x1.000000fffffffffffffffce7b8p+0, false, - 0x1.000000fffffffffffffffce7b78p+0, false, - 0x1.000000fffffffffffffffce7b8p+0, false, - false, - 0x1.000000fffffffffffffffce7b7e7p+0, false, - 0x1.000000fffffffffffffffce7b7e7p+0, false, - 0x1.000000fffffffffffffffce7b7e7p+0, false, - 0x1.000000fffffffffffffffce7b7e8p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 
0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffffffffffce7b78p+0, false, false, + 0x1.000000fffffffffffffffce7b8p+0, false, false, + 0x1.000000fffffffffffffffce7b78p+0, false, false, + 0x1.000000fffffffffffffffce7b8p+0, false, false, + false, + 0x1.000000fffffffffffffffce7b7e7p+0, false, false, + 0x1.000000fffffffffffffffce7b7e7p+0, false, false, + 0x1.000000fffffffffffffffce7b7e7p+0, false, false, + 0x1.000000fffffffffffffffce7b7e8p+0, false, false), TEST ("1.000000059604644775390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 
0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false), TEST ("1.00000005960464477539062501", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000000002p+0, false, - false, - 0x1.00000100000000000000031848p+0, false, - 0x1.00000100000000000000031848p+0, false, - 0x1.00000100000000000000031848p+0, false, - 0x1.000001000000000000000318488p+0, false, - false, - 0x1.0000010000000000000003184818p+0, false, - 0x1.0000010000000000000003184819p+0, false, - 0x1.0000010000000000000003184818p+0, false, - 0x1.0000010000000000000003184819p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000000002p+0, false, false, + false, + 0x1.00000100000000000000031848p+0, false, false, + 0x1.00000100000000000000031848p+0, false, false, + 0x1.00000100000000000000031848p+0, false, false, + 0x1.000001000000000000000318488p+0, false, false, + false, + 0x1.0000010000000000000003184818p+0, false, false, + 0x1.0000010000000000000003184819p+0, 
false, false, + 0x1.0000010000000000000003184818p+0, false, false, + 0x1.0000010000000000000003184819p+0, false, false), TEST ("1.00000011920928955078125", true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false), + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false), TEST ("1.00000017881393432617187499", false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - false, - 0x1.000002fffffffp+0, false, - 0x1.000003p+0, false, - 0x1.000002fffffffp+0, false, - 0x1.000003p+0, false, - false, - 0x1.000002fffffffffep+0, false, - 0x1.000003p+0, false, - 0x1.000002fffffffffep+0, false, - 0x1.000003p+0, false, - false, - 0x1.000002fffffffffep+0, false, - 
0x1.000003p+0, false, - 0x1.000002fffffffffep+0, false, - 0x1.000003p+0, false, - false, - 0x1.000002fffffffffffffffce7b78p+0, false, - 0x1.000002fffffffffffffffce7b8p+0, false, - 0x1.000002fffffffffffffffce7b78p+0, false, - 0x1.000002fffffffffffffffce7b8p+0, false, - false, - 0x1.000002fffffffffffffffce7b7e7p+0, false, - 0x1.000002fffffffffffffffce7b7e7p+0, false, - 0x1.000002fffffffffffffffce7b7e7p+0, false, - 0x1.000002fffffffffffffffce7b7e8p+0, false), + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + false, + 0x1.000002fffffffp+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000002fffffffp+0, false, false, + 0x1.000003p+0, false, false, + false, + 0x1.000002fffffffffep+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000002fffffffffep+0, false, false, + 0x1.000003p+0, false, false, + false, + 0x1.000002fffffffffep+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000002fffffffffep+0, false, false, + 0x1.000003p+0, false, false, + false, + 0x1.000002fffffffffffffffce7b78p+0, false, false, + 0x1.000002fffffffffffffffce7b8p+0, false, false, + 0x1.000002fffffffffffffffce7b78p+0, false, false, + 0x1.000002fffffffffffffffce7b8p+0, false, false, + false, + 0x1.000002fffffffffffffffce7b7e7p+0, false, false, + 0x1.000002fffffffffffffffce7b7e7p+0, false, false, + 0x1.000002fffffffffffffffce7b7e7p+0, false, false, + 0x1.000002fffffffffffffffce7b7e8p+0, false, false), TEST ("1.000000178813934326171875", false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, 
false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false), + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false), TEST ("1.00000017881393432617187501", false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.0000030000001p+0, false, - false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.0000030000000002p+0, false, - false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.0000030000000002p+0, false, - false, - 0x1.00000300000000000000031848p+0, false, - 0x1.00000300000000000000031848p+0, false, - 0x1.00000300000000000000031848p+0, false, - 0x1.000003000000000000000318488p+0, false, - false, - 0x1.0000030000000000000003184818p+0, false, - 0x1.0000030000000000000003184819p+0, false, - 0x1.0000030000000000000003184818p+0, false, - 0x1.0000030000000000000003184819p+0, false), + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, 
false, false, + 0x1.0000030000001p+0, false, false, + false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.0000030000000002p+0, false, false, + false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.0000030000000002p+0, false, false, + false, + 0x1.00000300000000000000031848p+0, false, false, + 0x1.00000300000000000000031848p+0, false, false, + 0x1.00000300000000000000031848p+0, false, false, + 0x1.000003000000000000000318488p+0, false, false, + false, + 0x1.0000030000000000000003184818p+0, false, false, + 0x1.0000030000000000000003184819p+0, false, false, + 0x1.0000030000000000000003184818p+0, false, false, + 0x1.0000030000000000000003184819p+0, false, false), TEST ("1.0000002384185791015625", true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false), + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 
0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false), TEST ("1.08420217248550443400745280086994171142578125e-19", true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false), + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false), TEST ("1.0842022371089897897127399001987457793916291848290711641311" "645507812499e-19", false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2.000004p-64, false, - false, - 0x2.000001ffffffep-64, false, - 0x2.000002p-64, false, - 0x2.000001ffffffep-64, false, - 0x2.000002p-64, false, - false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - false, - 
0x2.000001ffffffffffffffffffffp-64, false, - 0x2.000002p-64, false, - 0x2.000001ffffffffffffffffffffp-64, false, - 0x2.000002p-64, false, - false, - 0x2.000001fffffffffffffffffffffep-64, false, - 0x2.000002p-64, false, - 0x2.000001fffffffffffffffffffffep-64, false, - 0x2.000002p-64, false), + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2.000004p-64, false, false, + false, + 0x2.000001ffffffep-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001ffffffep-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001ffffffffffffffffffffp-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001ffffffffffffffffffffp-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001fffffffffffffffffffffep-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001fffffffffffffffffffffep-64, false, false, + 0x2.000002p-64, false, false), TEST ("1.0842022371089897897127399001987457793916291848290711641311" "6455078125e-19", false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false), + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, 
false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false), TEST ("1.0842022371089897897127399001987457793916291848290711641311" "645507812501e-19", false, - 0x2p-64, false, - 0x2.000004p-64, false, - 0x2p-64, false, - 0x2.000004p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000002p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000000004p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000000004p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.00000200000000000000000001p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000000000000000000002p-64, false), + 0x2p-64, false, false, + 0x2.000004p-64, false, false, + 0x2p-64, false, false, + 0x2.000004p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.0000020000002p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.0000020000000004p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, 
false, + 0x2.000002p-64, false, false, + 0x2.0000020000000004p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.00000200000000000000000001p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.0000020000000000000000000002p-64, false, false), TEST ("1.0842023017324751454180269995275498473574771196581423282623" "291015625e-19", true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false), + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false), TEST 
("1.0842023663559605011233140988563539153233250544872134923934" "936523437499e-19", false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - false, - 0x2.000005ffffffep-64, false, - 0x2.000006p-64, false, - 0x2.000005ffffffep-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005ffffffffffffffffffffp-64, false, - 0x2.000006p-64, false, - 0x2.000005ffffffffffffffffffffp-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005fffffffffffffffffffffep-64, false, - 0x2.000006p-64, false, - 0x2.000005fffffffffffffffffffffep-64, false, - 0x2.000006p-64, false), + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + false, + 0x2.000005ffffffep-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005ffffffep-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005ffffffffffffffffffffp-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005ffffffffffffffffffffp-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005fffffffffffffffffffffep-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005fffffffffffffffffffffep-64, false, false, + 0x2.000006p-64, false, false), TEST ("1.0842023663559605011233140988563539153233250544872134923934" "9365234375e-19", false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - 0x2.000004p-64, false, - 
0x2.000008p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false), + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false), TEST ("1.0842023663559605011233140988563539153233250544872134923934" "936523437501e-19", false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000002p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000000004p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000000004p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, 
false, - 0x2.00000600000000000000000001p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000000000000000000002p-64, false), + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000002p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000000004p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000000004p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.00000600000000000000000001p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000000000000000000002p-64, false, false), TEST ("1.0842024309794458568286011981851579832891729893162846565246" "58203125e-19", true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false), + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, 
+ 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false), TEST ("7.5231638452626400509999138382223723380394595633413601376560" "1092018187046051025390625e-37", true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false), + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false), TEST ("7.5231642936781486349413765338158389908126215730251815381410" 
"578824437213052434003657253924757242202758789062499e-37", false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1.000002p-120, false, - false, - 0x1.000000fffffffp-120, false, - 0x1.000001p-120, false, - 0x1.000000fffffffp-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000ffffffffffffffffffff8p-120, false, - 0x1.000001p-120, false, - 0x1.000000ffffffffffffffffffff8p-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000ffffffffffffffffffffffp-120, false, - 0x1.000001p-120, false, - 0x1.000000ffffffffffffffffffffffp-120, false, - 0x1.000001p-120, false), + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + false, + 0x1.000000fffffffp-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000fffffffp-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000ffffffffffffffffffff8p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000ffffffffffffffffffff8p-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000ffffffffffffffffffffffp-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000ffffffffffffffffffffffp-120, false, false, + 0x1.000001p-120, false, false), TEST ("7.5231642936781486349413765338158389908126215730251815381410" "5788244372130524340036572539247572422027587890625e-37", false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, 
false, - 0x1.000002p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false), + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false), TEST ("7.5231642936781486349413765338158389908126215730251815381410" "578824437213052434003657253924757242202758789062501e-37", false, - 0x1p-120, false, - 0x1.000002p-120, false, - 0x1p-120, false, - 0x1.000002p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000001p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000000002p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000000002p-120, false, - 
false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001000000000000000000008p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000000000000000000001p-120, false), + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.0000010000001p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.0000010000000002p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.0000010000000002p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001000000000000000000008p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.0000010000000000000000000001p-120, false, false), TEST ("7.5231647420936572188828392294093056435857835827090029386261" "048447055721499765468252007849514484405517578125e-37", true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false), + 0x1.000002p-120, false, false, + 
0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false), TEST ("7.5231651905091658028243019250027722963589455923928243391111" "518069674229947096932846761774271726608276367187499e-37", false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - false, - 0x1.000002fffffffp-120, false, - 0x1.000003p-120, false, - 0x1.000002fffffffp-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002ffffffffffffffffffff8p-120, false, - 0x1.000003p-120, false, - 0x1.000002ffffffffffffffffffff8p-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002ffffffffffffffffffffffp-120, false, - 0x1.000003p-120, false, - 0x1.000002ffffffffffffffffffffffp-120, false, - 0x1.000003p-120, false), + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + false, + 0x1.000002fffffffp-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002fffffffp-120, false, false, + 
0x1.000003p-120, false, false, + false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + false, + 0x1.000002ffffffffffffffffffff8p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002ffffffffffffffffffff8p-120, false, false, + 0x1.000003p-120, false, false, + false, + 0x1.000002ffffffffffffffffffffffp-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002ffffffffffffffffffffffp-120, false, false, + 0x1.000003p-120, false, false), TEST ("7.5231651905091658028243019250027722963589455923928243391111" "5180696742299470969328467617742717266082763671875e-37", false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false), + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, 
false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false), TEST ("7.5231651905091658028243019250027722963589455923928243391111" "518069674229947096932846761774271726608276367187501e-37", false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000001p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000000002p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000000002p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003000000000000000000008p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000000000000000000001p-120, false), + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000001p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000000002p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000000002p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003000000000000000000008p-120, false, false, + false, + 
0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000000000000000000001p-120, false, false), TEST ("7.5231656389246743867657646205962389491321076020766457395961" "98769229273839442839744151569902896881103515625e-37", true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false), + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false), TEST ("340282356779733661637539395458142568447.999", false, - 0xf.fffffp+124, false, - 0xf.fffffp+124, false, - 0xf.fffffp+124, false, - INF, true, - false, - 0xf.fffff7ffffff8p+124, false, - 0xf.fffff8p+124, false, - 
0xf.fffff7ffffff8p+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffffffffffffcp+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffffffffffffcp+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffffffffffffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffffffffffffff8p+124, false, - 0xf.fffff8p+124, false), + 0xf.fffffp+124, false, false, + 0xf.fffffp+124, false, false, + 0xf.fffffp+124, false, false, + INF, true, false, + false, + 0xf.fffff7ffffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7ffffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffffffffffffcp+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffffffffffffcp+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffffffffffffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffffffffffffff8p+124, false, false, + 0xf.fffff8p+124, false, false), TEST ("340282356779733661637539395458142568448", false, - 0xf.fffffp+124, false, - INF, true, - 0xf.fffffp+124, false, - INF, true, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, 
false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false), + 0xf.fffffp+124, false, false, + INF, true, false, + 0xf.fffffp+124, false, false, + INF, true, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false), TEST ("340282356779733661637539395458142568448.001", false, - 0xf.fffffp+124, false, - INF, true, - 0xf.fffffp+124, false, - INF, true, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff80000008p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8000000001p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8000000001p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff800000000000000000004p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff80000000000000000000008p+124, false), + 0xf.fffffp+124, false, false, + INF, true, false, + 0xf.fffffp+124, false, false, + INF, true, false, + false, + 0xf.fffff8p+124, false, 
false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff80000008p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8000000001p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8000000001p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff800000000000000000004p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff80000000000000000000008p+124, false, false), TEST ("-340282356779733661637539395458142568447.999", false, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7ffffff8p+124, false, - -0xf.fffff7ffffff8p+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffp+124, false, - -0xf.fffff7fffffffffp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffp+124, false, - -0xf.fffff7fffffffffp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffffffffffffcp+124, false, - -0xf.fffff7fffffffffffffffffffcp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffffffffffffff8p+124, false, - -0xf.fffff7fffffffffffffffffffff8p+124, false), + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7ffffff8p+124, false, false, + -0xf.fffff7ffffff8p+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffp+124, 
false, false, + -0xf.fffff7fffffffffp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffp+124, false, false, + -0xf.fffff7fffffffffp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffffffffffffcp+124, false, false, + -0xf.fffff7fffffffffffffffffffcp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffffffffffffff8p+124, false, false, + -0xf.fffff7fffffffffffffffffffff8p+124, false, false), TEST ("-340282356779733661637539395458142568448", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + 
true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false), TEST ("-340282356779733661637539395458142568448.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - false, - -0xf.fffff80000008p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff8000000001p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff8000000001p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff800000000000000000004p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff80000000000000000000008p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + false, + -0xf.fffff80000008p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff8000000001p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff8000000001p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff800000000000000000004p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff80000000000000000000008p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false), TEST ("179769313486231580793728971405303415079934132710037826936173" "778980444968292764750946649017977587207096330286416692887910" 
"946555547851940402630657488671505820681908902000708383676273" @@ -1855,35 +1855,35 @@ static const struct test tests[] = { "936475292719074168444365510704342711559699508093042880177904" "174497791.999", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - 0xf.ffffffffffff8p+1020, false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - false, - 0xf.ffffffffffffbffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffbffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, true, - false, - 0xf.ffffffffffffbffffffffffffff8p+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffbffffffffffffff8p+1020, false, - 0xf.ffffffffffffcp+1020, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + 0xf.ffffffffffff8p+1020, false, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + false, + 0xf.ffffffffffffbffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffbffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, true, false, + false, + 0xf.ffffffffffffbffffffffffffff8p+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 
0xf.ffffffffffffbffffffffffffff8p+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false), TEST ("179769313486231580793728971405303415079934132710037826936173" "778980444968292764750946649017977587207096330286416692887910" "946555547851940402630657488671505820681908902000708383676273" @@ -1891,35 +1891,35 @@ static const struct test tests[] = { "936475292719074168444365510704342711559699508093042880177904" "174497792", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - 0xf.ffffffffffff8p+1020, false, - INF, true, - true, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - true, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - false, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - true, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + true, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + true, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + true, + 0xf.ffffffffffffcp+1020, false, false, + 
0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false), TEST ("179769313486231580793728971405303415079934132710037826936173" "778980444968292764750946649017977587207096330286416692887910" "946555547851940402630657488671505820681908902000708383676273" @@ -1927,35 +1927,35 @@ static const struct test tests[] = { "936475292719074168444365510704342711559699508093042880177904" "174497792.001", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - 0xf.ffffffffffff8p+1020, false, - INF, true, - false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffc01p+1020, false, - false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffc01p+1020, false, - false, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffc0000000000004p+1020, true, - false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffc000000000000008p+1020, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffc01p+1020, false, false, + false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffc01p+1020, false, false, + false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 
0xf.ffffffffffffc0000000000004p+1020, true, false, + false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffc000000000000008p+1020, false, false), TEST ("-17976931348623158079372897140530341507993413271003782693617" "377898044496829276475094664901797758720709633028641669288791" "094655554785194040263065748867150582068190890200070838367627" @@ -1963,35 +1963,35 @@ static const struct test tests[] = { "493647529271907416844436551070434271155969950809304288017790" "4174497791.999", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - false, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffbffffffffffffcp+1020, false, - -0xf.ffffffffffffbffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffbffffffffffffff8p+1020, false, - -0xf.ffffffffffffbffffffffffffff8p+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffbffp+1020, false, false, + -0xf.ffffffffffffbffp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + 
-0xf.ffffffffffffbffp+1020, false, false, + -0xf.ffffffffffffbffp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffbffffffffffffcp+1020, false, false, + -0xf.ffffffffffffbffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffbffffffffffffff8p+1020, false, false, + -0xf.ffffffffffffbffffffffffffff8p+1020, false, false), TEST ("-17976931348623158079372897140530341507993413271003782693617" "377898044496829276475094664901797758720709633028641669288791" "094655554785194040263065748867150582068190890200070838367627" @@ -1999,35 +1999,35 @@ static const struct test tests[] = { "493647529271907416844436551070434271155969950809304288017790" "4174497792", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - true, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - true, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - true, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + true, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + 
-0xf.ffffffffffffcp+1020, false, false, + true, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + true, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false), TEST ("-17976931348623158079372897140530341507993413271003782693617" "377898044496829276475094664901797758720709633028641669288791" "094655554785194040263065748867150582068190890200070838367627" @@ -2035,35 +2035,35 @@ static const struct test tests[] = { "493647529271907416844436551070434271155969950809304288017790" "4174497792.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - false, - -0xf.ffffffffffffc01p+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffc01p+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffc0000000000004p+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - false, - -0xf.ffffffffffffc000000000000008p+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + false, + -0xf.ffffffffffffc01p+1020, 
false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffc01p+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffc0000000000004p+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + false, + -0xf.ffffffffffffc000000000000008p+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false), TEST ("118973149535723176505351158982948866796625400469556721895649" "927756249918185172720476044944290457046138433056764616744328" "666255526748948793023632513609765434237723241753648908036202" @@ -2148,35 +2148,35 @@ static const struct test tests[] = { "578031503869424406179027994752890226443351619365453243328968" "8740976918527.999", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffff7fffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff7fffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, 
true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffff7fffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff7fffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false), TEST ("118973149535723176505351158982948866796625400469556721895649" "927756249918185172720476044944290457046138433056764616744328" "666255526748948793023632513609765434237723241753648908036202" @@ -2261,35 +2261,35 @@ static const struct test tests[] = { "578031503869424406179027994752890226443351619365453243328968" "8740976918528", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - true, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 
0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + true, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false), TEST ("118973149535723176505351158982948866796625400469556721895649" "927756249918185172720476044944290457046138433056764616744328" "666255526748948793023632513609765434237723241753648908036202" @@ -2374,35 +2374,35 @@ static const struct test tests[] = { "578031503869424406179027994752890226443351619365453243328968" "8740976918528.001", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8000000000008p+16380, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + 
INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8000000000008p+16380, false, false), TEST ("-11897314953572317650535115898294886679662540046955672189564" "992775624991818517272047604494429045704613843305676461674432" "866625552674894879302363251360976543423772324175364890803620" @@ -2487,35 +2487,35 @@ static const struct test tests[] = { "557803150386942440617902799475289022644335161936545324332896" "88740976918527.999", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff7fffffffffff8p+16380, false, - -0xf.fffffffffffffff7fffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + 
-0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff7fffffffffff8p+16380, false, false, + -0xf.fffffffffffffff7fffffffffff8p+16380, false, false), TEST ("-11897314953572317650535115898294886679662540046955672189564" "992775624991818517272047604494429045704613843305676461674432" "866625552674894879302363251360976543423772324175364890803620" @@ -2600,35 +2600,35 @@ static const struct test tests[] = { "557803150386942440617902799475289022644335161936545324332896" "88740976918528", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - true, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, 
false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + true, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false), TEST ("-11897314953572317650535115898294886679662540046955672189564" "992775624991818517272047604494429045704613843305676461674432" "866625552674894879302363251360976543423772324175364890803620" @@ -2713,35 +2713,35 @@ static const struct test tests[] = { "557803150386942440617902799475289022644335161936545324332896" "88740976918528.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -0xf.fffffffffffffff8000000000008p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, 
false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -0xf.fffffffffffffff8000000000008p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false), TEST ("118973149535723176508575932662800707347995686986910214150118" "685272271246896789803961473130416053705672050873552479421805" "932646640744124594447361172514341324846716679654551308018400" @@ -2826,35 +2826,35 @@ static const struct test tests[] = { "972233447491583165728635513802591543441145939539353470970452" "5536550715391.999", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, 
false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false), TEST ("118973149535723176508575932662800707347995686986910214150118" "685272271246896789803961473130416053705672050873552479421805" "932646640744124594447361172514341324846716679654551308018400" @@ -2939,35 +2939,35 @@ static const struct test tests[] = { "972233447491583165728635513802591543441145939539353470970452" "5536550715392", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, 
false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false), TEST ("118973149535723176508575932662800707347995686986910214150118" "685272271246896789803961473130416053705672050873552479421805" "932646640744124594447361172514341324846716679654551308018400" @@ -3052,35 +3052,35 @@ static const struct test tests[] = { "972233447491583165728635513802591543441145939539353470970452" "5536550715392.001", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + 
INF, true, false), TEST ("-11897314953572317650857593266280070734799568698691021415011" "868527227124689678980396147313041605370567205087355247942180" "593264664074412459444736117251434132484671667965455130801840" @@ -3165,35 +3165,35 @@ static const struct test tests[] = { "097223344749158316572863551380259154344114593953935347097045" "25536550715391.999", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST 
("-11897314953572317650857593266280070734799568698691021415011" "868527227124689678980396147313041605370567205087355247942180" "593264664074412459444736117251434132484671667965455130801840" @@ -3278,35 +3278,35 @@ static const struct test tests[] = { "097223344749158316572863551380259154344114593953935347097045" "25536550715392", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST ("-11897314953572317650857593266280070734799568698691021415011" "868527227124689678980396147313041605370567205087355247942180" 
"593264664074412459444736117251434132484671667965455130801840" @@ -3391,419 +3391,419 @@ static const struct test tests[] = { "097223344749158316572863551380259154344114593953935347097045" "25536550715392.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST ("2.1019476964872256063855943749348741969203929128147736576356" "0242583468662402879090222995728254318237304687499e-45", false, - 0x8p-152, false, - 0x8p-152, false, - 0x8p-152, false, - 0x1p-148, false, - false, - 0xb.ffffffffffff8p-152, false, - 
0xcp-152, false, - 0xb.ffffffffffff8p-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffffffffffffcp-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffffffffffffcp-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffffffffffffff8p-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffffffffffffff8p-152, false, - 0xcp-152, false), + 0x8p-152, false, true, + 0x8p-152, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + false, + 0xb.ffffffffffff8p-152, false, false, + 0xcp-152, false, false, + 0xb.ffffffffffff8p-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffffffffffffcp-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffffffffffffcp-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffffffffffffff8p-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffffffffffffff8p-152, false, false, + 0xcp-152, false, false), TEST ("2.1019476964872256063855943749348741969203929128147736576356" "02425834686624028790902229957282543182373046875e-45", false, - 0x8p-152, false, - 0x1p-148, false, - 0x8p-152, false, - 0x1p-148, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, 
- true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false), + 0x8p-152, false, true, + 0x1p-148, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false), TEST ("2.1019476964872256063855943749348741969203929128147736576356" "0242583468662402879090222995728254318237304687501e-45", false, - 0x8p-152, false, - 0x1p-148, false, - 0x8p-152, false, - 0x1p-148, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.0000000000008p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.000000000000001p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.000000000000001p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.00000000000000000000000004p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.0000000000000000000000000008p-152, false), + 0x8p-152, false, true, + 0x1p-148, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.0000000000008p-152, false, false, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.000000000000001p-152, false, false, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.000000000000001p-152, false, false, + false, + 0xcp-152, false, 
false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.00000000000000000000000004p-152, false, false, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.0000000000000000000000000008p-152, false, false), TEST ("-2.101947696487225606385594374934874196920392912814773657635" "60242583468662402879090222995728254318237304687499e-45", false, - -0x1p-148, false, - -0x8p-152, false, - -0x8p-152, false, - -0x8p-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.ffffffffffff8p-152, false, - -0xb.ffffffffffff8p-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffp-152, false, - -0xb.fffffffffffffffp-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffp-152, false, - -0xb.fffffffffffffffp-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffffffffffffcp-152, false, - -0xb.fffffffffffffffffffffffffcp-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffffffffffffff8p-152, false, - -0xb.fffffffffffffffffffffffffff8p-152, false), + -0x1p-148, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.ffffffffffff8p-152, false, false, + -0xb.ffffffffffff8p-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffffffffffffcp-152, false, false, + -0xb.fffffffffffffffffffffffffcp-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffffffffffffff8p-152, false, false, + -0xb.fffffffffffffffffffffffffff8p-152, 
false, false), TEST ("-2.101947696487225606385594374934874196920392912814773657635" "602425834686624028790902229957282543182373046875e-45", false, - -0x1p-148, false, - -0x1p-148, false, - -0x8p-152, false, - -0x8p-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false), + -0x1p-148, false, true, + -0x1p-148, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false), TEST ("-2.101947696487225606385594374934874196920392912814773657635" "60242583468662402879090222995728254318237304687501e-45", false, - -0x1p-148, false, - -0x1p-148, false, - -0x8p-152, false, - -0x8p-152, false, - false, - -0xc.0000000000008p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - false, - -0xc.000000000000001p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - false, - -0xc.000000000000001p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - false, - -0xc.00000000000000000000000004p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - 
false, - -0xc.0000000000000000000000000008p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false), + -0x1p-148, false, true, + -0x1p-148, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + false, + -0xc.0000000000008p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.000000000000001p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.000000000000001p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.00000000000000000000000004p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.0000000000000000000000000008p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false), TEST ("3.5032461608120426773093239582247903282006548546912894293926" "7070972447770671465150371659547090530395507812499e-45", false, - 0x1p-148, false, - 0x1p-148, false, - 0x1p-148, false, - 0x1.8p-148, false, - false, - 0x1.3ffffffffffffp-148, false, - 0x1.4p-148, false, - 0x1.3ffffffffffffp-148, false, - 0x1.4p-148, false, - false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - false, - 0x1.3fffffffffffffffffffffffff8p-148, false, - 0x1.4p-148, false, - 0x1.3fffffffffffffffffffffffff8p-148, false, - 0x1.4p-148, false, - false, - 0x1.3fffffffffffffffffffffffffffp-148, false, - 0x1.4p-148, false, - 0x1.3fffffffffffffffffffffffffffp-148, false, - 0x1.4p-148, false), + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1.8p-148, false, true, + false, + 0x1.3ffffffffffffp-148, false, false, + 0x1.4p-148, false, false, + 0x1.3ffffffffffffp-148, false, false, + 
0x1.4p-148, false, false, + false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + false, + 0x1.3fffffffffffffffffffffffff8p-148, false, false, + 0x1.4p-148, false, false, + 0x1.3fffffffffffffffffffffffff8p-148, false, false, + 0x1.4p-148, false, false, + false, + 0x1.3fffffffffffffffffffffffffffp-148, false, false, + 0x1.4p-148, false, false, + 0x1.3fffffffffffffffffffffffffffp-148, false, false, + 0x1.4p-148, false, false), TEST ("3.5032461608120426773093239582247903282006548546912894293926" "70709724477706714651503716595470905303955078125e-45", false, - 0x1p-148, false, - 0x1p-148, false, - 0x1p-148, false, - 0x1.8p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false), + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1.8p-148, false, true, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 
0x1.4p-148, false, false, + 0x1.4p-148, false, false), TEST ("3.5032461608120426773093239582247903282006548546912894293926" "7070972447770671465150371659547090530395507812501e-45", false, - 0x1p-148, false, - 0x1.8p-148, false, - 0x1p-148, false, - 0x1.8p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000001p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000000002p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000000002p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.400000000000000000000000008p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000000000000000000001p-148, false), + 0x1p-148, false, true, + 0x1.8p-148, false, true, + 0x1p-148, false, true, + 0x1.8p-148, false, true, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000001p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000000002p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000000002p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.400000000000000000000000008p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000000000000000000001p-148, false, false), TEST ("-3.503246160812042677309323958224790328200654854691289429392" "67070972447770671465150371659547090530395507812499e-45", false, - -0x1.8p-148, false, - -0x1p-148, false, - -0x1p-148, false, - -0x1p-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3ffffffffffffp-148, false, - -0x1.3ffffffffffffp-148, false, - false, - -0x1.4p-148, 
false, - -0x1.4p-148, false, - -0x1.3ffffffffffffffep-148, false, - -0x1.3ffffffffffffffep-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3ffffffffffffffep-148, false, - -0x1.3ffffffffffffffep-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3fffffffffffffffffffffffff8p-148, false, - -0x1.3fffffffffffffffffffffffff8p-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3fffffffffffffffffffffffffffp-148, false, - -0x1.3fffffffffffffffffffffffffffp-148, false), + -0x1.8p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3ffffffffffffp-148, false, false, + -0x1.3ffffffffffffp-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3fffffffffffffffffffffffff8p-148, false, false, + -0x1.3fffffffffffffffffffffffff8p-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3fffffffffffffffffffffffffffp-148, false, false, + -0x1.3fffffffffffffffffffffffffffp-148, false, false), TEST ("-3.503246160812042677309323958224790328200654854691289429392" "670709724477706714651503716595470905303955078125e-45", false, - -0x1.8p-148, false, - -0x1p-148, false, - -0x1p-148, false, - -0x1p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, 
false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false), + -0x1.8p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false), TEST ("-3.503246160812042677309323958224790328200654854691289429392" "67070972447770671465150371659547090530395507812501e-45", false, - -0x1.8p-148, false, - -0x1.8p-148, false, - -0x1p-148, false, - -0x1p-148, false, - false, - -0x1.4000000000001p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.4000000000000002p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.4000000000000002p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.400000000000000000000000008p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.4000000000000000000000000001p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false), + -0x1.8p-148, false, true, + -0x1.8p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + false, + -0x1.4000000000001p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.4000000000000002p-148, false, false, + -0x1.4p-148, false, 
false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.4000000000000002p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.400000000000000000000000008p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.4000000000000000000000000001p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false), TEST ("7.4109846876186981626485318930233205854758970392148714663837" "852375101326090531312779794975454245398856969484704316857659" "638998506553390969459816219401617281718945106978546710679176" @@ -3818,35 +3818,35 @@ static const struct test tests[] = { "337560846003984904972149117463085539556354188641513168478436" "31308023759629577398300170898437499e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x5.fffffffffffffffffffffffffffcp-1076, false, - 0x6p-1076, false, - 0x5.fffffffffffffffffffffffffffcp-1076, false, - 0x6p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x5.fffffffffffffff8p-1076, false, false, + 0x6p-1076, false, false, + 0x5.fffffffffffffff8p-1076, false, false, + 0x6p-1076, false, false, + false, + 0x5.fffffffffffffff8p-1076, false, false, + 0x6p-1076, false, false, + 0x5.fffffffffffffff8p-1076, false, 
false, + 0x6p-1076, false, false, + false, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x5.fffffffffffffffffffffffffffcp-1076, false, false, + 0x6p-1076, false, false, + 0x5.fffffffffffffffffffffffffffcp-1076, false, false, + 0x6p-1076, false, false), TEST ("7.4109846876186981626485318930233205854758970392148714663837" "852375101326090531312779794975454245398856969484704316857659" "638998506553390969459816219401617281718945106978546710679176" @@ -3861,35 +3861,35 @@ static const struct test tests[] = { "337560846003984904972149117463085539556354188641513168478436" "313080237596295773983001708984375e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-1076, false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - true, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - true, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - false, - 0x4p-1076, false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - true, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + true, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + true, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + true, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false), TEST ("7.4109846876186981626485318930233205854758970392148714663837" 
"852375101326090531312779794975454245398856969484704316857659" "638998506553390969459816219401617281718945106978546710679176" @@ -3904,35 +3904,35 @@ static const struct test tests[] = { "337560846003984904972149117463085539556354188641513168478436" "31308023759629577398300170898437501e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-1076, false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6.0000000000000008p-1076, false, - false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6.0000000000000008p-1076, false, - false, - 0x4p-1076, false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6.0000000000000000000000000004p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6.0000000000000008p-1076, false, false, + false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6.0000000000000008p-1076, false, false, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6.0000000000000000000000000004p-1076, false, false), TEST ("-7.410984687618698162648531893023320585475897039214871466383" "785237510132609053131277979497545424539885696948470431685765" "963899850655339096945981621940161728171894510697854671067917" @@ -3947,35 +3947,35 @@ static const struct test tests[] = { "433756084600398490497214911746308553955635418864151316847843" "631308023759629577398300170898437499e-324", false, - -0x8p-152, 
false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x5.fffffffffffffffffffffffffffcp-1076, false, - -0x5.fffffffffffffffffffffffffffcp-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + false, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x5.fffffffffffffffffffffffffffcp-1076, false, false, + -0x5.fffffffffffffffffffffffffffcp-1076, false, false), TEST ("-7.410984687618698162648531893023320585475897039214871466383" "785237510132609053131277979497545424539885696948470431685765" "963899850655339096945981621940161728171894510697854671067917" @@ -3990,35 +3990,35 @@ static const struct test tests[] = { "433756084600398490497214911746308553955635418864151316847843" "6313080237596295773983001708984375e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - 
-0x4p-1076, false, - true, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - true, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - true, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + true, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + true, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + true, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false), TEST ("-7.410984687618698162648531893023320585475897039214871466383" "785237510132609053131277979497545424539885696948470431685765" "963899850655339096945981621940161728171894510697854671067917" @@ -4033,35 +4033,35 @@ static const struct test tests[] = { "433756084600398490497214911746308553955635418864151316847843" "631308023759629577398300170898437501e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6.0000000000000008p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - false, - -0x6.0000000000000008p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6.0000000000000000000000000004p-1076, false, - 
-0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + -0x6.0000000000000008p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + false, + -0x6.0000000000000008p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + -0x6.0000000000000000000000000004p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -4255,35 +4255,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687499e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0x8p-16448, false, - 0xcp-16448, false, - 0x8p-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x8p-16448, false, true, + 
0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, false, true, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, false, true), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -4477,35 +4477,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "6833433740466716699302196502685546875e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - true, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false), TEST 
("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -4699,35 +4699,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687501e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0x1p-16444, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xc.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0x1p-16444, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xc.000000000004p-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -4921,35 +4921,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687499e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - 
-0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xb.fffffffffffcp-16448, false, - -0xb.fffffffffffcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -5143,35 +5143,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "26833433740466716699302196502685546875e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - 
-0x0p+0, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + true, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -5365,35 +5365,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687501e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x1p-16444, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xc.000000000004p-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + 
-0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x1p-16444, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xc.000000000004p-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -5587,35 +5587,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687499e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0x8p-16448, false, - 0xcp-16448, false, - 0x8p-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x8p-16448, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, false, true, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, 
false, true), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -5809,35 +5809,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "6833433740466716699302196502685546875e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - true, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -6031,35 +6031,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687501e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, 
- false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0x1p-16444, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xc.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0x1p-16444, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xc.000000000004p-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -6253,35 +6253,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687499e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - 
-0xb.fffffffffffcp-16448, false, - -0xb.fffffffffffcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -6475,35 +6475,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "26833433740466716699302196502685546875e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, 
true, + true, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -6697,630 +6697,630 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687501e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x1p-16444, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xc.000000000004p-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x1p-16444, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xc.000000000004p-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true), TEST ("-0x0.7p-149", 
false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false), TEST ("-0x0.7p-1074", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - true, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, 
false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + true, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false), TEST ("-0x0.7p-16445", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x3.8p-16448, false, - -0x3.8p-16448, false, - -0x3.8p-16448, false, - -0x3.8p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x3.8p-16448, false, false, + -0x3.8p-16448, false, false, + -0x3.8p-16448, false, false, + -0x3.8p-16448, false, false), TEST ("-0x0.7p-16494", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - 
-0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST ("0x1p-150", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, 
false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false), TEST ("0x1p-1075", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false), TEST ("0x1p-16446", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, 
false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false), TEST ("0x1p-16495", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("-0x1p-150", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - 
-0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false), TEST ("-0x1p-1075", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + 
-0x2p-1076, false, false), TEST ("-0x1p-16446", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false), TEST ("-0x1p-16495", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + 
-0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST (".70064923216240853546186479164495807e-45", false, - 0x0p+0, false, - 0x8p-152, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000004p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.00000000000000000000000002p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000000000000000004p-152, false), + 0x0p+0, false, true, + 0x8p-152, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000004p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.00000000000000000000000002p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000000000000000004p-152, false, false), TEST ("7.0064923216240853546186479164495806564013097093825788587853" "4141944895541342930300743319094181060791015624e-46", false, - 0x0p+0, false, - 
0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x3.ffffffffffffep-152, false, - 0x4p-152, false, - 0x3.ffffffffffffep-152, false, - 0x4p-152, false, - false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - false, - 0x3.ffffffffffffffffffffffffffp-152, false, - 0x4p-152, false, - 0x3.ffffffffffffffffffffffffffp-152, false, - 0x4p-152, false, - false, - 0x3.fffffffffffffffffffffffffffep-152, false, - 0x4p-152, false, - 0x3.fffffffffffffffffffffffffffep-152, false, - 0x4p-152, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x3.ffffffffffffep-152, false, false, + 0x4p-152, false, false, + 0x3.ffffffffffffep-152, false, false, + 0x4p-152, false, false, + false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + false, + 0x3.ffffffffffffffffffffffffffp-152, false, false, + 0x4p-152, false, false, + 0x3.ffffffffffffffffffffffffffp-152, false, false, + 0x4p-152, false, false, + false, + 0x3.fffffffffffffffffffffffffffep-152, false, false, + 0x4p-152, false, false, + 0x3.fffffffffffffffffffffffffffep-152, false, false, + 0x4p-152, false, false), TEST ("7.0064923216240853546186479164495806564013097093825788587853" "4141944895541342930300743319094181060791015625e-46", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, 
false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false), TEST ("7.0064923216240853546186479164495806564013097093825788587853" "4141944895541342930300743319094181060791015626e-46", false, - 0x0p+0, false, - 0x8p-152, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000004p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.00000000000000000000000002p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000000000000000004p-152, false), + 0x0p+0, false, true, + 0x8p-152, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000004p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, 
false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.00000000000000000000000002p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000000000000000004p-152, false, false), TEST ("-7.006492321624085354618647916449580656401309709382578858785" "34141944895541342930300743319094181060791015624e-46", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.ffffffffffffep-152, false, - -0x3.ffffffffffffep-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.fffffffffffffffcp-152, false, - -0x3.fffffffffffffffcp-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.fffffffffffffffcp-152, false, - -0x3.fffffffffffffffcp-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.ffffffffffffffffffffffffffp-152, false, - -0x3.ffffffffffffffffffffffffffp-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.fffffffffffffffffffffffffffep-152, false, - -0x3.fffffffffffffffffffffffffffep-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.ffffffffffffep-152, false, false, + -0x3.ffffffffffffep-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.ffffffffffffffffffffffffffp-152, false, false, + -0x3.ffffffffffffffffffffffffffp-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + 
-0x3.fffffffffffffffffffffffffffep-152, false, false, + -0x3.fffffffffffffffffffffffffffep-152, false, false), TEST ("-7.006492321624085354618647916449580656401309709382578858785" "34141944895541342930300743319094181060791015625e-46", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false), TEST ("-7.006492321624085354618647916449580656401309709382578858785" "34141944895541342930300743319094181060791015626e-46", false, - -0x8p-152, false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4.0000000000004p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - -0x4.0000000000000008p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - -0x4.0000000000000008p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - 
-0x4.00000000000000000000000002p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - -0x4.0000000000000000000000000004p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false), + -0x8p-152, false, true, + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4.0000000000004p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.0000000000000008p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.0000000000000008p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.00000000000000000000000002p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.0000000000000000000000000004p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false), TEST ("2.4703282292062327208828439643411068618252990130716238221279" "284125033775363510437593264991818081799618989828234772285886" "546332835517796989819938739800539093906315035659515570226392" @@ -7335,35 +7335,35 @@ static const struct test tests[] = { "779186948667994968324049705821028513185451396213837722826145" "437693412532098591327667236328124e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x1.ffffffffffffffffffffffffffffp-1076, false, - 0x2p-1076, false, - 0x1.ffffffffffffffffffffffffffffp-1076, false, - 0x2p-1076, 
false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.ffffffffffffffffffffffffffffp-1076, false, false, + 0x2p-1076, false, false, + 0x1.ffffffffffffffffffffffffffffp-1076, false, false, + 0x2p-1076, false, false), TEST ("2.4703282292062327208828439643411068618252990130716238221279" "284125033775363510437593264991818081799618989828234772285886" "546332835517796989819938739800539093906315035659515570226392" @@ -7378,35 +7378,35 @@ static const struct test tests[] = { "779186948667994968324049705821028513185451396213837722826145" "437693412532098591327667236328125e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + true, + 0x2p-1076, false, false, 
+ 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false), TEST ("2.4703282292062327208828439643411068618252990130716238221279" "284125033775363510437593264991818081799618989828234772285886" "546332835517796989819938739800539093906315035659515570226392" @@ -7421,35 +7421,35 @@ static const struct test tests[] = { "779186948667994968324049705821028513185451396213837722826145" "437693412532098591327667236328126e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x4p-1076, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2.0000000000000004p-1076, false, - false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2.0000000000000004p-1076, false, - false, - 0x0p+0, false, - 0x4p-1076, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2.0000000000000000000000000002p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x4p-1076, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2.0000000000000004p-1076, false, false, + false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2.0000000000000004p-1076, false, false, + false, + 0x0p+0, false, true, + 0x4p-1076, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2.0000000000000000000000000002p-1076, false, false), TEST 
("-2.470328229206232720882843964341106861825299013071623822127" "928412503377536351043759326499181808179961898982823477228588" "654633283551779698981993873980053909390631503565951557022639" @@ -7464,35 +7464,35 @@ static const struct test tests[] = { "477918694866799496832404970582102851318545139621383772282614" "5437693412532098591327667236328124e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x1.fffffffffffffffep-1076, false, - -0x1.fffffffffffffffep-1076, false, - false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x1.fffffffffffffffep-1076, false, - -0x1.fffffffffffffffep-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x1.ffffffffffffffffffffffffffffp-1076, false, - -0x1.ffffffffffffffffffffffffffffp-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x1.ffffffffffffffffffffffffffffp-1076, false, false, + -0x1.ffffffffffffffffffffffffffffp-1076, false, false), TEST ("-2.470328229206232720882843964341106861825299013071623822127" "928412503377536351043759326499181808179961898982823477228588" 
"654633283551779698981993873980053909390631503565951557022639" @@ -7507,35 +7507,35 @@ static const struct test tests[] = { "477918694866799496832404970582102851318545139621383772282614" "5437693412532098591327667236328125e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false), TEST ("-2.470328229206232720882843964341106861825299013071623822127" "928412503377536351043759326499181808179961898982823477228588" "654633283551779698981993873980053909390631503565951557022639" @@ -7550,35 +7550,35 @@ static const struct test tests[] = { "477918694866799496832404970582102851318545139621383772282614" "5437693412532098591327667236328126e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - false, - 
-0x2.0000000000000004p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x2.0000000000000004p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2.0000000000000000000000000002p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2.0000000000000004p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x2.0000000000000004p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2.0000000000000000000000000002p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false), TEST ("1.8225997659412373012642029668097099081995254078467816718604" "902435141858443166988684047803543129136025986236736736017655" "509834928163110160849867540377949045027419112905889658392846" @@ -7772,35 +7772,35 @@ static const struct test tests[] = { "622089641993900896756734276531931450266972752637997248151974" "2277811246822238899767398834228515624e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x4p-16448, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x3.fffffffffffcp-16448, false, - 0x4p-16448, false, 
- 0x3.fffffffffffcp-16448, false, - 0x4p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x4p-16448, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x3.fffffffffffcp-16448, false, true, + 0x4p-16448, false, true, + 0x3.fffffffffffcp-16448, false, true, + 0x4p-16448, false, true), TEST ("1.8225997659412373012642029668097099081995254078467816718604" "902435141858443166988684047803543129136025986236736736017655" "509834928163110160849867540377949045027419112905889658392846" @@ -7994,35 +7994,35 @@ static const struct test tests[] = { "622089641993900896756734276531931450266972752637997248151974" "2277811246822238899767398834228515625e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, 
false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false), TEST ("1.8225997659412373012642029668097099081995254078467816718604" "902435141858443166988684047803543129136025986236736736017655" "509834928163110160849867540377949045027419112905889658392846" @@ -8216,35 +8216,35 @@ static const struct test tests[] = { "622089641993900896756734276531931450266972752637997248151974" "2277811246822238899767398834228515626e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x8p-16448, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x8p-16448, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x4.000000000004p-16448, false, true), TEST ("-1.822599765941237301264202966809709908199525407846781671860" "490243514185844316698868404780354312913602598623673673601765" "550983492816311016084986754037794904502741911290588965839284" @@ -8438,35 
+8438,35 @@ static const struct test tests[] = { "462208964199390089675673427653193145026697275263799724815197" "42277811246822238899767398834228515624e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x3.fffffffffffcp-16448, false, - -0x3.fffffffffffcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x3.fffffffffffcp-16448, false, true, + -0x3.fffffffffffcp-16448, false, true), TEST ("-1.822599765941237301264202966809709908199525407846781671860" "490243514185844316698868404780354312913602598623673673601765" "550983492816311016084986754037794904502741911290588965839284" @@ -8660,35 +8660,35 @@ static const struct test tests[] = { "462208964199390089675673427653193145026697275263799724815197" "42277811246822238899767398834228515625e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - 
-0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false), TEST ("-1.822599765941237301264202966809709908199525407846781671860" "490243514185844316698868404780354312913602598623673673601765" "550983492816311016084986754037794904502741911290588965839284" @@ -8882,35 +8882,35 @@ static const struct test tests[] = { "462208964199390089675673427653193145026697275263799724815197" "42277811246822238899767398834228515626e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4.000000000004p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, 
false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4.000000000004p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true), TEST ("9.1129988297061865063210148340485495409976270392339083593024" "512175709292215834943420239017715645680129931183683680088277" "549174640815550804249337701889745225137095564529448291964230" @@ -9104,35 +9104,35 @@ static const struct test tests[] = { "110448209969504483783671382659657251334863763189986240759871" "1389056234111194498836994171142578124e-4952", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x1.fffffffffffcp-16448, false, - 0x2p-16448, false, - 0x1.fffffffffffcp-16448, false, - 0x2p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffcp-16448, 
false, true, + 0x2p-16448, false, true, + 0x1.fffffffffffcp-16448, false, true, + 0x2p-16448, false, true), TEST ("9.1129988297061865063210148340485495409976270392339083593024" "512175709292215834943420239017715645680129931183683680088277" "549174640815550804249337701889745225137095564529448291964230" @@ -9326,35 +9326,35 @@ static const struct test tests[] = { "110448209969504483783671382659657251334863763189986240759871" "1389056234111194498836994171142578125e-4952", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-16448, false, false, + 0x2p-16448, false, false, + 0x2p-16448, false, false, + 0x2p-16448, false, false), TEST ("9.1129988297061865063210148340485495409976270392339083593024" "512175709292215834943420239017715645680129931183683680088277" "549174640815550804249337701889745225137095564529448291964230" @@ -9548,35 +9548,35 @@ static const struct test tests[] = { "110448209969504483783671382659657251334863763189986240759871" "1389056234111194498836994171142578126e-4952", false, - 0x0p+0, false, - 0x0p+0, 
false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x4p-16448, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x4p-16448, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x2p-16448, false, true, + 0x2p-16448, false, true, + 0x2p-16448, false, true, + 0x2.000000000004p-16448, false, true), TEST ("-9.112998829706186506321014834048549540997627039233908359302" "451217570929221583494342023901771564568012993118368368008827" "754917464081555080424933770188974522513709556452944829196423" @@ -9770,35 +9770,35 @@ static const struct test tests[] = { "311044820996950448378367138265965725133486376318998624075987" "11389056234111194498836994171142578124e-4952", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2p-16448, false, - -0x2p-16448, false, - -0x1.fffffffffffcp-16448, false, - 
-0x1.fffffffffffcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2p-16448, false, true, + -0x2p-16448, false, true, + -0x1.fffffffffffcp-16448, false, true, + -0x1.fffffffffffcp-16448, false, true), TEST ("-9.112998829706186506321014834048549540997627039233908359302" "451217570929221583494342023901771564568012993118368368008827" "754917464081555080424933770188974522513709556452944829196423" @@ -9992,35 +9992,35 @@ static const struct test tests[] = { "311044820996950448378367138265965725133486376318998624075987" "11389056234111194498836994171142578125e-4952", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + 
-0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-16448, false, false, + -0x2p-16448, false, false, + -0x2p-16448, false, false, + -0x2p-16448, false, false), TEST ("-9.112998829706186506321014834048549540997627039233908359302" "451217570929221583494342023901771564568012993118368368008827" "754917464081555080424933770188974522513709556452944829196423" @@ -10214,35 +10214,35 @@ static const struct test tests[] = { "311044820996950448378367138265965725133486376318998624075987" "11389056234111194498836994171142578126e-4952", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2.000000000004p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2.000000000004p-16448, false, true, + -0x2p-16448, false, true, + -0x2p-16448, false, true, + -0x2p-16448, false, true), TEST ("3.2375875597190125554622194791138232762497846690173405048449" "421945985197700620596855088357456383249701279390707384240598" 
"382936099431912710233425550359863089915213963553756674672083" @@ -10437,35 +10437,35 @@ static const struct test tests[] = { "182358152808745703724362178773168996492870519432472065091133" "11767578124e-4966", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("3.2375875597190125554622194791138232762497846690173405048449" "421945985197700620596855088357456383249701279390707384240598" "382936099431912710233425550359863089915213963553756674672083" @@ -10660,35 +10660,35 @@ static const struct test tests[] = { "182358152808745703724362178773168996492870519432472065091133" "11767578125e-4966", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 
0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("3.2375875597190125554622194791138232762497846690173405048449" "421945985197700620596855088357456383249701279390707384240598" "382936099431912710233425550359863089915213963553756674672083" @@ -10883,35 +10883,35 @@ static const struct test tests[] = { "182358152808745703724362178773168996492870519432472065091133" "11767578126e-4966", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x4p-16496, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 
0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x4p-16496, false, true, + 0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("-3.237587559719012555462219479113823276249784669017340504844" "942194598519770062059685508835745638324970127939070738424059" "838293609943191271023342555035986308991521396355375667467208" @@ -11106,35 +11106,35 @@ static const struct test tests[] = { "218235815280874570372436217877316899649287051943247206509113" "311767578124e-4966", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST ("-3.237587559719012555462219479113823276249784669017340504844" "942194598519770062059685508835745638324970127939070738424059" "838293609943191271023342555035986308991521396355375667467208" @@ -11329,35 +11329,35 @@ static const struct test tests[] = { 
"218235815280874570372436217877316899649287051943247206509113" "311767578125e-4966", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST ("-3.237587559719012555462219479113823276249784669017340504844" "942194598519770062059685508835745638324970127939070738424059" "838293609943191271023342555035986308991521396355375667467208" @@ -11552,66 +11552,66 @@ static const struct test tests[] = { "218235815280874570372436217877316899649287051943247206509113" "311767578126e-4966", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - 
false, - -0x4p-16496, false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST ("340282366920938463463374607431768211455", false, - 0xf.fffffp+124, false, - INF, true, - 0xf.fffffp+124, false, - INF, true, - false, - 0xf.ffffffffffff8p+124, false, - 0x1p+128, false, - 0xf.ffffffffffff8p+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffffffffffffcp+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffffffffffffcp+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffffffffffffff8p+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffffffffffffff8p+124, false, - 0x1p+128, false), + 0xf.fffffp+124, false, false, + INF, true, false, + 0xf.fffffp+124, false, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+124, false, false, + 0x1p+128, false, false, + 0xf.ffffffffffff8p+124, false, false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffp+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffp+124, false, false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffp+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffp+124, false, 
false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffffffffffffcp+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffffffffffffcp+124, false, false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffffffffffffff8p+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffffffffffffff8p+124, false, false, + 0x1p+128, false, false), TEST ("179769313486231590772930519078902473361797697894230657273430" "081157732675805500963132708477322407536021120113879871393357" "658789768814416622492847430639474124377767893424865485276302" @@ -11619,35 +11619,35 @@ static const struct test tests[] = { "540827237163350510684586298239947245938479716304835356329624" "224137215", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - 0xf.ffffffffffff8p+1020, false, - INF, true, - false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+1020, false, - 0x1p+1024, false, - 0xf.fffffffffffffffffffffffffff8p+1020, false, - 0x1p+1024, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, 
+ INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+1020, false, false, + 0x1p+1024, false, false, + 0xf.fffffffffffffffffffffffffff8p+1020, false, false, + 0x1p+1024, false, false), TEST ("118973149535723176508575932662800713076344468709651023747267" "482123326135818048368690448859547261203991511543748483930925" "889766738130868742627452469834156500608087163436600489752214" @@ -11732,66 +11732,66 @@ static const struct test tests[] = { "047398248889922809181821393428829567971736994315246044702729" "0669964066815", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + 
INF, true, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false), TEST ("-340282366920938463463374607431768211455", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.ffffffffffff8p+124, false, - -0xf.ffffffffffff8p+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffp+124, false, - -0xf.fffffffffffffffp+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffp+124, false, - -0xf.fffffffffffffffp+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffffffffffffcp+124, false, - -0xf.fffffffffffffffffffffffffcp+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffffffffffffff8p+124, false, - -0xf.fffffffffffffffffffffffffff8p+124, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.ffffffffffff8p+124, false, false, + -0xf.ffffffffffff8p+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffp+124, false, false, + -0xf.fffffffffffffffp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffp+124, false, false, + -0xf.fffffffffffffffp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffffffffffffcp+124, false, false, + -0xf.fffffffffffffffffffffffffcp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffffffffffffff8p+124, false, false, + -0xf.fffffffffffffffffffffffffff8p+124, false, false), TEST ("-17976931348623159077293051907890247336179769789423065727343" "008115773267580550096313270847732240753602112011387987139335" "765878976881441662249284743063947412437776789342486548527630" @@ -11799,35 +11799,35 
@@ static const struct test tests[] = { "054082723716335051068458629823994724593847971630483535632962" "4224137215", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - false, - -0x1p+1024, false, - -0x1p+1024, false, - -0xf.fffffffffffffffp+1020, false, - -0xf.fffffffffffffffp+1020, false, - false, - -0x1p+1024, false, - -0x1p+1024, false, - -0xf.fffffffffffffffp+1020, false, - -0xf.fffffffffffffffp+1020, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -0x1p+1024, false, - -0x1p+1024, false, - -0xf.fffffffffffffffffffffffffff8p+1020, false, - -0xf.fffffffffffffffffffffffffff8p+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + false, + -0x1p+1024, false, false, + -0x1p+1024, false, false, + -0xf.fffffffffffffffp+1020, false, false, + -0xf.fffffffffffffffp+1020, false, false, + false, + -0x1p+1024, false, false, + -0x1p+1024, false, false, + -0xf.fffffffffffffffp+1020, false, false, + -0xf.fffffffffffffffp+1020, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -0x1p+1024, false, false, + -0x1p+1024, false, false, + -0xf.fffffffffffffffffffffffffff8p+1020, false, false, + -0xf.fffffffffffffffffffffffffff8p+1020, false, false), TEST ("-11897314953572317650857593266280071307634446870965102374726" "748212332613581804836869044885954726120399151154374848393092" "588976673813086874262745246983415650060808716343660048975221" @@ -11912,3529 +11912,3529 @@ static const struct test tests[] = { 
"904739824888992280918182139342882956797173699431524604470272" "90669964066815", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST ("+0x.80000000000000000000000000000001p1025", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1.0000000000000002p+1024, false, - false, - 0x1p+1024, false, - 0x1p+1024, 
false, - 0x1p+1024, false, - 0x1.0000000000000002p+1024, false, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1.0000000000000000000000000001p+1024, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1.0000000000000002p+1024, false, false, + false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1.0000000000000002p+1024, false, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1.0000000000000000000000000001p+1024, false, false), TEST ("1.5", true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false), + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 
0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false), TEST ("1.25", true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false), + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false), TEST ("1.125", true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, 
false), + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false), TEST ("1.0625", true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false), + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false), TEST ("1.03125", true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 
0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false), + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false), TEST ("1.015625", true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false), + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 
0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false), TEST ("1.0078125", true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false), + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false), TEST ("1.00390625", true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 
0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false), + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false), TEST ("1.001953125", true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false), + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 
0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false), TEST ("1.0009765625", true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false), + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false), TEST ("1.00048828125", true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false), + 0x1.002p+0, false, false, 
+ 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false), TEST ("1.000244140625", true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false), + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false), TEST ("1.0001220703125", 
true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false), + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false), TEST ("1.00006103515625", true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false), + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, 
+ 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false), TEST ("1.000030517578125", true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false), + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false), TEST 
("1.0000152587890625", true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false), + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false), TEST ("1.00000762939453125", true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false), + 0x1.00008p+0, false, false, + 
0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false), TEST ("1.000003814697265625", true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false), + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 
0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false), TEST ("1.0000019073486328125", true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false), + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false), TEST ("1.00000095367431640625", true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 
0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false), + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false), TEST ("1.000000476837158203125", true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false), + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 
0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false), TEST ("1.0000000298023223876953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false), TEST ("1.00000001490116119384765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, 
false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false), TEST ("1.000000007450580596923828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, 
false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false), TEST ("1.0000000037252902984619140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, 
false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false), TEST ("1.00000000186264514923095703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false), TEST ("1.000000000931322574615478515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 
0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false), TEST ("1.0000000004656612873077392578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 
0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false), TEST ("1.00000000023283064365386962890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 
0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false), TEST ("1.000000000116415321826934814453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false), TEST ("1.0000000000582076609134674072265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 
0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false), TEST ("1.00000000002910383045673370361328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, 
false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false), TEST ("1.000000000014551915228366851806640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 
0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false), TEST ("1.0000000000072759576141834259033203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false), TEST ("1.00000000000363797880709171295166015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 
0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false), TEST ("1.000000000001818989403545856475830078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 
0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false), TEST ("1.0000000000009094947017729282379150390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, 
false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false), TEST ("1.00000000000045474735088646411895751953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 
0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false), TEST ("1.000000000000227373675443232059478759765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false), TEST ("1.0000000000001136868377216160297393798828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 
0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false), TEST ("1.00000000000005684341886080801486968994140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 
0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false), TEST ("1.000000000000028421709430404007434844970703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 
0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false), TEST ("1.0000000000000142108547152020037174224853515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, 
false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false), TEST ("1.00000000000000710542735760100185871124267578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 
0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false), TEST ("1.000000000000003552713678800500929355621337890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false), TEST ("1.0000000000000017763568394002504646778106689453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 
0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false), TEST ("1.00000000000000088817841970012523233890533447265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, 
false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false), TEST ("1.000000000000000444089209850062616169452667236328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 
0x1.0000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false), TEST ("1.0000000000000002220446049250313080847263336181640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, 
false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false), TEST ("1.00000000000000011102230246251565404236316680908203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + true, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + true, + 
0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + true, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false), TEST ("1.000000000000000055511151231257827021181583404541015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false), TEST 
("1.0000000000000000277555756156289135105907917022705078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - true, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - true, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - true, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false), TEST ("1.00000000000000001387778780781445675529539585113525390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, 
false, - 0x1.00000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false), TEST ("1.000000000000000006938893903907228377647697925567626953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - true, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - true, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - true, - 
0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false), TEST ("1.0000000000000000034694469519536141888238489627838134765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, 
false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false), TEST ("1.0000000000000000017347234759768070944119244813919067382812" "5", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, 
false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false), TEST ("1.0000000000000000008673617379884035472059622406959533691406" "25", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, 
false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false), TEST ("1.0000000000000000004336808689942017736029811203479766845703" "125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false), TEST ("1.0000000000000000002168404344971008868014905601739883422851" "5625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, 
false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false), TEST ("1.0000000000000000001084202172485504434007452800869941711425" "78125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, 
false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false), TEST ("1.0000000000000000000542101086242752217003726400434970855712" "890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - true, - 0x1.0000000000000001p+0, false, - 
0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + true, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false), TEST ("1.0000000000000000000271050543121376108501863200217485427856" "4453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - true, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 
false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + true, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false), TEST ("1.0000000000000000000135525271560688054250931600108742713928" "22265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - true, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + true, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 
0x1.00000000000000004p+0, false, false), TEST ("1.0000000000000000000067762635780344027125465800054371356964" "111328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - true, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + true, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false), TEST ("1.0000000000000000000033881317890172013562732900027185678482" "0556640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 
0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - true, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + true, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false), TEST ("1.0000000000000000000016940658945086006781366450013592839241" "02783203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - true, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + true, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false), TEST ("1.0000000000000000000008470329472543003390683225006796419620" "513916015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - true, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 
0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + true, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false), TEST ("1.0000000000000000000004235164736271501695341612503398209810" "2569580078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - true, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + true, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false), TEST ("1.0000000000000000000002117582368135750847670806251699104905" 
"12847900390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - true, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + true, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false), TEST ("1.0000000000000000000001058791184067875423835403125849552452" "564239501953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000008p+0, 
false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - true, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + true, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false), TEST ("1.0000000000000000000000529395592033937711917701562924776226" "2821197509765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - true, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, 
false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + true, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false), TEST ("1.0000000000000000000000264697796016968855958850781462388113" "14105987548828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - true, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000002p+0, false, false, + 
0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + true, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false), TEST ("1.0000000000000000000000132348898008484427979425390731194056" "570529937744140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - true, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + true, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false), TEST ("1.0000000000000000000000066174449004242213989712695365597028" 
"2852649688720703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - true, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + true, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false), TEST ("1.0000000000000000000000033087224502121106994856347682798514" "14263248443603515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, 
false, - true, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - true, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + true, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false), TEST ("1.0000000000000000000000016543612251060553497428173841399257" "071316242218017578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - true, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + true, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false), TEST ("1.0000000000000000000000008271806125530276748714086920699628" "5356581211090087890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - true, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + true, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false), TEST ("1.0000000000000000000000004135903062765138374357043460349814" "26782906055450439453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - true, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, 
false, + 0x1.000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000002067951531382569187178521730174907" "133914530277252197265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - true, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + true, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000001033975765691284593589260865087453" "5669572651386260986328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, 
false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - true, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + true, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000516987882845642296794630432543726" "78347863256931304931640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - true, - 0x1.000000000000000000001p+0, 
false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + true, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000258493941422821148397315216271863" "391739316284656524658203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - true, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + 
false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000129246970711410574198657608135931" "6958696581423282623291015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - true, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, 
false, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + true, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000064623485355705287099328804067965" "84793482907116413116455078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - true, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + true, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false), TEST 
("1.0000000000000000000000000032311742677852643549664402033982" "923967414535582065582275390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - true, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + true, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000016155871338926321774832201016991" "4619837072677910327911376953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 
0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - true, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + true, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000008077935669463160887416100508495" "73099185363389551639556884765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - true, - 
0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + true, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000004038967834731580443708050254247" "865495926816947758197784423828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - true, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, 
false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + true, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000002019483917365790221854025127123" "9327479634084738790988922119140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - true, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 
true, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + true, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000001009741958682895110927012563561" "96637398170423693954944610595703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + 
0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000504870979341447555463506281780" "983186990852118469774723052978515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - true, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + true, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000252435489670723777731753140890" "4915934954260592348873615264892578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 
0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - true, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + true, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000126217744835361888865876570445" "24579674771302961744368076324462890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 
0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - true, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + true, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000063108872417680944432938285222" "622898373856514808721840381622314453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - 
0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000031554436208840472216469142611" "3114491869282574043609201908111572265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - true, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + true, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000015777218104420236108234571305" "65572459346412870218046009540557861328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - true, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, 
+ true, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000007888609052210118054117285652" "827862296732064351090230047702789306640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - true, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + true, + 0x1.0000000000000000000000001p+0, false, false, + 
0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000003944304526105059027058642826" "4139311483660321755451150238513946533203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - true, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + true, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000001972152263052529513529321413" 
"20696557418301608777255751192569732666015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - true, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + true, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000000986076131526264756764660706" "603482787091508043886278755962848663330078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, 
false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - true, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000000493038065763132378382330353" "3017413935457540219431393779814243316650390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - 
0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - true, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + true, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000000246519032881566189191165176" "65087069677287701097156968899071216583251953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 
0x1.000000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000000123259516440783094595582588" "325435348386438505485784844495356082916259765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000004p+0, false, - 0x1.000000000000000000000000004p+0, false, - 0x1.000000000000000000000000004p+0, false, - 0x1.000000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000004p+0, false, false, + 0x1.000000000000000000000000004p+0, false, false, + 0x1.000000000000000000000000004p+0, false, false, + 0x1.000000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000000061629758220391547297791294" "1627176741932192527428924222476780414581298828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000002p+0, false, - 0x1.000000000000000000000000002p+0, false, - 0x1.000000000000000000000000002p+0, false, - 0x1.000000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000002p+0, false, false, + 
0x1.000000000000000000000000002p+0, false, false, + 0x1.000000000000000000000000002p+0, false, false, + 0x1.000000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000000030814879110195773648895647" "08135883709660962637144621112383902072906494140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000001p+0, false, - 0x1.000000000000000000000000001p+0, false, - 0x1.000000000000000000000000001p+0, false, - 0x1.000000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000001p+0, false, false, + 0x1.000000000000000000000000001p+0, false, false, + 0x1.000000000000000000000000001p+0, false, false, + 0x1.000000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000000015407439555097886824447823" "540679418548304813185723105561919510364532470703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 
0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000008p+0, false, - 0x1.0000000000000000000000000008p+0, false, - 0x1.0000000000000000000000000008p+0, false, - 0x1.0000000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000000007703719777548943412223911" "7703397092741524065928615527809597551822662353515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000004p+0, false, - 
0x1.0000000000000000000000000004p+0, false, - 0x1.0000000000000000000000000004p+0, false, - 0x1.0000000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000000003851859888774471706111955" "88516985463707620329643077639047987759113311767578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000002p+0, false, - 0x1.0000000000000000000000000002p+0, false, - 0x1.0000000000000000000000000002p+0, false, - 0x1.0000000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 
0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000000001925929944387235853055977" "942584927318538101648215388195239938795566558837890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000001p+0, false, - 0x1.0000000000000000000000000001p+0, false, - 0x1.0000000000000000000000000001p+0, false, - 0x1.0000000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000001p+0, 
false, false, + 0x1.0000000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000000000962964972193617926527988" "9712924636592690508241076940976199693977832794189453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000000000000000001p+0, false, false), }; diff --git a/stdlib/tst-strtod-round-skeleton.c b/stdlib/tst-strtod-round-skeleton.c index c3cc0201d4..be081ba416 100644 --- a/stdlib/tst-strtod-round-skeleton.c +++ b/stdlib/tst-strtod-round-skeleton.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "tst-strtod.h" @@ -139,16 +140,26 @@ gen-tst-strtod-round utility to select the appropriately rounded long double value 
for a given format. */ #define TEST(s, \ - fx, fd, fdo, fn, fno, fz, fzo, fu, fuo, \ - dx, dd, ddo, dn, dno, dz, dzo, du, duo, \ - ld64ix, ld64id, ld64ido, ld64in, ld64ino, \ - ld64iz, ld64izo, ld64iu, ld64iuo, \ - ld64mx, ld64md, ld64mdo, ld64mn, ld64mno, \ - ld64mz, ld64mzo, ld64mu, ld64muo, \ - ld106x, ld106d, ld106do, ld106n, ld106no, \ - ld106z, ld106zo, ld106u, ld106uo, \ - ld113x, ld113d, ld113do, ld113n, ld113no, \ - ld113z, ld113zo, ld113u, ld113uo) \ + fx, fd, fdo, fdu, fn, fno, fnu, \ + fz, fzo, fzu, fu, fuo, fuu, \ + dx, dd, ddo, ddu, dn, dno, dnu, \ + dz, dzo, dzu, du, duo, duu, \ + ld64ix, ld64id, ld64ido, ld64idu, \ + ld64in, ld64ino, ld64inu, \ + ld64iz, ld64izo, ld64izu, \ + ld64iu, ld64iuo, ld64iuu, \ + ld64mx, ld64md, ld64mdo, ld64mdu, \ + ld64mn, ld64mno, ld64mnu, \ + ld64mz, ld64mzo, ld64mzu, \ + ld64mu, ld64muo, ld64muu, \ + ld106x, ld106d, ld106do, ld106du, \ + ld106n, ld106no, ld106nu, \ + ld106z, ld106zo, ld106zu, \ + ld106u, ld106uo, ld106uu, \ + ld113x, ld113d, ld113do, ld113du, \ + ld113n, ld113no, ld113nu, \ + ld113z, ld113zo, ld113zu, \ + ld113u, ld113uo, ld113uu) \ { \ L_ (s), \ { XNTRY (fx, dx, ld64ix, ld64mx, ld106x, ld113x) }, \ @@ -163,6 +174,12 @@ { XNTRY (fdo, ddo, ld64ido, ld64mdo, ld106do, ld113do) }, \ { XNTRY (fzo, dzo, ld64izo, ld64mzo, ld106zo, ld113zo) }, \ { XNTRY (fuo, duo, ld64iuo, ld64muo, ld106uo, ld113uo) } \ + }, \ + { \ + { XNTRY (fnu, dnu, ld64inu, ld64mnu, ld106nu, ld113nu) }, \ + { XNTRY (fdu, ddu, ld64idu, ld64mdu, ld106du, ld113du) }, \ + { XNTRY (fzu, dzu, ld64izu, ld64mzu, ld106zu, ld113zu) }, \ + { XNTRY (fuu, duu, ld64iuu, ld64muu, ld106uu, ld113uu) } \ } \ } @@ -181,11 +198,17 @@ struct test_overflow STRUCT_FOREACH_FLOAT_BOOL }; +struct test_underflow + { + STRUCT_FOREACH_FLOAT_BOOL + }; + struct test { const CHAR *s; struct test_exactness exact; struct test_results r[4]; struct test_overflow o[4]; + struct test_underflow u[4]; }; /* Include the generated test data. 
*/ @@ -203,10 +226,14 @@ struct test { # define FE_OVERFLOW 0 #endif +#ifndef FE_UNDERFLOW +# define FE_UNDERFLOW 0 +#endif + #define GEN_ONE_TEST(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ { \ feclearexcept (FE_ALL_EXCEPT); \ - errno = 0; \ + errno = 12345; \ FTYPE f = STRTO (FSUF) (s, NULL); \ int new_errno = errno; \ if (f != expected->FSUF \ @@ -265,6 +292,40 @@ struct test { s, new_errno, ERANGE); \ result = 1; \ } \ + if (FE_UNDERFLOW != 0) \ + { \ + bool underflow_raised \ + = fetestexcept (FE_UNDERFLOW) != 0; \ + if (underflow_raised != underflow->FSUF) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") underflow %d " \ + "not %d\n", s, underflow_raised, \ + underflow->FSUF); \ + if (EXCEPTION_TESTS (FTYPE)) \ + result = 1; \ + else \ + printf ("ignoring this exception error\n"); \ + } \ + } \ + if (underflow->FSUF && new_errno != ERANGE) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") left errno == %d," \ + " not %d (ERANGE)\n", \ + s, new_errno, ERANGE); \ + result = 1; \ + } \ + if (!overflow->FSUF \ + && !underflow->FSUF \ + && new_errno != 12345) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") set errno == %d," \ + " should be unchanged\n", \ + s, new_errno); \ + result = 1; \ + } \ } \ } @@ -272,6 +333,7 @@ static int test_in_one_mode (const CHAR *s, const struct test_results *expected, const struct test_exactness *exact, const struct test_overflow *overflow, + const struct test_underflow *underflow, const char *mode_name, int rnd_mode) { int result = 0; @@ -307,6 +369,7 @@ do_test (void) { result |= test_in_one_mode (tests[i].s, &tests[i].r[modes[0].rnd_i], &tests[i].exact, &tests[i].o[modes[0].rnd_i], + &tests[i].u[modes[0].rnd_i], modes[0].mode_name, modes[0].rnd_mode); for (const struct fetestmodes *m = &modes[1]; m->mode_name != NULL; m++) { @@ -314,7 +377,9 @@ do_test (void) { result |= test_in_one_mode (tests[i].s, &tests[i].r[m->rnd_i], &tests[i].exact, - &tests[i].o[m->rnd_i], m->mode_name, + &tests[i].o[m->rnd_i], + 
&tests[i].u[m->rnd_i], + m->mode_name, m->rnd_mode); fesetround (save_round_mode); } commit d0c1792ad269566f877208ffda91c21dcd1a72e6 Author: Joseph Myers Date: Tue Aug 27 12:41:02 2024 +0000 Fix strtod subnormal rounding (bug 30220) As reported in bug 30220, the implementation of strtod-family functions has a bug in the following case: the input string would, with infinite exponent range, take one more bit to represent than is available in the normal precision of the return type; the value represented is in the subnormal range; and there are no nonzero bits in the value, below those that can be represented in subnormal precision, other than the least significant bit and possibly the 0.5ulp bit. In this case, round_and_return ends up discarding the least significant bit. Fix by saving that bit to merge into more_bits (it can't be merged in at the time it's computed, because more_bits mustn't include this bit in the case of after-rounding tininess detection checking if the result is still subnormal when rounded to normal precision, so merging this bit into more_bits needs to take place after that check). Tested for x86_64. (cherry picked from commit 457622c2fa8f9f7435822d5287a437bc8be8090d) diff --git a/stdlib/strtod_l.c b/stdlib/strtod_l.c index be515ce659..beb97b3d0c 100644 --- a/stdlib/strtod_l.c +++ b/stdlib/strtod_l.c @@ -222,6 +222,7 @@ round_and_return (mp_limb_t *retval, intmax_t exponent, int negative, mp_size_t shift = MIN_EXP - 1 - exponent; bool is_tiny = true; + bool old_half_bit = (round_limb & (((mp_limb_t) 1) << round_bit)) != 0; more_bits |= (round_limb & ((((mp_limb_t) 1) << round_bit) - 1)) != 0; if (shift == MANT_DIG) @@ -292,6 +293,7 @@ round_and_return (mp_limb_t *retval, intmax_t exponent, int negative, round_bit = shift - 1; (void) __mpn_rshift (retval, retval, RETURN_LIMB_SIZE, shift); } + more_bits |= old_half_bit; /* This is a hook for the m68k long double format, where the exponent bias is the same for normalized and denormalized numbers. 
*/ diff --git a/stdlib/tst-strtod-round-data b/stdlib/tst-strtod-round-data index 84ab705709..9489fbcc9c 100644 --- a/stdlib/tst-strtod-round-data +++ b/stdlib/tst-strtod-round-data @@ -265,3 +265,15 @@ 1.000000000000000000000000000000000385185988877447170611195588516985463707620329643077639047987759113311767578125 1.0000000000000000000000000000000001925929944387235853055977942584927318538101648215388195239938795566558837890625 1.00000000000000000000000000000000009629649721936179265279889712924636592690508241076940976199693977832794189453125 +0x30000002222225p-1077 +0x0.7fffffffffffeap-1022 +0x0.7fffffffffffe9p-1022 +0x0.7ffffd4p-126 +0x0.7ffffffffffffffd4p-16382 +0x0.7ffffffffffffffd4p-16383 +0x0.7ffffffffffffffffffffffffffeap-16382 +0x0.7000004p-126 +0x0.70000000000002p-1022 +0x0.70000000000000004p-16382 +0x0.70000000000000004p-16383 +0x0.70000000000000000000000000002p-16382 diff --git a/stdlib/tst-strtod-round-data.h b/stdlib/tst-strtod-round-data.h index 13e62dd2b0..ed50eb2537 100644 --- a/stdlib/tst-strtod-round-data.h +++ b/stdlib/tst-strtod-round-data.h @@ -15437,4 +15437,376 @@ static const struct test tests[] = { 0x1p+0, false, false, 0x1p+0, false, false, 0x1.0000000000000000000000000001p+0, false, false), + TEST ("0x30000002222225p-1077", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + 0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + true, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + true, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + false, + 0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + 
0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + true, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false), + TEST ("0x0.7fffffffffffeap-1022", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + true, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false), + TEST ("0x0.7fffffffffffe9p-1022", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + true, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, 
false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false), + TEST ("0x0.7ffffd4p-126", + false, + 0x1.fffffp-128, false, true, + 0x1.fffff8p-128, false, true, + 0x1.fffffp-128, false, true, + 0x1.fffff8p-128, false, true, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false), + TEST ("0x0.7ffffffffffffffd4p-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffffp-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + 0x1.fffffffffffffffp-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + false, + 0x1.fffffffffffffff4p-16384, false, true, + 0x1.fffffffffffffff4p-16384, false, true, + 0x1.fffffffffffffff4p-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x1.fffffffffffffff5p-16384, false, false, + 0x1.fffffffffffffff5p-16384, false, false, + 0x1.fffffffffffffff5p-16384, false, false, + 
0x1.fffffffffffffff5p-16384, false, false), + TEST ("0x0.7ffffffffffffffd4p-16383", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffff8p-16388, false, true, + 0x1p-16384, false, true, + false, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffffcp-16388, false, true, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffffcp-16388, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xf.ffffffffffffffa8p-16388, false, false, + 0xf.ffffffffffffffa8p-16388, false, false, + 0xf.ffffffffffffffa8p-16388, false, false, + 0xf.ffffffffffffffa8p-16388, false, false), + TEST ("0x0.7ffffffffffffffffffffffffffeap-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffff8p-16384, false, true, + 0x2p-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + 0x2p-16384, false, true, + false, + 0x1.fffffffffffffffcp-16384, false, true, + 0x2p-16384, false, true, + 0x1.fffffffffffffffcp-16384, false, true, + 0x2p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffffffffffffffff8p-16384, false, true, + 0x1.fffffffffffffffffffffffffffcp-16384, false, true, + 0x1.fffffffffffffffffffffffffff8p-16384, false, true, + 0x1.fffffffffffffffffffffffffffcp-16384, false, true), + TEST ("0x0.7000004p-126", + false, + 0x1.cp-128, false, true, + 0x1.cp-128, false, true, + 0x1.cp-128, false, true, + 0x1.c00008p-128, false, true, + true, + 
0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false), + TEST ("0x0.70000000000002p-1022", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.c000000000004p-1024, false, true, + true, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + true, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + false, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.c000000000004p-1024, false, true, + true, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false), + TEST ("0x0.70000000000000004p-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000008p-16384, 
false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000004p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x1.c000000000000001p-16384, false, false, + 0x1.c000000000000001p-16384, false, false, + 0x1.c000000000000001p-16384, false, false, + 0x1.c000000000000001p-16384, false, false), + TEST ("0x0.70000000000000004p-16383", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xe.000000000000008p-16388, false, true, + false, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xe.000000000000004p-16388, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xe.0000000000000008p-16388, false, false, + 0xe.0000000000000008p-16388, false, false, + 0xe.0000000000000008p-16388, false, false, + 0xe.0000000000000008p-16388, false, false), + TEST ("0x0.70000000000000000000000000002p-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000008p-16384, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000004p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 
0x1.c000000000000000000000000004p-16384, false, true), }; commit cac10d88c684c0171e549d813bfef7a31029f257 Author: Joseph Myers Date: Tue Aug 27 20:41:54 2024 +0000 Make __strtod_internal tests type-generic Some of the strtod tests use type-generic machinery in tst-strtod.h to test the strto* functions for all floating types, while others only test double even when the tests are in fact meaningful for all floating types. Convert the tests of the internal __strtod_internal interface to cover all floating types. I haven't tried to convert them to use newer test interfaces in other ways, just made the changes necessary to use the type-generic machinery. As an internal interface, there are no aliases for different types with the same ABI (however, __strtold_internal is defined even if long double has the same ABI as double), so macros used by the type-generic testing code are redefined as needed to avoid expecting such aliases to be present. Tested for x86_64. (cherry picked from commit 3fc063dee01da4f80920a14b7db637c8501d6fd4) diff --git a/stdlib/tst-strtod1i.c b/stdlib/tst-strtod1i.c index 9d6bb760fb..44ae0264f4 100644 --- a/stdlib/tst-strtod1i.c +++ b/stdlib/tst-strtod1i.c @@ -25,60 +25,91 @@ #include #include -/* Perform a few tests in a locale with thousands separators. */ -static int -do_test (void) -{ - static const struct - { - const char *loc; - const char *str; - double exp; - ptrdiff_t nread; - } tests[] = - { - { "de_DE.UTF-8", "1,5", 1.5, 3 }, - { "de_DE.UTF-8", "1.5", 1.0, 1 }, - { "de_DE.UTF-8", "1.500", 1500.0, 5 }, - { "de_DE.UTF-8", "36.893.488.147.419.103.232", 0x1.0p65, 26 } - }; -#define ntests (sizeof (tests) / sizeof (tests[0])) - size_t n; - int result = 0; - - puts ("\nLocale tests"); +#include "tst-strtod.h" - for (n = 0; n < ntests; ++n) - { - double d; - char *endp; +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. 
*/ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif - if (setlocale (LC_ALL, tests[n].loc) == NULL) - { - printf ("cannot set locale %s\n", tests[n].loc); - result = 1; - continue; - } +#define ntests (sizeof (tests) / sizeof (tests[0])) - d = __strtod_internal (tests[n].str, &endp, 1); - if (d != tests[n].exp) - { - printf ("strtod(\"%s\") returns %g and not %g\n", - tests[n].str, d, tests[n].exp); - result = 1; - } - else if (endp - tests[n].str != tests[n].nread) - { - printf ("strtod(\"%s\") read %td bytes and not %td\n", - tests[n].str, endp - tests[n].str, tests[n].nread); - result = 1; - } - } +/* Perform a few tests in a locale with thousands separators. */ +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static int \ +test_strto ## FSUF (void) \ +{ \ + static const struct \ + { \ + const char *loc; \ + const char *str; \ + FTYPE exp; \ + ptrdiff_t nread; \ + } tests[] = \ + { \ + { "de_DE.UTF-8", "1,5", 1.5 ## LSUF, 3 }, \ + { "de_DE.UTF-8", "1.5", 1.0 ## LSUF, 1 }, \ + { "de_DE.UTF-8", "1.500", 1500.0 ## LSUF, 5 }, \ + { "de_DE.UTF-8", "36.893.488.147.419.103.232", 0x1.0p65 ## LSUF, 26 } \ + }; \ + size_t n; \ + int result = 0; \ + \ + puts ("\nLocale tests"); \ + \ + for (n = 0; n < ntests; ++n) \ + { \ + FTYPE d; \ + char *endp; \ + \ + if (setlocale (LC_ALL, tests[n].loc) == NULL) \ + { \ + printf ("cannot set locale %s\n", tests[n].loc); \ + result = 1; \ + continue; \ + } \ + \ + d = __strto ## FSUF ## _internal (tests[n].str, &endp, 1); \ + if (d != tests[n].exp) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", d); \ + FTOSTR (buf2, sizeof (buf2), "%g", tests[n].exp); \ + printf ("strto" # FSUF "(\"%s\") returns %s and not %s\n", \ + tests[n].str, buf1, buf2); \ + result = 1; \ + } \ + else if 
(endp - tests[n].str != tests[n].nread) \ + { \ + printf ("strto" # FSUF "(\"%s\") read %td bytes and not %td\n", \ + tests[n].str, endp - tests[n].str, tests[n].nread); \ + result = 1; \ + } \ + } \ + \ + if (result == 0) \ + puts ("all OK"); \ + \ + return result ? EXIT_FAILURE : EXIT_SUCCESS; \ +} - if (result == 0) - puts ("all OK"); +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) - return result ? EXIT_FAILURE : EXIT_SUCCESS; +static int +do_test (void) +{ + return STRTOD_TEST_FOREACH (test_strto); } #include diff --git a/stdlib/tst-strtod3.c b/stdlib/tst-strtod3.c index 23abec1896..0d662d8be8 100644 --- a/stdlib/tst-strtod3.c +++ b/stdlib/tst-strtod3.c @@ -3,19 +3,73 @@ #include #include -static const struct -{ - const char *in; - const char *out; - double expected; -} tests[] = - { - { "000,,,e1", ",,,e1", 0.0 }, - { "000e1", "", 0.0 }, - { "000,1e1", ",1e1", 0.0 } - }; -#define NTESTS (sizeof (tests) / sizeof (tests[0])) +#include "tst-strtod.h" + +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. 
*/ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static const struct \ +{ \ + const char *in; \ + const char *out; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "000,,,e1", ",,,e1", 0.0 ## LSUF }, \ + { "000e1", "", 0.0 ## LSUF }, \ + { "000,1e1", ",1e1", 0.0 ## LSUF } \ + }; \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = __strto ## FSUF ## _internal (tests_strto ## FSUF[i].in, \ + &ep, 1); \ + \ + if (strcmp (ep, tests_strto ## FSUF[i].out) != 0) \ + { \ + printf ("%d: got rest string \"%s\", expected \"%s\"\n", \ + i, ep, tests_strto ## FSUF[i].out); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + return status; \ +} + +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -26,29 +80,7 @@ do_test (void) return 1; } - int status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = __strtod_internal (tests[i].in, &ep, 1); - - if (strcmp (ep, tests[i].out) != 0) - { - printf ("%d: got rest string \"%s\", expected \"%s\"\n", - i, ep, tests[i].out); - status = 1; - } - - if (r != tests[i].expected) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #define TEST_FUNCTION do_test () 
diff --git a/stdlib/tst-strtod4.c b/stdlib/tst-strtod4.c index 6cc4e843c7..dfd3f05027 100644 --- a/stdlib/tst-strtod4.c +++ b/stdlib/tst-strtod4.c @@ -3,22 +3,76 @@ #include #include +#include "tst-strtod.h" + +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. */ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif + #define NNBSP "\xe2\x80\xaf" -static const struct -{ - const char *in; - const char *out; - double expected; -} tests[] = - { - { "000"NNBSP"000"NNBSP"000", "", 0.0 }, - { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 }, - /* Bug 30964 */ - { "10"NNBSP NNBSP"200", NNBSP NNBSP"200", 10.0 } - }; -#define NTESTS (sizeof (tests) / sizeof (tests[0])) +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static const struct \ +{ \ + const char *in; \ + const char *out; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "000"NNBSP"000"NNBSP"000", "", 0.0 ## LSUF }, \ + { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 ## LSUF }, \ + /* Bug 30964 */ \ + { "10"NNBSP NNBSP"200", NNBSP NNBSP"200", 10.0 ## LSUF } \ + }; \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = __strto ## FSUF ## _internal (tests_strto ## FSUF[i].in, \ + &ep, 1); \ + \ + if (strcmp (ep, tests_strto ## FSUF[i].out) != 0) \ + { \ + printf ("%d: got rest string \"%s\", expected \"%s\"\n", \ + i, ep, tests_strto ## FSUF[i].out); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + 
tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + return status; \ +} +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -29,29 +83,7 @@ do_test (void) return 1; } - int status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = __strtod_internal (tests[i].in, &ep, 1); - - if (strcmp (ep, tests[i].out) != 0) - { - printf ("%d: got rest string \"%s\", expected \"%s\"\n", - i, ep, tests[i].out); - status = 1; - } - - if (r != tests[i].expected) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #define TEST_FUNCTION do_test () diff --git a/stdlib/tst-strtod5i.c b/stdlib/tst-strtod5i.c index ee54e3404c..136aedea68 100644 --- a/stdlib/tst-strtod5i.c +++ b/stdlib/tst-strtod5i.c @@ -16,52 +16,112 @@ License along with the GNU C Library; if not, see . */ +/* Defining _LIBC_TEST ensures long double math functions are + declared in the headers. */ +#define _LIBC_TEST 1 #include #include #include #include #include +#include "tst-strtod.h" + +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. 
*/ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif + #define NNBSP "\xe2\x80\xaf" -static const struct -{ - const char *in; - int group; - double expected; -} tests[] = - { - { "0", 0, 0.0 }, - { "000", 0, 0.0 }, - { "-0", 0, -0.0 }, - { "-000", 0, -0.0 }, - { "0,", 0, 0.0 }, - { "-0,", 0, -0.0 }, - { "0,0", 0, 0.0 }, - { "-0,0", 0, -0.0 }, - { "0e-10", 0, 0.0 }, - { "-0e-10", 0, -0.0 }, - { "0,e-10", 0, 0.0 }, - { "-0,e-10", 0, -0.0 }, - { "0,0e-10", 0, 0.0 }, - { "-0,0e-10", 0, -0.0 }, - { "0e-1000000", 0, 0.0 }, - { "-0e-1000000", 0, -0.0 }, - { "0,0e-1000000", 0, 0.0 }, - { "-0,0e-1000000", 0, -0.0 }, - { "0", 1, 0.0 }, - { "000", 1, 0.0 }, - { "-0", 1, -0.0 }, - { "-000", 1, -0.0 }, - { "0e-10", 1, 0.0 }, - { "-0e-10", 1, -0.0 }, - { "0e-1000000", 1, 0.0 }, - { "-0e-1000000", 1, -0.0 }, - { "000"NNBSP"000"NNBSP"000", 1, 0.0 }, - { "-000"NNBSP"000"NNBSP"000", 1, -0.0 } - }; -#define NTESTS (sizeof (tests) / sizeof (tests[0])) +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static const struct \ +{ \ + const char *in; \ + int group; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "0", 0, 0.0 ## LSUF }, \ + { "000", 0, 0.0 ## LSUF }, \ + { "-0", 0, -0.0 ## LSUF }, \ + { "-000", 0, -0.0 ## LSUF }, \ + { "0,", 0, 0.0 ## LSUF }, \ + { "-0,", 0, -0.0 ## LSUF }, \ + { "0,0", 0, 0.0 ## LSUF }, \ + { "-0,0", 0, -0.0 ## LSUF }, \ + { "0e-10", 0, 0.0 ## LSUF }, \ + { "-0e-10", 0, -0.0 ## LSUF }, \ + { "0,e-10", 0, 0.0 ## LSUF }, \ + { "-0,e-10", 0, -0.0 ## LSUF }, \ + { "0,0e-10", 0, 0.0 ## LSUF }, \ + { "-0,0e-10", 0, -0.0 ## LSUF }, \ + { "0e-1000000", 0, 0.0 ## LSUF }, \ + { "-0e-1000000", 0, -0.0 ## LSUF }, \ + { "0,0e-1000000", 0, 0.0 ## LSUF }, \ + { "-0,0e-1000000", 0, -0.0 ## LSUF }, \ + { "0", 1, 0.0 ## LSUF }, \ + { "000", 1, 0.0 ## 
LSUF }, \ + { "-0", 1, -0.0 ## LSUF }, \ + { "-000", 1, -0.0 ## LSUF }, \ + { "0e-10", 1, 0.0 ## LSUF }, \ + { "-0e-10", 1, -0.0 ## LSUF }, \ + { "0e-1000000", 1, 0.0 ## LSUF }, \ + { "-0e-1000000", 1, -0.0 ## LSUF }, \ + { "000"NNBSP"000"NNBSP"000", 1, 0.0 ## LSUF }, \ + { "-000"NNBSP"000"NNBSP"000", 1, -0.0 ## LSUF } \ + }; \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = __strto ## FSUF ## _internal (tests_strto ## FSUF[i].in, \ + &ep, \ + tests_strto ## FSUF[i].group); \ + \ + if (*ep != '\0') \ + { \ + printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected \ + || (copysign ## CSUF (10.0 ## LSUF, r) \ + != copysign ## CSUF (10.0 ## LSUF, \ + tests_strto ## FSUF[i].expected))) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + return status; \ +} +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -72,29 +132,7 @@ do_test (void) return 1; } - int status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = __strtod_internal (tests[i].in, &ep, tests[i].group); - - if (*ep != '\0') - { - printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); - status = 1; - } - - if (r != tests[i].expected - || copysign (10.0, r) != copysign (10.0, tests[i].expected)) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #include commit ad93c2047d791044d45e8f65070d821b0b918993 Author: Joseph Myers Date: Wed Sep 4 13:20:18 2024 +0000 Improve NaN payload testing 
There are two separate sets of tests of NaN payloads in glibc: * libm-test-{get,set}payload* verify that getpayload, setpayload, setpayloadsig and __builtin_nan functions are consistent in their payload handling. * test-nan-payload verifies that strtod-family functions and the not-built-in nan functions are consistent in their payload handling. Nothing, however, connects the two sets of functions (i.e., verifies that strtod / nan are consistent with getpayload / setpayload / __builtin_nan). Improve test-nan-payload to check actual payload value with getpayload rather than just verifying that the strtod and nan functions produce the same NaN. Also check that the NaNs produced aren't signaling and extend the tests to cover _FloatN / _FloatNx. Tested for x86_64. (cherry picked from commit be77d5ae417236883c02d3d67c0716e3f669fa41) diff --git a/math/test-nan-payload.c b/math/test-nan-payload.c index 4a81dc348b..55c13de14e 100644 --- a/math/test-nan-payload.c +++ b/math/test-nan-payload.c @@ -16,6 +16,8 @@ License along with the GNU C Library; if not, see . */ +#define _LIBC_TEST 1 +#define __STDC_WANT_IEC_60559_TYPES_EXT__ #include #include #include @@ -31,7 +33,7 @@ #define CHECK_IS_NAN(TYPE, A) \ do \ { \ - if (isnan (A)) \ + if (isnan (A) && !issignaling (A)) \ puts ("PASS: " #TYPE " " #A); \ else \ { \ @@ -41,6 +43,19 @@ } \ while (0) +#define CHECK_PAYLOAD(TYPE, FUNC, A, P) \ + do \ + { \ + if (FUNC (&(A)) == (P)) \ + puts ("PASS: " #TYPE " payload " #A); \ + else \ + { \ + puts ("FAIL: " #TYPE " payload " #A); \ + result = 1; \ + } \ + } \ + while (0) + #define CHECK_SAME_NAN(TYPE, A, B) \ do \ { \ @@ -71,7 +86,7 @@ bits. 
*/ #define CAN_TEST_EQ(MANT_DIG) ((MANT_DIG) != 64 && (MANT_DIG) != 106) -#define RUN_TESTS(TYPE, SFUNC, FUNC, MANT_DIG) \ +#define RUN_TESTS(TYPE, SFUNC, FUNC, PLFUNC, MANT_DIG) \ do \ { \ TYPE n123 = WRAP_NAN (FUNC, "123"); \ @@ -82,6 +97,10 @@ CHECK_IS_NAN (TYPE, n456); \ TYPE s456 = WRAP_STRTO (SFUNC, "NAN(456)"); \ CHECK_IS_NAN (TYPE, s456); \ + TYPE nh123 = WRAP_NAN (FUNC, "0x123"); \ + CHECK_IS_NAN (TYPE, nh123); \ + TYPE sh123 = WRAP_STRTO (SFUNC, "NAN(0x123)"); \ + CHECK_IS_NAN (TYPE, sh123); \ TYPE n123x = WRAP_NAN (FUNC, "123)"); \ CHECK_IS_NAN (TYPE, n123x); \ TYPE nemp = WRAP_NAN (FUNC, ""); \ @@ -92,8 +111,16 @@ CHECK_IS_NAN (TYPE, sx); \ if (CAN_TEST_EQ (MANT_DIG)) \ CHECK_SAME_NAN (TYPE, n123, s123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, n123, 123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, s123, 123); \ if (CAN_TEST_EQ (MANT_DIG)) \ CHECK_SAME_NAN (TYPE, n456, s456); \ + CHECK_PAYLOAD (TYPE, PLFUNC, n456, 456); \ + CHECK_PAYLOAD (TYPE, PLFUNC, s456, 456); \ + if (CAN_TEST_EQ (MANT_DIG)) \ + CHECK_SAME_NAN (TYPE, nh123, sh123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, nh123, 0x123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, sh123, 0x123); \ if (CAN_TEST_EQ (MANT_DIG)) \ CHECK_SAME_NAN (TYPE, nemp, semp); \ if (CAN_TEST_EQ (MANT_DIG)) \ @@ -110,9 +137,31 @@ static int do_test (void) { int result = 0; - RUN_TESTS (float, strtof, nanf, FLT_MANT_DIG); - RUN_TESTS (double, strtod, nan, DBL_MANT_DIG); - RUN_TESTS (long double, strtold, nanl, LDBL_MANT_DIG); + RUN_TESTS (float, strtof, nanf, getpayloadf, FLT_MANT_DIG); + RUN_TESTS (double, strtod, nan, getpayload, DBL_MANT_DIG); + RUN_TESTS (long double, strtold, nanl, getpayloadl, LDBL_MANT_DIG); +#if __HAVE_FLOAT16 + RUN_TESTS (_Float16, strtof16, nanf16, getpayloadf16, FLT16_MANT_DIG); +#endif +#if __HAVE_FLOAT32 + RUN_TESTS (_Float32, strtof32, nanf32, getpayloadf32, FLT32_MANT_DIG); +#endif +#if __HAVE_FLOAT64 + RUN_TESTS (_Float64, strtof64, nanf64, getpayloadf64, FLT64_MANT_DIG); +#endif +#if __HAVE_FLOAT128 + RUN_TESTS 
(_Float128, strtof128, nanf128, getpayloadf128, FLT128_MANT_DIG); +#endif +#if __HAVE_FLOAT32X + RUN_TESTS (_Float32x, strtof32x, nanf32x, getpayloadf32x, FLT32X_MANT_DIG); +#endif +#if __HAVE_FLOAT64X + RUN_TESTS (_Float64x, strtof64x, nanf64x, getpayloadf64x, FLT64X_MANT_DIG); +#endif +#if __HAVE_FLOAT128X + RUN_TESTS (_Float128x, strtof128x, nanf128x, getpayloadf128x, + FLT128X_MANT_DIG); +#endif return result; } commit c4cc72d2efc741872d65ae1fd77572e47042d179 Author: Joseph Myers Date: Wed Sep 4 13:21:23 2024 +0000 Do not set errno for overflowing NaN payload in strtod/nan (bug 32045) As reported in bug 32045, it's incorrect for strtod/nan functions to set errno based on overflowing payload (strtod should only set errno for overflow / underflow of its actual result, and potentially if nothing in the string can be parsed as a number at all; nan should be a pure function that never sets it). Save and restore errno around the internal strtoull call and add associated test coverage. Tested for x86_64. 
(cherry picked from commit 64f62c47e9c350f353336f2df6714e1d48ec50d8) diff --git a/math/Makefile b/math/Makefile index f06d370383..b64c3eedd5 100644 --- a/math/Makefile +++ b/math/Makefile @@ -1077,6 +1077,7 @@ CFLAGS-test-flt-eval-method.c += -fexcess-precision=standard CFLAGS-test-fe-snans-always-signal.c += $(config-cflags-signaling-nans) CFLAGS-test-nan-const.c += -fno-builtin +CFLAGS-test-nan-payload.c += -fno-builtin CFLAGS-test-ceil-except-2.c += -fno-builtin CFLAGS-test-floor-except-2.c += -fno-builtin diff --git a/math/test-nan-payload.c b/math/test-nan-payload.c index 55c13de14e..413791e09f 100644 --- a/math/test-nan-payload.c +++ b/math/test-nan-payload.c @@ -18,6 +18,7 @@ #define _LIBC_TEST 1 #define __STDC_WANT_IEC_60559_TYPES_EXT__ +#include #include #include #include @@ -82,6 +83,26 @@ } \ while (0) +#define CLEAR_ERRNO \ + do \ + { \ + errno = 12345; \ + } \ + while (0) + +#define CHECK_ERRNO(TYPE, A) \ + do \ + { \ + if (errno == 12345) \ + puts ("PASS: " #TYPE " " #A " errno"); \ + else \ + { \ + puts ("FAIL: " #TYPE " " #A " errno"); \ + result = 1; \ + } \ + } \ + while (0) + /* Cannot test payloads by memcmp for formats where NaNs have padding bits. 
*/ #define CAN_TEST_EQ(MANT_DIG) ((MANT_DIG) != 64 && (MANT_DIG) != 106) @@ -89,26 +110,58 @@ #define RUN_TESTS(TYPE, SFUNC, FUNC, PLFUNC, MANT_DIG) \ do \ { \ + CLEAR_ERRNO; \ TYPE n123 = WRAP_NAN (FUNC, "123"); \ + CHECK_ERRNO (TYPE, n123); \ CHECK_IS_NAN (TYPE, n123); \ + CLEAR_ERRNO; \ TYPE s123 = WRAP_STRTO (SFUNC, "NAN(123)"); \ + CHECK_ERRNO (TYPE, s123); \ CHECK_IS_NAN (TYPE, s123); \ + CLEAR_ERRNO; \ TYPE n456 = WRAP_NAN (FUNC, "456"); \ + CHECK_ERRNO (TYPE, n456); \ CHECK_IS_NAN (TYPE, n456); \ + CLEAR_ERRNO; \ TYPE s456 = WRAP_STRTO (SFUNC, "NAN(456)"); \ + CHECK_ERRNO (TYPE, s456); \ CHECK_IS_NAN (TYPE, s456); \ + CLEAR_ERRNO; \ TYPE nh123 = WRAP_NAN (FUNC, "0x123"); \ + CHECK_ERRNO (TYPE, nh123); \ CHECK_IS_NAN (TYPE, nh123); \ + CLEAR_ERRNO; \ TYPE sh123 = WRAP_STRTO (SFUNC, "NAN(0x123)"); \ + CHECK_ERRNO (TYPE, sh123); \ CHECK_IS_NAN (TYPE, sh123); \ + CLEAR_ERRNO; \ TYPE n123x = WRAP_NAN (FUNC, "123)"); \ + CHECK_ERRNO (TYPE, n123x); \ CHECK_IS_NAN (TYPE, n123x); \ + CLEAR_ERRNO; \ TYPE nemp = WRAP_NAN (FUNC, ""); \ + CHECK_ERRNO (TYPE, nemp); \ CHECK_IS_NAN (TYPE, nemp); \ + CLEAR_ERRNO; \ TYPE semp = WRAP_STRTO (SFUNC, "NAN()"); \ + CHECK_ERRNO (TYPE, semp); \ CHECK_IS_NAN (TYPE, semp); \ + CLEAR_ERRNO; \ TYPE sx = WRAP_STRTO (SFUNC, "NAN"); \ + CHECK_ERRNO (TYPE, sx); \ CHECK_IS_NAN (TYPE, sx); \ + CLEAR_ERRNO; \ + TYPE novf = WRAP_NAN (FUNC, "9999999999" \ + "99999999999999999999" \ + "9999999999"); \ + CHECK_ERRNO (TYPE, novf); \ + CHECK_IS_NAN (TYPE, novf); \ + CLEAR_ERRNO; \ + TYPE sovf = WRAP_STRTO (SFUNC, "NAN(9999999999" \ + "99999999999999999999" \ + "9999999999)"); \ + CHECK_ERRNO (TYPE, sovf); \ + CHECK_IS_NAN (TYPE, sovf); \ if (CAN_TEST_EQ (MANT_DIG)) \ CHECK_SAME_NAN (TYPE, n123, s123); \ CHECK_PAYLOAD (TYPE, PLFUNC, n123, 123); \ diff --git a/stdlib/strtod_nan_main.c b/stdlib/strtod_nan_main.c index 4cb286d2b3..39fb7e9f75 100644 --- a/stdlib/strtod_nan_main.c +++ b/stdlib/strtod_nan_main.c @@ -16,6 +16,7 @@ License along with the 
GNU C Library; if not, see . */ +#include #include #include #include @@ -50,7 +51,9 @@ STRTOD_NAN (const STRING_TYPE *str, STRING_TYPE **endptr, STRING_TYPE endc) STRING_TYPE *endp; unsigned long long int mant; + int save_errno = errno; mant = STRTOULL (str, &endp, 0); + __set_errno (save_errno); if (endp == cp) SET_NAN_PAYLOAD (retval, mant); commit 5a10d05c39689dcf7ee694ec94cd2fd069c747ee Author: Florian Weimer Date: Thu Sep 5 21:18:23 2024 +0200 powerpc64le: Build new strtod tests with long double ABI flags (bug 32145) This fixes several test failures: =====FAIL: stdlib/tst-strtod1i.out===== Locale tests all OK Locale tests all OK Locale tests strtold("1,5") returns -6,38643e+367 and not 1,5 strtold("1.5") returns 1,5 and not 1 strtold("1.500") returns 1 and not 1500 strtold("36.893.488.147.419.103.232") returns 1500 and not 3,68935e+19 Locale tests all OK =====FAIL: stdlib/tst-strtod3.out===== 0: got wrong results -2.5937e+4826, expected 0 =====FAIL: stdlib/tst-strtod4.out===== 0: got wrong results -6,38643e+367, expected 0 1: got wrong results 0, expected 1e+06 2: got wrong results 1e+06, expected 10 =====FAIL: stdlib/tst-strtod5i.out===== 0: got wrong results -6,38643e+367, expected 0 2: got wrong results 0, expected -0 4: got wrong results -0, expected 0 5: got wrong results 0, expected -0 6: got wrong results -0, expected 0 7: got wrong results 0, expected -0 8: got wrong results -0, expected 0 9: got wrong results 0, expected -0 10: got wrong results -0, expected 0 11: got wrong results 0, expected -0 12: got wrong results -0, expected 0 13: got wrong results 0, expected -0 14: got wrong results -0, expected 0 15: got wrong results 0, expected -0 16: got wrong results -0, expected 0 17: got wrong results 0, expected -0 18: got wrong results -0, expected 0 20: got wrong results 0, expected -0 22: got wrong results -0, expected 0 23: got wrong results 0, expected -0 24: got wrong results -0, expected 0 25: got wrong results 0, expected -0 26: got wrong 
results -0, expected 0 27: got wrong results 0, expected -0 Fixes commit 3fc063dee01da4f80920a14b7db637c8501d6fd4 ("Make __strtod_internal tests type-generic"). Suggested-by: Joseph Myers Reviewed-by: Carlos O'Donell (cherry picked from commit cc3e743fc09ee6fca45767629df9cbcbe1feba82) diff --git a/sysdeps/powerpc/powerpc64/le/Makefile b/sysdeps/powerpc/powerpc64/le/Makefile index 9d568d4f44..b77775cf95 100644 --- a/sysdeps/powerpc/powerpc64/le/Makefile +++ b/sysdeps/powerpc/powerpc64/le/Makefile @@ -129,6 +129,10 @@ CFLAGS-tst-strtod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-strtod-nan-locale.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-nan-locale.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod1i.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod3.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod4.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod5i.c += $(type-float128-CFLAGS) CFLAGS-tst-strtod6.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom-locale.c += $(type-float128-CFLAGS) commit 4a9b6cdc88335e2a7291418563073a58fe97346e Author: Joseph Myers Date: Fri Sep 20 23:23:13 2024 +0000 Make tst-strtod2 and tst-strtod5 type-generic Some of the strtod tests use type-generic machinery in tst-strtod.h to test the strto* functions for all floating types, while others only test double even when the tests are in fact meaningful for all floating types. Convert tst-strtod2 and tst-strtod5 to use the type-generic machinery so they test all floating types. I haven't tried to convert them to use newer test interfaces in other ways, just made the changes necessary to use the type-generic machinery. Tested for x86_64. 
(cherry picked from commit 8de031bcb9adfa736c0caed2c79d10947b8d8f48) diff --git a/stdlib/tst-strtod2.c b/stdlib/tst-strtod2.c index a7df82ebbd..2cb0953fa9 100644 --- a/stdlib/tst-strtod2.c +++ b/stdlib/tst-strtod2.c @@ -1,43 +1,61 @@ #include #include -struct test -{ - const char *str; - double result; - size_t offset; -} tests[] = -{ - { "0xy", 0.0, 1 }, - { "0x.y", 0.0, 1 }, - { "0x0.y", 0.0, 4 }, - { "0x.0y", 0.0, 4 }, - { ".y", 0.0, 0 }, - { "0.y", 0.0, 2 }, - { ".0y", 0.0, 2 } -}; +#include "tst-strtod.h" + +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +struct test_strto ## FSUF \ +{ \ + const char *str; \ + FTYPE result; \ + size_t offset; \ +} tests_strto ## FSUF[] = \ +{ \ + { "0xy", 0.0 ## LSUF, 1 }, \ + { "0x.y", 0.0 ## LSUF, 1 }, \ + { "0x0.y", 0.0 ## LSUF, 4 }, \ + { "0x.0y", 0.0 ## LSUF, 4 }, \ + { ".y", 0.0 ## LSUF, 0 }, \ + { "0.y", 0.0 ## LSUF, 2 }, \ + { ".0y", 0.0 ## LSUF, 2 } \ +}; \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + for (size_t i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = strto ## FSUF (tests_strto ## FSUF[i].str, &ep); \ + if (r != tests_strto ## FSUF[i].result) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", tests_strto ## FSUF[i].result); \ + printf ("test %zu r = %s, expect %s\n", i, buf1, buf2); \ + status = 1; \ + } \ + if (ep != tests_strto ## FSUF[i].str + tests_strto ## FSUF[i].offset) \ + { \ + printf ("test %zu strto" #FSUF \ + " parsed %tu characters, expected %zu\n", \ + i, ep - tests_strto ## FSUF[i].str, \ + tests_strto ## FSUF[i].offset); \ + status = 1; \ + } \ + } \ + return status; \ +} + +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) { - int status = 0; - for (size_t i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) - { - char *ep; - double r = strtod (tests[i].str, &ep); - if (r != 
tests[i].result) - { - printf ("test %zu r = %g, expect %g\n", i, r, tests[i].result); - status = 1; - } - if (ep != tests[i].str + tests[i].offset) - { - printf ("test %zu strtod parsed %tu characters, expected %zu\n", - i, ep - tests[i].str, tests[i].offset); - status = 1; - } - } - return status; + return STRTOD_TEST_FOREACH (test_strto); } #define TEST_FUNCTION do_test () diff --git a/stdlib/tst-strtod5.c b/stdlib/tst-strtod5.c index 29153ec005..7eb9b3a2d7 100644 --- a/stdlib/tst-strtod5.c +++ b/stdlib/tst-strtod5.c @@ -22,35 +22,75 @@ #include #include +#include "tst-strtod.h" + #define NBSP "\xc2\xa0" -static const struct -{ - const char *in; - double expected; -} tests[] = - { - { "0", 0.0 }, - { "000", 0.0 }, - { "-0", -0.0 }, - { "-000", -0.0 }, - { "0,", 0.0 }, - { "-0,", -0.0 }, - { "0,0", 0.0 }, - { "-0,0", -0.0 }, - { "0e-10", 0.0 }, - { "-0e-10", -0.0 }, - { "0,e-10", 0.0 }, - { "-0,e-10", -0.0 }, - { "0,0e-10", 0.0 }, - { "-0,0e-10", -0.0 }, - { "0e-1000000", 0.0 }, - { "-0e-1000000", -0.0 }, - { "0,0e-1000000", 0.0 }, - { "-0,0e-1000000", -0.0 }, - }; -#define NTESTS (sizeof (tests) / sizeof (tests[0])) +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static const struct \ +{ \ + const char *in; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "0", 0.0 ## LSUF }, \ + { "000", 0.0 ## LSUF }, \ + { "-0", -0.0 ## LSUF }, \ + { "-000", -0.0 ## LSUF }, \ + { "0,", 0.0 ## LSUF }, \ + { "-0,", -0.0 ## LSUF }, \ + { "0,0", 0.0 ## LSUF }, \ + { "-0,0", -0.0 ## LSUF }, \ + { "0e-10", 0.0 ## LSUF }, \ + { "-0e-10", -0.0 ## LSUF }, \ + { "0,e-10", 0.0 ## LSUF }, \ + { "-0,e-10", -0.0 ## LSUF }, \ + { "0,0e-10", 0.0 ## LSUF }, \ + { "-0,0e-10", -0.0 ## LSUF }, \ + { "0e-1000000", 0.0 ## LSUF }, \ + { "-0e-1000000", -0.0 ## LSUF }, \ + { "0,0e-1000000", 0.0 ## LSUF }, \ + { "-0,0e-1000000", -0.0 ## LSUF }, \ + }; \ + \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## 
FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = strto ## FSUF (tests_strto ## FSUF[i].in, &ep); \ + \ + if (*ep != '\0') \ + { \ + printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected \ + || (copysign ## CSUF (10.0 ## LSUF, r) \ + != copysign ## CSUF (10.0 ## LSUF, \ + tests_strto ## FSUF[i].expected))) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + return status; \ +} +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -61,29 +101,7 @@ do_test (void) return 1; } - int status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = strtod (tests[i].in, &ep); - - if (*ep != '\0') - { - printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); - status = 1; - } - - if (r != tests[i].expected - || copysign (10.0, r) != copysign (10.0, tests[i].expected)) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #include commit 8f40dfbe2ad8a4a2d2fc3bbe01d289037d113ced Author: Joseph Myers Date: Fri Sep 20 23:24:02 2024 +0000 Add more tests of strtod end pointer Although there are some tests in tst-strtod2 and tst-strtod3 for the end pointer provided by strtod when it doesn't parse the whole string, they aren't very thorough. Add tests of more such cases to tst-strtod2. Tested for x86_64. 
(cherry picked from commit b5d3737b305525315e0c7c93ca49eadc868eabd5) diff --git a/stdlib/tst-strtod2.c b/stdlib/tst-strtod2.c index 2cb0953fa9..c84bd792c1 100644 --- a/stdlib/tst-strtod2.c +++ b/stdlib/tst-strtod2.c @@ -1,3 +1,4 @@ +#include #include #include @@ -17,10 +18,46 @@ struct test_strto ## FSUF \ { "0x.0y", 0.0 ## LSUF, 4 }, \ { ".y", 0.0 ## LSUF, 0 }, \ { "0.y", 0.0 ## LSUF, 2 }, \ - { ".0y", 0.0 ## LSUF, 2 } \ + { ".0y", 0.0 ## LSUF, 2 }, \ + { "1.0e", 1.0 ## LSUF, 3 }, \ + { "1.0e+", 1.0 ## LSUF, 3 }, \ + { "1.0e-", 1.0 ## LSUF, 3 }, \ + { "1.0ex", 1.0 ## LSUF, 3 }, \ + { "1.0e+x", 1.0 ## LSUF, 3 }, \ + { "1.0e-x", 1.0 ## LSUF, 3 }, \ + { "0x1p", 1.0 ## LSUF, 3 }, \ + { "0x1p+", 1.0 ## LSUF, 3 }, \ + { "0x1p-", 1.0 ## LSUF, 3 }, \ + { "0x1px", 1.0 ## LSUF, 3 }, \ + { "0x1p+x", 1.0 ## LSUF, 3 }, \ + { "0x1p-x", 1.0 ## LSUF, 3 }, \ + { "INFx", INFINITY, 3 }, \ + { "infx", INFINITY, 3 }, \ + { "INFINITx", INFINITY, 3 }, \ + { "infinitx", INFINITY, 3 }, \ + { "INFINITYY", INFINITY, 8 }, \ + { "infinityy", INFINITY, 8 }, \ + { "NANx", NAN, 3 }, \ + { "nanx", NAN, 3 }, \ + { "NAN(", NAN, 3 }, \ + { "nan(", NAN, 3 }, \ + { "NAN(x", NAN, 3 }, \ + { "nan(x", NAN, 3 }, \ + { "NAN(x)y", NAN, 6 }, \ + { "nan(x)y", NAN, 6 }, \ + { "NAN(*)y", NAN, 3 }, \ + { "nan(*)y", NAN, 3 } \ }; \ \ static int \ +compare_strto ## FSUF (FTYPE x, FTYPE y) \ +{ \ + if (isnan (x) && isnan (y)) \ + return 1; \ + return x == y; \ +} \ + \ +static int \ test_strto ## FSUF (void) \ { \ int status = 0; \ @@ -30,7 +67,7 @@ test_strto ## FSUF (void) \ { \ char *ep; \ FTYPE r = strto ## FSUF (tests_strto ## FSUF[i].str, &ep); \ - if (r != tests_strto ## FSUF[i].result) \ + if (!compare_strto ## FSUF (r, tests_strto ## FSUF[i].result)) \ { \ char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ FTOSTR (buf1, sizeof (buf1), "%g", r); \ commit cc256952ecb07789c423dff9712eb7a38f80e963 Author: Joseph Myers Date: Fri Sep 20 23:24:45 2024 +0000 Add tests of more strtod special cases There is very little test 
coverage of inputs to strtod-family functions that don't contain anything that can be parsed as a number (one test of ".y" in tst-strtod2), and none that I can see of skipping initial whitespace. Add some tests of these things to tst-strtod2. Tested for x86_64. (cherry picked from commit 378039ca578c2ea93095a1e710d96f58c68a3997) diff --git a/stdlib/tst-strtod2.c b/stdlib/tst-strtod2.c index c84bd792c1..d00bc13323 100644 --- a/stdlib/tst-strtod2.c +++ b/stdlib/tst-strtod2.c @@ -31,6 +31,20 @@ struct test_strto ## FSUF \ { "0x1px", 1.0 ## LSUF, 3 }, \ { "0x1p+x", 1.0 ## LSUF, 3 }, \ { "0x1p-x", 1.0 ## LSUF, 3 }, \ + { "", 0.0 ## LSUF, 0 }, \ + { ".", 0.0 ## LSUF, 0 }, \ + { "-", 0.0 ## LSUF, 0 }, \ + { "-.", 0.0 ## LSUF, 0 }, \ + { ".e", 0.0 ## LSUF, 0 }, \ + { "-.e", 0.0 ## LSUF, 0 }, \ + { " \t", 0.0 ## LSUF, 0 }, \ + { " \t.", 0.0 ## LSUF, 0 }, \ + { " \t-", 0.0 ## LSUF, 0 }, \ + { " \t-.", 0.0 ## LSUF, 0 }, \ + { " \t.e", 0.0 ## LSUF, 0 }, \ + { " \t-.e", 0.0 ## LSUF, 0 }, \ + { " \t\f\r\n\v1", 1.0 ## LSUF, 7 }, \ + { " \t\f\r\n\v-1.5e2", -150.0 ## LSUF, 12 }, \ { "INFx", INFINITY, 3 }, \ { "infx", INFINITY, 3 }, \ { "INFINITx", INFINITY, 3 }, \ commit 5c06c6e0b5078ffb0aa0c09bac79f086145e0897 Author: H.J. Lu Date: Sat Sep 7 08:32:32 2024 -0700 libio: Set _vtable_offset before calling _IO_link_in [BZ #32148] Since _IO_vtable_offset is used to detect the old binaries, set it in _IO_old_file_init_internal before calling _IO_link_in which checks _IO_vtable_offset. Add a glibc 2.0 test with copy relocation on _IO_stderr_@GLIBC_2.0 to verify that fopen won't cause memory corruption. This fixes BZ #32148. Signed-off-by: H.J. 
Lu Reviewed-by: Noah Goldstein (cherry picked from commit 9dfea3de7f690bff70e3c6eb346b9ad082bb2e35) diff --git a/libio/Makefile b/libio/Makefile index 6a507b67ea..5292baa4e0 100644 --- a/libio/Makefile +++ b/libio/Makefile @@ -286,11 +286,18 @@ endif ifeq ($(build-shared),yes) aux += oldfileops oldstdfiles tests += \ + tst-fopen-compat \ tst-stderr-compat \ # tests tests-2.0 += \ + tst-fopen-compat \ tst-stderr-compat \ # tests-2.0 + +tst-fopen-compat-ARGS = tst-fopen-compat.c +# Disable PIE to trigger copy relocation. +CFLAGS-tst-fopen-compat.c += -fno-pie +tst-fopen-compat-no-pie = yes endif shared-only-routines = oldiofopen oldiofdopen oldiofclose oldfileops \ diff --git a/libio/oldfileops.c b/libio/oldfileops.c index 97148dba9b..8f775c9094 100644 --- a/libio/oldfileops.c +++ b/libio/oldfileops.c @@ -103,9 +103,11 @@ _IO_old_file_init_internal (struct _IO_FILE_plus *fp) fp->file._old_offset = _IO_pos_BAD; fp->file._flags |= CLOSED_FILEBUF_FLAGS; - _IO_link_in (fp); + /* NB: _vtable_offset must be set before calling _IO_link_in since + _IO_vtable_offset is used to detect the old binaries. */ fp->file._vtable_offset = ((int) sizeof (struct _IO_FILE) - (int) sizeof (struct _IO_FILE_complete)); + _IO_link_in (fp); fp->file._fileno = -1; if (&_IO_stdin_used != NULL || !_IO_legacy_file ((FILE *) fp)) diff --git a/libio/tst-fopen-compat.c b/libio/tst-fopen-compat.c new file mode 100644 index 0000000000..f241b61043 --- /dev/null +++ b/libio/tst-fopen-compat.c @@ -0,0 +1,85 @@ +/* Verify that fopen works with copy relocation on _IO_stderr_ in binaries + linked with glibc 2.0. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#if TEST_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) +# define _LIBC +# define _IO_USE_OLD_IO_FILE +# include +# include +# include +# include +# include +# include + +struct _IO_jump_t; + +struct _IO_FILE_plus +{ + FILE file; + const struct _IO_jump_t *vtable; +}; + +extern struct _IO_FILE_plus _IO_stderr_; +compat_symbol_reference (libc, _IO_stderr_, _IO_stderr_, GLIBC_2_0); +compat_symbol_reference (libc, fopen, fopen, GLIBC_2_0); +compat_symbol_reference (libc, fclose, fclose, GLIBC_2_0); + +static int +do_test (int argc, char *argv[]) +{ + static char filename[PATH_MAX + 1]; + struct stat st; + char *name = NULL; + int i; + + /* Try to trigger copy relocation. 
*/ + TEST_VERIFY_EXIT (_IO_stderr_.file._fileno == STDERR_FILENO); + + for (i = 1; i < argc; i++) + { + name = argv[i]; + if (stat (name, &st) == 0) + { + TEST_VERIFY_EXIT (strlen (name) <= PATH_MAX); + break; + } + } + TEST_VERIFY_EXIT (name != NULL); + + strcpy (filename, name); + FILE *fp = fopen (filename, "r"); + TEST_VERIFY_EXIT (strcmp (filename, name) == 0); + TEST_VERIFY_EXIT (fp != NULL); + TEST_VERIFY_EXIT (fclose (fp) == 0); + return 0; +} +#else +# include + +static int +do_test (int argc, char *argv[]) +{ + return EXIT_UNSUPPORTED; +} +#endif + +#define TEST_FUNCTION_ARGV do_test +#include commit 85e5850f2f4ea5f304be5356ecb7a15998766a4e Author: Joseph Myers Date: Fri Sep 20 23:25:32 2024 +0000 Make tst-strtod-underflow type-generic The test tst-strtod-underflow covers various edge cases close to the underflow threshold for strtod (especially cases where underflow on architectures with after-rounding tininess detection depends on the rounding mode). Make it use the type-generic machinery, with corresponding test inputs for each supported floating-point format, so that other functions in the strtod family are tested for underflow edge cases as well. Tested for x86_64. (cherry picked from commit 94ca2c0894f0e1b62625c369cc598a2b9236622c) diff --git a/stdlib/tst-strtod-underflow.c b/stdlib/tst-strtod-underflow.c index a5ced18599..8598b95b6d 100644 --- a/stdlib/tst-strtod-underflow.c +++ b/stdlib/tst-strtod-underflow.c @@ -17,6 +17,10 @@ License along with the GNU C Library; if not, see . */ +/* Defining _LIBC_TEST ensures long double math functions are + declared in the headers. */ +#define _LIBC_TEST 1 +#define __STDC_WANT_IEC_60559_TYPES_EXT__ #include #include #include @@ -25,6 +29,60 @@ #include #include +#include "tst-strtod.h" + +/* Logic for selecting between tests for different formats is as in + tst-strtod-skeleton.c, but here it is selecting string inputs with + different underflow properties, rather than generated test + data. 
*/ + +#define _CONCAT(a, b) a ## b +#define CONCAT(a, b) _CONCAT (a, b) + +#define MEMBER(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ + const char *s_ ## FSUF; + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +# define CHOOSE_ld(f,d,...) d +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && LDBL_MIN_EXP == -16381 +# define CHOOSE_ld(f,d,ld64i,...) ld64i +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && LDBL_MIN_EXP == -16382 +# define CHOOSE_ld(f,d,ld64i,ld64m,...) ld64m +#elif LDBL_MANT_DIG == 106 && LDBL_MAX_EXP == 1024 +# define CHOOSE_ld(f,d,ld64i,ld64m,ld106,...) ld106 +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +# define CHOOSE_ld(f,d,ld64i,ld64m,ld106,ld113,...) ld113 +#else +# error "unknown long double format" +#endif + +#define CHOOSE_f(f,...) f +#define CHOOSE_f32(f,...) f +#define CHOOSE_d(f,d,...) d +#define CHOOSE_f64(f,d,...) d +#define CHOOSE_f32x(f,d,...) d +#define CHOOSE_f128(f,d,ld64i,ld64m,ld106,ld113,...) ld113 + +#if __HAVE_FLOAT64X +# if FLT64X_MANT_DIG == 113 && FLT64X_MAX_EXP == 16384 +# define CHOOSE_f64x(f,d,ld64i,ld64m,ld106,ld113,...) ld113 +# elif (FLT64X_MANT_DIG == 64 \ + && FLT64X_MAX_EXP == 16384 \ + && FLT64X_MIN_EXP == -16381) +# define CHOOSE_f64x(f,d,ld64i,...) ld64i +# else +# error "unknown _Float64x format" +# endif +#endif + +#define _XNTRY(FSUF, FTYPE, FTOSTR, LSUF, CSUF, ...) \ + CHOOSE_ ## FSUF (__VA_ARGS__), +#define XNTRY(...) \ + GEN_TEST_STRTOD_FOREACH (_XNTRY, __VA_ARGS__) + +#define TEST(f, d, ld64i, ld64m, ld106, ld113, u) \ + { XNTRY(f, d, ld64i, ld64m, ld106, ld113) u } + enum underflow_case { /* Result is exact or outside the subnormal range. 
*/ @@ -55,38 +113,194 @@ enum underflow_case struct test { - const char *s; + GEN_TEST_STRTOD_FOREACH (MEMBER) enum underflow_case c; }; static const struct test tests[] = { - { "0x1p-1022", UNDERFLOW_NONE }, - { "-0x1p-1022", UNDERFLOW_NONE }, - { "0x0p-10000000000000000000000000", UNDERFLOW_NONE }, - { "-0x0p-10000000000000000000000000", UNDERFLOW_NONE }, - { "0x1p-10000000000000000000000000", UNDERFLOW_ALWAYS }, - { "-0x1p-10000000000000000000000000", UNDERFLOW_ALWAYS }, - { "0x1.000000000000000000001p-1022", UNDERFLOW_NONE }, - { "-0x1.000000000000000000001p-1022", UNDERFLOW_NONE }, - { "0x1p-1075", UNDERFLOW_ALWAYS }, - { "-0x1p-1075", UNDERFLOW_ALWAYS }, - { "0x1p-1023", UNDERFLOW_NONE }, - { "-0x1p-1023", UNDERFLOW_NONE }, - { "0x1p-1074", UNDERFLOW_NONE }, - { "-0x1p-1074", UNDERFLOW_NONE }, - { "0x1.ffffffffffffep-1023", UNDERFLOW_NONE }, - { "-0x1.ffffffffffffep-1023", UNDERFLOW_NONE }, - { "0x1.fffffffffffffp-1023", UNDERFLOW_ALWAYS }, - { "-0x1.fffffffffffffp-1023", UNDERFLOW_ALWAYS }, - { "0x1.fffffffffffff0001p-1023", UNDERFLOW_EXCEPT_UPWARD }, - { "-0x1.fffffffffffff0001p-1023", UNDERFLOW_EXCEPT_DOWNWARD }, - { "0x1.fffffffffffff7fffp-1023", UNDERFLOW_EXCEPT_UPWARD }, - { "-0x1.fffffffffffff7fffp-1023", UNDERFLOW_EXCEPT_DOWNWARD }, - { "0x1.fffffffffffff8p-1023", UNDERFLOW_ONLY_DOWNWARD_ZERO }, - { "-0x1.fffffffffffff8p-1023", UNDERFLOW_ONLY_UPWARD_ZERO }, - { "0x1.fffffffffffffffffp-1023", UNDERFLOW_ONLY_DOWNWARD_ZERO }, - { "-0x1.fffffffffffffffffp-1023", UNDERFLOW_ONLY_UPWARD_ZERO }, + TEST ("0x1p-126", + "0x1p-1022", + "0x1p-16382", + "0x1p-16383", + "0x1p-969", + "0x1p-16382", + UNDERFLOW_NONE), + TEST ("-0x1p-126", + "-0x1p-1022", + "-0x1p-16382", + "-0x1p-16383", + "-0x1p-969", + "-0x1p-16382", + UNDERFLOW_NONE), + TEST ("0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + 
UNDERFLOW_NONE), + TEST ("-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + UNDERFLOW_NONE), + TEST ("0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + UNDERFLOW_ALWAYS), + TEST ("-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + UNDERFLOW_ALWAYS), + TEST ("0x1.000000000000000000001p-126", + "0x1.000000000000000000001p-1022", + "0x1.000000000000000000001p-16382", + "0x1.000000000000000000001p-16383", + "0x1.000000000000000000001p-969", + "0x1.00000000000000000000000000000000000000001p-16382", + UNDERFLOW_NONE), + TEST ("-0x1.000000000000000000001p-126", + "-0x1.000000000000000000001p-1022", + "-0x1.000000000000000000001p-16382", + "-0x1.000000000000000000001p-16383", + "-0x1.000000000000000000001p-969", + "-0x1.00000000000000000000000000000000000000001p-16382", + UNDERFLOW_NONE), + TEST ("0x1p-150", + "0x1p-1075", + "0x1p-16446", + "0x1p-16447", + "0x1p-1075", + "0x1p-16495", + UNDERFLOW_ALWAYS), + TEST ("-0x1p-150", + "-0x1p-1075", + "-0x1p-16446", + "-0x1p-16447", + "-0x1p-1075", + "-0x1p-16495", + UNDERFLOW_ALWAYS), + TEST ("0x1p-127", + "0x1p-1023", + "0x1p-16383", + "0x1p-16384", + "0x1p-970", + "0x1p-16383", + UNDERFLOW_NONE), + TEST ("-0x1p-127", + "-0x1p-1023", + "-0x1p-16383", + "-0x1p-16384", + "-0x1p-970", + "-0x1p-16383", + UNDERFLOW_NONE), + TEST ("0x1p-149", + "0x1p-1074", + "0x1p-16445", + "0x1p-16446", + "0x1p-1074", + "0x1p-16494", + UNDERFLOW_NONE), + TEST ("-0x1p-149", + "-0x1p-1074", + "-0x1p-16445", + "-0x1p-16446", + 
"-0x1p-1074", + "-0x1p-16494", + UNDERFLOW_NONE), + TEST ("0x1.fffffcp-127", + "0x1.ffffffffffffep-1023", + "0x1.fffffffffffffffcp-16383", + "0x1.fffffffffffffffcp-16384", + "0x1.ffffffffffffffffffffffffffp-970", + "0x1.fffffffffffffffffffffffffffep-16383", + UNDERFLOW_NONE), + TEST ("-0x1.fffffcp-127", + "-0x1.ffffffffffffep-1023", + "-0x1.fffffffffffffffcp-16383", + "-0x1.fffffffffffffffcp-16384", + "-0x1.ffffffffffffffffffffffffffp-970", + "-0x1.fffffffffffffffffffffffffffep-16383", + UNDERFLOW_NONE), + TEST ("0x1.fffffep-127", + "0x1.fffffffffffffp-1023", + "0x1.fffffffffffffffep-16383", + "0x1.fffffffffffffffep-16384", + "0x1.ffffffffffffffffffffffffff8p-970", + "0x1.ffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ALWAYS), + TEST ("-0x1.fffffep-127", + "-0x1.fffffffffffffp-1023", + "-0x1.fffffffffffffffep-16383", + "-0x1.fffffffffffffffep-16384", + "-0x1.ffffffffffffffffffffffffff8p-970", + "-0x1.ffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ALWAYS), + TEST ("0x1.fffffe0001p-127", + "0x1.fffffffffffff0001p-1023", + "0x1.fffffffffffffffe0001p-16383", + "0x1.fffffffffffffffe0001p-16384", + "0x1.ffffffffffffffffffffffffff80001p-970", + "0x1.ffffffffffffffffffffffffffff0001p-16383", + UNDERFLOW_EXCEPT_UPWARD), + TEST ("-0x1.fffffe0001p-127", + "-0x1.fffffffffffff0001p-1023", + "-0x1.fffffffffffffffe0001p-16383", + "-0x1.fffffffffffffffe0001p-16384", + "-0x1.ffffffffffffffffffffffffff80001p-970", + "-0x1.ffffffffffffffffffffffffffff0001p-16383", + UNDERFLOW_EXCEPT_DOWNWARD), + TEST ("0x1.fffffeffffp-127", + "0x1.fffffffffffff7fffp-1023", + "0x1.fffffffffffffffeffffp-16383", + "0x1.fffffffffffffffeffffp-16384", + "0x1.ffffffffffffffffffffffffffbffffp-970", + "0x1.ffffffffffffffffffffffffffff7fffp-16383", + UNDERFLOW_EXCEPT_UPWARD), + TEST ("-0x1.fffffeffffp-127", + "-0x1.fffffffffffff7fffp-1023", + "-0x1.fffffffffffffffeffffp-16383", + "-0x1.fffffffffffffffeffffp-16384", + "-0x1.ffffffffffffffffffffffffffbffffp-970", + 
"-0x1.ffffffffffffffffffffffffffff7fffp-16383", + UNDERFLOW_EXCEPT_DOWNWARD), + TEST ("0x1.ffffffp-127", + "0x1.fffffffffffff8p-1023", + "0x1.ffffffffffffffffp-16383", + "0x1.ffffffffffffffffp-16384", + "0x1.ffffffffffffffffffffffffffcp-970", + "0x1.ffffffffffffffffffffffffffff8p-16383", + UNDERFLOW_ONLY_DOWNWARD_ZERO), + TEST ("-0x1.ffffffp-127", + "-0x1.fffffffffffff8p-1023", + "-0x1.ffffffffffffffffp-16383", + "-0x1.ffffffffffffffffp-16384", + "-0x1.ffffffffffffffffffffffffffcp-970", + "-0x1.ffffffffffffffffffffffffffff8p-16383", + UNDERFLOW_ONLY_UPWARD_ZERO), + TEST ("0x1.ffffffffffp-127", + "0x1.fffffffffffffffffp-1023", + "0x1.ffffffffffffffffffffp-16383", + "0x1.ffffffffffffffffffffp-16384", + "0x1.ffffffffffffffffffffffffffffffp-970", + "0x1.ffffffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ONLY_DOWNWARD_ZERO), + TEST ("-0x1.ffffffffffp-127", + "-0x1.fffffffffffffffffp-1023", + "-0x1.ffffffffffffffffffffp-16383", + "-0x1.ffffffffffffffffffffp-16384", + "-0x1.ffffffffffffffffffffffffffffffp-970", + "-0x1.ffffffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ONLY_UPWARD_ZERO), }; /* Return whether to expect underflow from a particular testcase, in a @@ -133,39 +347,62 @@ static bool support_underflow_exception = false; volatile double d = DBL_MIN; volatile double dd; -static int -test_in_one_mode (const char *s, enum underflow_case c, int rm, - const char *mode_name) +static bool +test_got_fe_underflow (void) { - int result = 0; - feclearexcept (FE_ALL_EXCEPT); - errno = 0; - double d = strtod (s, NULL); - int got_errno = errno; #ifdef FE_UNDERFLOW - bool got_fe_underflow = fetestexcept (FE_UNDERFLOW) != 0; + return fetestexcept (FE_UNDERFLOW) != 0; #else - bool got_fe_underflow = false; + return false; #endif - printf ("strtod (%s) (%s) returned %a, errno = %d, %sunderflow exception\n", - s, mode_name, d, got_errno, got_fe_underflow ? 
"" : "no "); - bool this_expect_underflow = expect_underflow (c, rm); - if (got_errno != 0 && got_errno != ERANGE) - { - puts ("FAIL: errno neither 0 nor ERANGE"); - result = 1; - } - else if (this_expect_underflow != (errno == ERANGE)) - { - puts ("FAIL: underflow from errno differs from expectations"); - result = 1; - } - if (support_underflow_exception && got_fe_underflow != this_expect_underflow) - { - puts ("FAIL: underflow from exceptions differs from expectations"); - result = 1; - } - return result; +} + +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static int \ +test_strto ## FSUF (int i, int rm, const char *mode_name) \ +{ \ + const char *s = tests[i].s_ ## FSUF; \ + enum underflow_case c = tests[i].c; \ + int result = 0; \ + feclearexcept (FE_ALL_EXCEPT); \ + errno = 0; \ + FTYPE d = strto ## FSUF (s, NULL); \ + int got_errno = errno; \ + bool got_fe_underflow = test_got_fe_underflow (); \ + char buf[FSTRLENMAX]; \ + FTOSTR (buf, sizeof (buf), "%a", d); \ + printf ("strto" #FSUF \ + " (%s) (%s) returned %s, errno = %d, " \ + "%sunderflow exception\n", \ + s, mode_name, buf, got_errno, \ + got_fe_underflow ? 
"" : "no "); \ + bool this_expect_underflow = expect_underflow (c, rm); \ + if (got_errno != 0 && got_errno != ERANGE) \ + { \ + puts ("FAIL: errno neither 0 nor ERANGE"); \ + result = 1; \ + } \ + else if (this_expect_underflow != (errno == ERANGE)) \ + { \ + puts ("FAIL: underflow from errno differs from expectations"); \ + result = 1; \ + } \ + if (support_underflow_exception \ + && got_fe_underflow != this_expect_underflow) \ + { \ + puts ("FAIL: underflow from exceptions " \ + "differs from expectations"); \ + result = 1; \ + } \ + return result; \ +} + +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) + +static int +test_in_one_mode (size_t i, int rm, const char *mode_name) +{ + return STRTOD_TEST_FOREACH (test_strto, i, rm, mode_name); } static int @@ -191,12 +428,12 @@ do_test (void) #endif for (size_t i = 0; i < sizeof (tests) / sizeof (tests[0]); i++) { - result |= test_in_one_mode (tests[i].s, tests[i].c, fe_tonearest, + result |= test_in_one_mode (i, fe_tonearest, "default rounding mode"); #ifdef FE_DOWNWARD if (!fesetround (FE_DOWNWARD)) { - result |= test_in_one_mode (tests[i].s, tests[i].c, FE_DOWNWARD, + result |= test_in_one_mode (i, FE_DOWNWARD, "FE_DOWNWARD"); fesetround (save_round_mode); } @@ -204,7 +441,7 @@ do_test (void) #ifdef FE_TOWARDZERO if (!fesetround (FE_TOWARDZERO)) { - result |= test_in_one_mode (tests[i].s, tests[i].c, FE_TOWARDZERO, + result |= test_in_one_mode (i, FE_TOWARDZERO, "FE_TOWARDZERO"); fesetround (save_round_mode); } @@ -212,7 +449,7 @@ do_test (void) #ifdef FE_UPWARD if (!fesetround (FE_UPWARD)) { - result |= test_in_one_mode (tests[i].s, tests[i].c, FE_UPWARD, + result |= test_in_one_mode (i, FE_UPWARD, "FE_UPWARD"); fesetround (save_round_mode); } commit 3a34851103d554b2c9b269ecae111648f9d7bb6d Author: Florian Weimer Date: Mon Oct 28 14:45:30 2024 +0100 elf: Change ldconfig auxcache magic number (bug 32231) In commit c628c2296392ed3bf2cb8d8470668e64fe53389f (elf: Remove ldconfig kernel version check), the layout of auxcache 
entries changed because the osversion field was removed from struct aux_cache_file_entry. However, AUX_CACHEMAGIC was not changed, so existing files are still used, potentially leading to unintended ldconfig behavior. This commit changes AUX_CACHEMAGIC, so that the file is regenerated. Reported-by: DJ Delorie Reviewed-by: Adhemerval Zanella (cherry picked from commit 0a536f6e2f76e3ef581b3fd9af1e5cf4ddc7a5a2) diff --git a/NEWS b/NEWS index 9033335db1..928c516bec 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,7 @@ The following bugs are resolved with this release: [32026] strerror/strsignal TLS not handled correctly for secondary namespaces [32052] Name space violation in fortify wrappers [32137] libio: Attempt wide backup free only for non-legacy code + [32231] elf: Change ldconfig auxcache magic number Version 2.40 diff --git a/elf/cache.c b/elf/cache.c index 8a618e11fa..62d681df42 100644 --- a/elf/cache.c +++ b/elf/cache.c @@ -820,7 +820,7 @@ struct aux_cache_entry struct aux_cache_entry *next; }; -#define AUX_CACHEMAGIC "glibc-ld.so.auxcache-1.0" +#define AUX_CACHEMAGIC "glibc-ld.so.auxcache-2.0" struct aux_cache_file_entry { commit 234458024300f0b4b430785999f33eddf059af6a Author: Michael Karcher Date: Sun Jul 28 15:30:57 2024 +0200 Mitigation for "clone on sparc might fail with -EFAULT for no valid reason" (bz 31394) It seems the kernel can not deal with uncommitted stack space in the area intended for the register window when executing the clone() system call. So create a nested frame (proxy for the kernel frame) and flush it from the processor to memory to force committing pages to the stack before invoking the system call. 
Bug: https://www.mail-archive.com/debian-glibc@lists.debian.org/msg62592.html Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=31394 See-also: https://lore.kernel.org/sparclinux/62f9be9d-a086-4134-9a9f-5df8822708af@mkarcher.dialup.fu-berlin.de/ Signed-off-by: Michael Karcher Reviewed-by: DJ Delorie (cherry picked from commit faeaa3bc9f76030b9882ccfdee232fc0ca6dcb06) diff --git a/NEWS b/NEWS index 928c516bec..dc815fb6d3 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,7 @@ The following bugs are resolved with this release: [27821] ungetc: Fix backup buffer leak on program exit [30081] resolv: Do not wait for non-existing second DNS response after error + [31394] clone on sparc might fail with -EFAULT for no valid reason [31717] elf: Avoid re-initializing already allocated TLS in dlopen [31890] resolv: Allow short error responses to match any DNS query [31968] mremap implementation in C does not handle arguments correctly diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S index 748d25fcfe..c9cf9bb055 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S @@ -28,6 +28,9 @@ .text ENTRY (__clone) save %sp,-96,%sp + save %sp,-96,%sp + flushw + restore cfi_def_cfa_register(%fp) cfi_window_save cfi_register(%o7, %i7) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S index e5ff2cf1a0..370d51fda2 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S @@ -32,6 +32,9 @@ ENTRY (__clone) save %sp, -192, %sp + save %sp, -192, %sp + flushw + restore cfi_def_cfa_register(%fp) cfi_window_save cfi_register(%o7, %i7) commit efb710034e4c5e734d100cc4ef1b1e27d4315825 Author: Adhemerval Zanella Date: Mon Sep 2 16:58:51 2024 -0300 linux: sparc: Fix clone for LEON/sparcv8 (BZ 31394) The sparc clone mitigation (faeaa3bc9f76030) added the use of flushw, which is not supported
by LEON/sparcv8. As discussed on libc-alpha, 'ta 3' is a working alternative [1]. [1] https://sourceware.org/pipermail/libc-alpha/2024-August/158905.html Checked with a build for sparcv8-linux-gnu targeting leon. Acked-by: John Paul Adrian Glaubitz (cherry picked from commit 5e8cfc5d625e6dd000a0371d21d792836ea7951a) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S index c9cf9bb055..c84244f56b 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S @@ -29,7 +29,11 @@ ENTRY (__clone) save %sp,-96,%sp save %sp,-96,%sp +#ifdef __sparcv9 flushw +#else + ta 3 +#endif restore cfi_def_cfa_register(%fp) cfi_window_save commit 626c048f32a979f77662bdcb1cca477c11d3f9c1 Author: Aurelien Jarno Date: Sun Nov 10 10:50:34 2024 +0100 elf: handle addition overflow in _dl_find_object_update_1 [BZ #32245] The remaining_to_add variable can be 0 if (current_used + count) wraps. This is caught by GCC 14+ on hppa, which determines from there that target_seg could be NULL when remaining_to_add is zero, which in turn causes a -Wstringop-overflow warning: In file included from ../include/atomic.h:49, from dl-find_object.c:20: In function '_dlfo_update_init_seg', inlined from '_dl_find_object_update_1' at dl-find_object.c:689:30, inlined from '_dl_find_object_update' at dl-find_object.c:805:13: ../sysdeps/unix/sysv/linux/hppa/atomic-machine.h:44:4: error: '__atomic_store_4' writing 4 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=] 44 | __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dl-find_object.c:644:3: note: in expansion of macro 'atomic_store_relaxed' 644 | atomic_store_relaxed (&seg->size, new_seg_size); | ^~~~~~~~~~~~~~~~~~~~ In function '_dl_find_object_update': cc1: note: destination object is likely at address zero In practice, this is not possible as it represents counts
of link maps. Link maps have sizes larger than 1 byte, so the sum of any two link map counts will always fit within a size_t without wrapping around. This patch therefore adds a check on remaining_to_add == 0 and tells GCC that this cannot happen using __builtin_unreachable. Thanks to Andreas Schwab for the investigation. Closes: BZ #32245 Signed-off-by: Aurelien Jarno Tested-by: John David Anglin Reviewed-by: Florian Weimer (cherry picked from commit 6c915c73d08028987232f6dc718f218c61113240) diff --git a/NEWS b/NEWS index dc815fb6d3..bd0b3bd66a 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,7 @@ The following bugs are resolved with this release: [32052] Name space violation in fortify wrappers [32137] libio: Attempt wide backup free only for non-legacy code [32231] elf: Change ldconfig auxcache magic number + [32245] glibc -Wstringop-overflow= build failure on hppa Version 2.40 diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c index 449302eda3..ae18b438d3 100644 --- a/elf/dl-find_object.c +++ b/elf/dl-find_object.c @@ -662,6 +662,14 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count) = _dlfo_loaded_mappings[!active_idx]; size_t remaining_to_add = current_used + count; + /* remaining_to_add can be 0 if (current_used + count) wraps, but in practice + this is not possible as it represent counts of link maps. Link maps have + sizes larger than 1 byte, so the sum of any two link map counts will + always fit within a size_t without wrapping around. This check ensures + that target_seg is not erroneously considered potentially NULL by GCC. */ + if (remaining_to_add == 0) + __builtin_unreachable (); + /* Ensure that the new segment chain has enough space.
*/ { size_t new_allocated commit 9b9545ba27613fa41efdfa7965b6fc580bf1b919 Author: Michael Jeanson Date: Thu Nov 7 22:23:49 2024 +0100 nptl: initialize rseq area prior to registration Per the rseq syscall documentation, 3 fields are required to be initialized by userspace prior to registration, they are 'cpu_id', 'rseq_cs' and 'flags'. Since we have no guarantee that 'struct pthread' is cleared on all architectures, explicitly set those 3 fields prior to registration. Signed-off-by: Michael Jeanson Reviewed-by: Florian Weimer (cherry picked from commit 97f60abd25628425971f07e9b0e7f8eec0741235) diff --git a/nptl/descr.h b/nptl/descr.h index 8cef95810c..c4bdd7757a 100644 --- a/nptl/descr.h +++ b/nptl/descr.h @@ -414,6 +414,8 @@ struct pthread { uint32_t cpu_id_start; uint32_t cpu_id; + uint64_t rseq_cs; + uint32_t flags; }; char pad[32]; /* Original rseq area size. */ } rseq_area __attribute__ ((aligned (32))); diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h index 7ea935b4ad..37a8f630b6 100644 --- a/sysdeps/unix/sysv/linux/rseq-internal.h +++ b/sysdeps/unix/sysv/linux/rseq-internal.h @@ -51,11 +51,21 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) /* The initial implementation used only 20 bytes out of 32, but still expected size 32. */ size = RSEQ_AREA_SIZE_INITIAL; + + /* Initialize the rseq fields that are read by the kernel on + registration, there is no guarantee that struct pthread is + cleared on all architectures. */ + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); + THREAD_SETMEM (self, rseq_area.rseq_cs, 0); + THREAD_SETMEM (self, rseq_area.flags, 0); + int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area, size, 0, RSEQ_SIG); if (!INTERNAL_SYSCALL_ERROR_P (ret)) return true; } + /* When rseq is disabled by tunables or the registration fails, inform + userspace by setting 'cpu_id' to RSEQ_CPU_ID_REGISTRATION_FAILED. 
*/ THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED); return false; } commit 091dd12831792cef16eee24fe240c73a25b47a1d Author: Michael Jeanson Date: Wed Nov 20 14:15:42 2024 -0500 nptl: initialize cpu_id_start prior to rseq registration When adding explicit initialization of rseq fields prior to registration, I glossed over the fact that 'cpu_id_start' is also documented as initialized by user-space. While current kernels don't validate the content of this field on registration, future ones could. Signed-off-by: Michael Jeanson Reviewed-by: Mathieu Desnoyers (cherry picked from commit d9f40387d3305d97e30a8cf8724218c42a63680a) diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h index 37a8f630b6..ef3eab1fef 100644 --- a/sysdeps/unix/sysv/linux/rseq-internal.h +++ b/sysdeps/unix/sysv/linux/rseq-internal.h @@ -56,6 +56,7 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) registration, there is no guarantee that struct pthread is cleared on all architectures. */ THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); + THREAD_SETMEM (self, rseq_area.cpu_id_start, 0); THREAD_SETMEM (self, rseq_area.rseq_cs, 0); THREAD_SETMEM (self, rseq_area.flags, 0); commit c6cdab1e01bc11bc4036dc5b1be6086f6259c123 Author: Sam James Date: Mon Dec 9 23:11:25 2024 +0000 malloc: add indirection for malloc(-like) functions in tests [BZ #32366] GCC 15 introduces allocation dead code removal (DCE) for PR117370 in r15-5255-g7828dc070510f8. This breaks various glibc tests which want to assert various properties of the allocator without doing anything obviously useful with the allocated memory. Alexander Monakov rightly pointed out that we can and should do better than passing -fno-malloc-dce to paper over the problem. Not least because GCC 14 already does such DCE where there's no testing of malloc's return value against NULL, and LLVM has such optimisations too. 
Handle this by providing malloc (and friends) wrappers with a volatile function pointer to obscure that we're calling malloc (et. al) from the compiler. Reviewed-by: Paul Eggert (cherry picked from commit a9944a52c967ce76a5894c30d0274b824df43c7a) diff --git a/malloc/tst-aligned-alloc.c b/malloc/tst-aligned-alloc.c index 91167d1392..b0f05a8fec 100644 --- a/malloc/tst-aligned-alloc.c +++ b/malloc/tst-aligned-alloc.c @@ -25,6 +25,8 @@ #include #include +#include "tst-malloc-aux.h" + static int do_test (void) { diff --git a/malloc/tst-compathooks-off.c b/malloc/tst-compathooks-off.c index d0106f3fb7..4cce6e5a80 100644 --- a/malloc/tst-compathooks-off.c +++ b/malloc/tst-compathooks-off.c @@ -25,6 +25,8 @@ #include #include +#include "tst-malloc-aux.h" + extern void (*volatile __free_hook) (void *, const void *); extern void *(*volatile __malloc_hook)(size_t, const void *); extern void *(*volatile __realloc_hook)(void *, size_t, const void *); diff --git a/malloc/tst-malloc-aux.h b/malloc/tst-malloc-aux.h new file mode 100644 index 0000000000..54908b4a24 --- /dev/null +++ b/malloc/tst-malloc-aux.h @@ -0,0 +1,41 @@ +/* Wrappers for malloc-like functions to allow testing the implementation + without optimization. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . 
*/ + +#ifndef TST_MALLOC_AUX_H +#define TST_MALLOC_AUX_H + +#include +#include + +static void *(*volatile aligned_alloc_indirect)(size_t, size_t) = aligned_alloc; +static void *(*volatile calloc_indirect)(size_t, size_t) = calloc; +static void *(*volatile malloc_indirect)(size_t) = malloc; +static void *(*volatile realloc_indirect)(void*, size_t) = realloc; + +#undef aligned_alloc +#undef calloc +#undef malloc +#undef realloc + +#define aligned_alloc aligned_alloc_indirect +#define calloc calloc_indirect +#define malloc malloc_indirect +#define realloc realloc_indirect + +#endif /* TST_MALLOC_AUX_H */ diff --git a/malloc/tst-malloc-check.c b/malloc/tst-malloc-check.c index fde8863ad7..cc88bff3b3 100644 --- a/malloc/tst-malloc-check.c +++ b/malloc/tst-malloc-check.c @@ -20,6 +20,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c index 8e9e0d5fa2..2b91377e54 100644 --- a/malloc/tst-malloc-too-large.c +++ b/malloc/tst-malloc-too-large.c @@ -43,6 +43,7 @@ #include #include +#include "tst-malloc-aux.h" /* This function prepares for each 'too-large memory allocation' test by performing a small successful malloc/free and resetting errno prior to diff --git a/malloc/tst-malloc.c b/malloc/tst-malloc.c index f7a6e4654c..68af399022 100644 --- a/malloc/tst-malloc.c +++ b/malloc/tst-malloc.c @@ -22,6 +22,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void diff --git a/malloc/tst-realloc.c b/malloc/tst-realloc.c index f50499ecb1..74a28fb45e 100644 --- a/malloc/tst-realloc.c +++ b/malloc/tst-realloc.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + static int do_test (void) { diff --git a/support/support.h b/support/support.h index ba21ec9b5a..1a77f79793 100644 --- a/support/support.h +++ b/support/support.h @@ -113,7 +113,7 @@ void *xposix_memalign (size_t alignment, size_t n) __attribute_malloc__ 
__attribute_alloc_align__ ((1)) __attribute_alloc_size__ ((2)) __attr_dealloc_free __returns_nonnull; char *xasprintf (const char *format, ...) - __attribute__ ((format (printf, 1, 2), malloc)) __attr_dealloc_free + __attribute__ ((format (printf, 1, 2), __malloc__)) __attr_dealloc_free __returns_nonnull; char *xstrdup (const char *) __attr_dealloc_free __returns_nonnull; char *xstrndup (const char *, size_t) __attr_dealloc_free __returns_nonnull; diff --git a/test-skeleton.c b/test-skeleton.c index ae185a4f28..690f26e7cf 100644 --- a/test-skeleton.c +++ b/test-skeleton.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include commit 846e64257e5fc9b5b723c2eec2b7155ab5944d1f Author: Adhemerval Zanella Date: Fri Sep 13 11:10:05 2024 -0300 support: Make support_process_state_wait return the found state So caller can check which state was found if multiple ones are asked. Checked on x86_64-linux-gnu. Reviewed-by: Florian Weimer (cherry picked from commit 38316352e0f742f3a2b5816a61a4b603cb5573f8) (cherry picked from commit 2e38c5a090b3a54040b6e508d42e5a76e492c6e8) diff --git a/support/process_state.h b/support/process_state.h index 1cf902e91b..9541d8c343 100644 --- a/support/process_state.h +++ b/support/process_state.h @@ -31,13 +31,16 @@ enum support_process_state support_process_state_dead = 0x20, /* X (dead). */ support_process_state_zombie = 0x40, /* Z (zombie). */ support_process_state_parked = 0x80, /* P (parked). */ + support_process_state_invalid = 0x100 /* Invalid state. */ }; /* Wait for process PID to reach state STATE. It can be a combination of multiple possible states ('process_state_running | process_state_sleeping') where the function return when any of these state are observed. For an invalid state not represented by SUPPORT_PROCESS_STATE, it fallbacks - to a 2 second sleep. */ -void support_process_state_wait (pid_t pid, enum support_process_state state); + to a 2 second sleep. + Return the found process state. 
*/ +enum support_process_state +support_process_state_wait (pid_t pid, enum support_process_state state); #endif diff --git a/support/support_process_state.c b/support/support_process_state.c index 062335234f..ae8e0a531c 100644 --- a/support/support_process_state.c +++ b/support/support_process_state.c @@ -27,7 +27,7 @@ #include #include -void +enum support_process_state support_process_state_wait (pid_t pid, enum support_process_state state) { #ifdef __linux__ @@ -75,7 +75,7 @@ support_process_state_wait (pid_t pid, enum support_process_state state) { free (line); xfclose (fstatus); - return; + return process_states[i].s; } rewind (fstatus); @@ -90,4 +90,6 @@ support_process_state_wait (pid_t pid, enum support_process_state state) /* Fallback to nanosleep if an invalid state is found. */ #endif nanosleep (&(struct timespec) { 1, 0 }, NULL); + + return support_process_state_invalid; } diff --git a/support/tst-support-process_state.c b/support/tst-support-process_state.c index d73269320f..4a88eae3a7 100644 --- a/support/tst-support-process_state.c +++ b/support/tst-support-process_state.c @@ -68,28 +68,39 @@ do_test (void) if (test_verbose) printf ("info: waiting pid %d, state_stopped/state_tracing_stop\n", (int) pid); - support_process_state_wait (pid, stop_state); + { + enum support_process_state state = + support_process_state_wait (pid, stop_state); + TEST_VERIFY (state == support_process_state_stopped + || state == support_process_state_tracing_stop); + } if (kill (pid, SIGCONT) != 0) FAIL_RET ("kill (%d, SIGCONT): %m\n", pid); if (test_verbose) printf ("info: waiting pid %d, state_sleeping\n", (int) pid); - support_process_state_wait (pid, support_process_state_sleeping); + TEST_COMPARE (support_process_state_wait (pid, + support_process_state_sleeping), + support_process_state_sleeping); if (kill (pid, SIGUSR1) != 0) FAIL_RET ("kill (%d, SIGUSR1): %m\n", pid); if (test_verbose) printf ("info: waiting pid %d, state_running\n", (int) pid); - 
support_process_state_wait (pid, support_process_state_running); + TEST_COMPARE (support_process_state_wait (pid, + support_process_state_running), + support_process_state_running); if (kill (pid, SIGKILL) != 0) FAIL_RET ("kill (%d, SIGKILL): %m\n", pid); if (test_verbose) printf ("info: waiting pid %d, state_zombie\n", (int) pid); - support_process_state_wait (pid, support_process_state_zombie); + TEST_COMPARE (support_process_state_wait (pid, + support_process_state_zombie), + support_process_state_zombie);; siginfo_t info; int r = waitid (P_PID, pid, &info, WEXITED); commit 11d9f49cebe64939f50e16a59c9ebefb80a294ab Author: Adhemerval Zanella Date: Fri Sep 13 11:11:56 2024 -0300 sparc: Fix restartable syscalls (BZ 32173) The commit 'sparc: Use Linux kABI for syscall return' (86c5d2cf0ce046279baddc7faa27da71f1a89fde) did not take into account a subtle sparc syscall kABI constraint. For syscalls that might block indefinitely, on an interrupt (like SIGCONT) the kernel will set the instruction pointer to just before the syscall: arch/sparc/kernel/signal_64.c 476 static void do_signal(struct pt_regs *regs, unsigned long orig_i0) 477 { [...] 525 if (restart_syscall) { 526 switch (regs->u_regs[UREG_I0]) { 527 case ERESTARTNOHAND: 528 case ERESTARTSYS: 529 case ERESTARTNOINTR: 530 /* replay the system call when we are done */ 531 regs->u_regs[UREG_I0] = orig_i0; 532 regs->tpc -= 4; 533 regs->tnpc -= 4; 534 pt_regs_clear_syscall(regs); 535 fallthrough; 536 case ERESTART_RESTARTBLOCK: 537 regs->u_regs[UREG_G1] = __NR_restart_syscall; 538 regs->tpc -= 4; 539 regs->tnpc -= 4; 540 pt_regs_clear_syscall(regs); 541 } However, on a SIGCONT it seems that 'g1' register is being clobbered after the syscall returns. Before 86c5d2cf0ce046279, the 'g1' was always placed jus before the 'ta' instruction which then reloads the syscall number and restarts the syscall. 
On master, where 'g1' might be placed before 'ta': $ cat test.c #include int main () { pause (); } $ gcc test.c -o test $ strace -f ./t [...] ppoll(NULL, 0, NULL, NULL, 0 On another terminal $ kill -STOP 2262828 $ strace -f ./t [...] --- SIGSTOP {si_signo=SIGSTOP, si_code=SI_USER, si_pid=2521813, si_uid=8289} --- --- stopped by SIGSTOP --- And then $ kill -CONT 2262828 Results in: --- SIGCONT {si_signo=SIGCONT, si_code=SI_USER, si_pid=2521813, si_uid=8289} --- restart_syscall(<... resuming interrupted ppoll ...>) = -1 EINTR (Interrupted system call) Where the expected behaviour would be: $ strace -f ./t [...] ppoll(NULL, 0, NULL, NULL, 0) = ? ERESTARTNOHAND (To be restarted if no handler) --- SIGSTOP {si_signo=SIGSTOP, si_code=SI_USER, si_pid=2521813, si_uid=8289} --- --- stopped by SIGSTOP --- --- SIGCONT {si_signo=SIGCONT, si_code=SI_USER, si_pid=2521813, si_uid=8289} --- ppoll(NULL, 0, NULL, NULL, 0 Just moving the 'g1' setting near the syscall asm is not sufficient, since the compiler might optimize it away (as I saw on cancellation.c by trying this fix). Instead, I have changed the inline asm to put the 'g1' setup in the asm block. This requires changing the asm constraint for INTERNAL_SYSCALL_NCS, since the syscall number is not constant. Checked on sparc64-linux-gnu. 
Reported-by: René Rebe Tested-by: Sam James Reviewed-by: Sam James (cherry picked from commit 2c1903cbbac0022153a67776f474c221250ad6ed) (cherry picked from commit 1cd7e13289b91e1495a1865c1f678196d1bb7be4) diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 59998c7af4..34890ef69a 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -227,6 +227,7 @@ tests += \ tst-scm_rights \ tst-sigtimedwait \ tst-sync_file_range \ + tst-syscall-restart \ tst-sysconf-iov_max \ tst-sysvmsg-linux \ tst-sysvsem-linux \ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h index d2d68f5312..c2ffbb5c8f 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h @@ -107,6 +107,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x10;" \ "bcc 1f;" \ " nop;" \ @@ -114,7 +115,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h index 96047424e9..5598fab08a 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h @@ -106,6 +106,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x6d;" \ "bcc,pt %%xcc, 1f;" \ " nop;" \ @@ -113,7 +114,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", 
"f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sysdep.h index dcabb57fe2..c287740a8c 100644 --- a/sysdeps/unix/sysv/linux/sparc/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sysdep.h @@ -50,97 +50,109 @@ #undef INTERNAL_SYSCALL_NCS #define INTERNAL_SYSCALL_NCS(name, nr, args...) \ - internal_syscall##nr(__SYSCALL_STRING, name, args) + _internal_syscall##nr(__SYSCALL_STRING, "p", name, args) -#define internal_syscall0(string,name,dummy...) \ +#define _internal_syscall0(string,nc,name,dummy...) \ ({ \ - register long int __g1 __asm__ ("g1") = (name); \ register long __o0 __asm__ ("o0"); \ + long int _name = (long int) (name); \ __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall0(string,name,args...) \ + _internal_syscall0(string, "i", name, args) -#define internal_syscall1(string,name,arg1) \ +#define _internal_syscall1(string,nc,name,arg1) \ ({ \ long int _arg1 = (long int) (arg1); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall1(string,name,args...) 
\ + _internal_syscall1(string, "i", name, args) -#define internal_syscall2(string,name,arg1,arg2) \ +#define _internal_syscall2(string,nc,name,arg1,arg2) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall2(string,name,args...) \ + _internal_syscall2(string, "i", name, args) -#define internal_syscall3(string,name,arg1,arg2,arg3) \ +#define _internal_syscall3(string,nc,name,arg1,arg2,arg3) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall3(string,name,args...) 
\ + _internal_syscall3(string, "i", name, args) -#define internal_syscall4(string,name,arg1,arg2,arg3,arg4) \ +#define _internal_syscall4(string,nc,name,arg1,arg2,arg3,arg4) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall4(string,name,args...) \ + _internal_syscall4(string, "i", name, args) -#define internal_syscall5(string,name,arg1,arg2,arg3,arg4,arg5) \ +#define _internal_syscall5(string,nc,name,arg1,arg2,arg3,arg4,arg5) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall5(string,name,args...) 
\ + _internal_syscall5(string, "i", name, args) -#define internal_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6) \ +#define _internal_syscall6(string,nc,name,arg1,arg2,arg3,arg4,arg5,arg6)\ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ @@ -148,20 +160,22 @@ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ long int _arg6 = (long int) (arg6); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ register long int __o5 __asm__ ("o5") = _arg6; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4), \ "r" (__o5) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall6(string,name,args...) 
\ + _internal_syscall6(string, "i", name, args) #define INLINE_CLONE_SYSCALL(arg1,arg2,arg3,arg4,arg5) \ ({ \ @@ -170,15 +184,15 @@ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ + long int _name = __NR_clone; \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - register long int __g1 __asm__ ("g1") = __NR_clone; \ __asm __volatile (__SYSCALL_STRING : \ "=r" (__o0), "=r" (__o1) : \ - "r" (__g1), "0" (__o0), "1" (__o1), \ + [scn] "i" (_name), "0" (__o0), "1" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ if (__glibc_unlikely ((unsigned long int) (__o0) > -4096UL)) \ diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c new file mode 100644 index 0000000000..84a8a41b5c --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c @@ -0,0 +1,112 @@ +/* Test if a syscall is correctly restarted. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include +#include +#include + +static int +check_pid (pid_t pid) +{ + /* Wait until the child has called pause and it blocking on kernel. */ + support_process_state_wait (pid, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGSTOP), 0); + + /* Adding process_state_tracing_stop ('t') allows the test to work under + trace programs such as ptrace. */ + support_process_state_wait (pid, support_process_state_stopped + | support_process_state_tracing_stop); + + TEST_COMPARE (kill (pid, SIGCONT), 0); + + enum support_process_state state + = support_process_state_wait (pid, support_process_state_sleeping + | support_process_state_zombie); + + TEST_COMPARE (state, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGTERM), 0); + + siginfo_t info; + TEST_COMPARE (waitid (P_PID, pid, &info, WEXITED), 0); + TEST_COMPARE (info.si_signo, SIGCHLD); + TEST_COMPARE (info.si_code, CLD_KILLED); + TEST_COMPARE (info.si_status, SIGTERM); + TEST_COMPARE (info.si_pid, pid); + + return 0; +} + +static void * +tf (void *) +{ + pause (); + return NULL; +} + +static void +child_mt (void) +{ + /* Let only the created thread to handle signals. */ + sigset_t set; + sigfillset (&set); + xpthread_sigmask (SIG_BLOCK, &set, NULL); + + sigdelset (&set, SIGSTOP); + sigdelset (&set, SIGCONT); + sigdelset (&set, SIGTERM); + + pthread_attr_t attr; + xpthread_attr_init (&attr); + TEST_COMPARE (pthread_attr_setsigmask_np (&attr, &set), 0); + + xpthread_join (xpthread_create (&attr, tf, NULL)); +} + +static void +do_test_syscall (bool multithread) +{ + pid_t pid = xfork (); + if (pid == 0) + { + if (multithread) + child_mt (); + else + pause (); + _exit (127); + } + + check_pid (pid); +} + +static int +do_test (void) +{ + /* Check for both single and multi thread, since they use different syscall + mechanisms. 
*/ + do_test_syscall (false); + do_test_syscall (true); + + return 0; +} + +#include commit 9af64ca64c532b7e42a40b48fe5e01726a9b7943 Author: H.J. Lu Date: Thu Dec 5 08:39:44 2024 +0800 math: Exclude internal math symbols for tests [BZ #32414] Since internal tests don't have access to internal symbols in libm, exclude them for internal tests. Also make tst-strtod5 and tst-strtod5i depend on $(libm) to support older versions of GCC which can't inline copysign family functions. This fixes BZ #32414. Signed-off-by: H.J. Lu Reviewed-by: Sunil K Pandey (cherry picked from commit 5df09b444835fca6e64b3d4b4a5beb19b3b2ba21) diff --git a/include/math.h b/include/math.h index fa11a710a6..035fd160ff 100644 --- a/include/math.h +++ b/include/math.h @@ -130,7 +130,10 @@ fabsf128 (_Float128 x) } # endif -# if !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0) + +/* NB: Internal tests don't have access to internal symbols. */ +# if !IS_IN (testsuite_internal) \ + && !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0) # ifndef NO_MATH_REDIRECT /* Declare some functions for use within GLIBC. Compilers typically inline those functions as a single instruction. Use an asm to diff --git a/stdlib/Makefile b/stdlib/Makefile index 8b0ac63ddb..8213fa83ef 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -603,6 +603,8 @@ $(objpfx)bug-strtod2: $(libm) $(objpfx)tst-strtod-round: $(libm) $(objpfx)tst-tininess: $(libm) $(objpfx)tst-strtod-underflow: $(libm) +$(objpfx)tst-strtod5: $(libm) +$(objpfx)tst-strtod5i: $(libm) $(objpfx)tst-strtod6: $(libm) $(objpfx)tst-strtod-nan-locale: $(libm) $(objpfx)tst-strtod-nan-sign: $(libm) commit 0b39fe801208e805cc911f64a2fdd25d04b7151e Author: Adhemerval Zanella Date: Fri Oct 18 08:48:22 2024 -0300 linux: Fix tst-syscall-restart.c on old gcc (BZ 32283) To avoid a parameter name omitted error. 
(cherry picked from commit ab564362d0470d10947c24155ec048c4e14a009d) diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c index 84a8a41b5c..0ee7dc8517 100644 --- a/sysdeps/unix/sysv/linux/tst-syscall-restart.c +++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c @@ -57,7 +57,7 @@ check_pid (pid_t pid) } static void * -tf (void *) +tf (void *closure) { pause (); return NULL; commit 94e4a8c7d68129075f1b494a0b26151a4f989b36 Author: Florian Weimer Date: Tue Dec 17 18:12:03 2024 +0100 x86: Avoid integer truncation with large cache sizes (bug 32470) Some hypervisors report 1 TiB L3 cache size. This results in some variables incorrectly getting zeroed, causing crashes in memcpy/memmove because invariants are violated. (cherry picked from commit 61c3450db96dce96ad2b24b4f0b548e6a46d68e5) diff --git a/NEWS b/NEWS index bd0b3bd66a..97a1e1f5d4 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,7 @@ The following bugs are resolved with this release: [32137] libio: Attempt wide backup free only for non-legacy code [32231] elf: Change ldconfig auxcache magic number [32245] glibc -Wstringop-overflow= build failure on hppa + [32470] x86: Avoid integer truncation with large cache sizes Version 2.40 diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index a1c03b8903..ac97414b5b 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -961,11 +961,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) non_temporal_threshold = maximum_non_temporal_threshold; /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */ - unsigned int minimum_rep_movsb_threshold; + unsigned long int minimum_rep_movsb_threshold; /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for VEC_SIZE == 64 or 32. For VEC_SIZE == 16, the default REP MOVSB threshold is 2048 * (VEC_SIZE / 16). 
*/ - unsigned int rep_movsb_threshold; + unsigned long int rep_movsb_threshold; if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512)) { commit 9fbfbd924f718663d5303858f34d1f857c375093 Author: John David Anglin Date: Thu Dec 19 11:30:09 2024 -0500 hppa: Fix strace detach-vfork test This change implements vfork.S for direct support of the vfork syscall. clone.S is revised to correct child support for the vfork case. The main bug was creating a frame prior to the clone syscall. This was done to allow the rp and r4 registers to be saved and restored from the stack frame. r4 was used to save and restore the PIC register, r19, across the system call and the call to set errno. But in the vfork case, it is undefined behavior for the child to return from the function in which vfork was called. It is surprising that this usually worked. Syscalls on hppa save and restore rp and r19, so we don't need to create a frame prior to the clone syscall. We only need a frame when __syscall_error is called. We also don't need to save and restore r19 around the call to $$dyncall as r19 is not used in the code after $$dyncall. This considerably simplifies clone.S. Signed-off-by: John David Anglin diff --git a/sysdeps/unix/sysv/linux/hppa/clone.S b/sysdeps/unix/sysv/linux/hppa/clone.S index a31afea429..c18163d0f7 100644 --- a/sysdeps/unix/sysv/linux/hppa/clone.S +++ b/sysdeps/unix/sysv/linux/hppa/clone.S @@ -59,16 +59,6 @@ .text ENTRY(__clone) - /* Prologue */ - stwm %r4, 64(%sp) - .cfi_def_cfa_offset -64 - .cfi_offset 4, 0 - stw %sp, -4(%sp) -#ifdef PIC - stw %r19, -32(%sp) - .cfi_offset 19, 32 -#endif - /* Sanity check arguments. */ comib,=,n 0,%arg0,.LerrorSanity /* no NULL function pointers */ comib,=,n 0,%arg1,.LerrorSanity /* no NULL stack pointers */ @@ -87,54 +77,34 @@ ENTRY(__clone) /* User stack pointer is in the correct register already */ /* Load args from stack... 
*/ - ldw -116(%sp), %r24 /* Load parent_tidptr */ - ldw -120(%sp), %r23 /* Load newtls */ - ldw -124(%sp), %r22 /* Load child_tidptr */ - - /* Save the PIC register. */ -#ifdef PIC - copy %r19, %r4 /* parent */ -#endif + ldw -52(%sp), %r24 /* Load parent_tidptr */ + ldw -56(%sp), %r23 /* Load newtls */ + ldw -60(%sp), %r22 /* Load child_tidptr */ /* Do the system call */ ble 0x100(%sr2, %r0) ldi __NR_clone, %r20 ldi -4096, %r1 - comclr,>>= %r1, %ret0, %r0 /* Note: unsigned compare. */ - b,n .LerrorRest - - /* Restore the PIC register. */ -#ifdef PIC - copy %r4, %r19 /* parent */ -#endif - + comb,<<,n %r1, %ret0, .LerrorRest /* Note: unsigned compare. */ comib,=,n 0, %ret0, .LthreadStart - - /* Successful return from the parent - No need to restore the PIC register, - since we return immediately. */ - - ldw -84(%sp), %rp - bv %r0(%rp) - ldwm -64(%sp), %r4 + bv,n %r0(%rp) .LerrorRest: - /* Something bad happened -- no child created */ + /* Something bad happened -- no child created -- need a frame */ + ldo 64(%sp),%sp + .cfi_def_cfa_offset -64 bl __syscall_error, %rp sub %r0, %ret0, %arg0 ldw -84(%sp), %rp /* Return after setting errno, ret0 is set to -1 by __syscall_error. */ bv %r0(%rp) - ldwm -64(%sp), %r4 + ldo -64(%sp), %sp .LerrorSanity: /* Sanity checks failed, return -1, and set errno to EINVAL. */ - bl __syscall_error, %rp - ldi EINVAL, %arg0 - ldw -84(%sp), %rp - bv %r0(%rp) - ldwm -64(%sp), %r4 + b .LerrorRest + ldi -EINVAL, %ret0 .LthreadStart: /* Load up the arguments. 
*/ @@ -144,14 +114,8 @@ ENTRY(__clone) /* $$dyncall fixes child's PIC register */ /* Call the user's function */ -#ifdef PIC - copy %r19, %r4 -#endif bl $$dyncall, %r31 copy %r31, %rp -#ifdef PIC - copy %r4, %r19 -#endif copy %r28, %r26 ble 0x100(%sr2, %r0) ldi __NR_exit, %r20 diff --git a/sysdeps/unix/sysv/linux/hppa/vfork.S b/sysdeps/unix/sysv/linux/hppa/vfork.S new file mode 100644 index 0000000000..5fd368f3cf --- /dev/null +++ b/sysdeps/unix/sysv/linux/hppa/vfork.S @@ -0,0 +1,53 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#define _ERRNO_H 1 +#include + +/* Clone the calling process, but without copying the whole address space. + The calling process is suspended until the new process exits or is + replaced by a call to `execve'. Return -1 for errors, 0 to the new process, + and the process ID of the new process to the old process. */ + +ENTRY (__vfork) + ble 0x100(%sr2, %r0) + ldi __NR_vfork, %r20 + + ldi -4096, %r1 + comclr,<< %r1, %ret0, %r0 /* Note: unsigned compare. 
*/ + bv,n %r0(%rp) + + /* Something bad happened -- no child created -- we need a frame */ + ldo 64(%sp), %sp + .cfi_def_cfa_offset -64 + + /* Set errno */ + bl __syscall_error, %rp + sub %r0, %ret0, %arg0 + + /* ret0 is set to -1 by __syscall_error */ + ldw -84(%sp), %rp + bv %r0(%rp) + ldo -64(%sp), %sp + +PSEUDO_END (__vfork) +libc_hidden_def (__vfork) + +weak_alias (__vfork, vfork) +strong_alias (__vfork, __libc_vfork) commit 7648e3c8e80b3f1b3b43506b2fbe370e4824ab97 Author: John David Anglin Date: Sun Dec 22 09:58:02 2024 -0500 hppa: Simplify handling of sanity check errors in clone.S. This simplifies the handling of sanity check errors in clone.S. Adjusted a couple of comments to reflect current code. Signed-off-by: John David Anglin diff --git a/sysdeps/unix/sysv/linux/hppa/clone.S b/sysdeps/unix/sysv/linux/hppa/clone.S index c18163d0f7..e85e7f517f 100644 --- a/sysdeps/unix/sysv/linux/hppa/clone.S +++ b/sysdeps/unix/sysv/linux/hppa/clone.S @@ -90,6 +90,10 @@ ENTRY(__clone) comib,=,n 0, %ret0, .LthreadStart bv,n %r0(%rp) +.LerrorSanity: + /* Sanity checks failed, set errno to EINVAL. */ + ldi -EINVAL, %ret0 + .LerrorRest: /* Something bad happened -- no child created -- need a frame */ ldo 64(%sp),%sp @@ -101,11 +105,6 @@ ENTRY(__clone) bv %r0(%rp) ldo -64(%sp), %sp -.LerrorSanity: - /* Sanity checks failed, return -1, and set errno to EINVAL. */ - b .LerrorRest - ldi -EINVAL, %ret0 - .LthreadStart: /* Load up the arguments. */ ldw -60(%sp), %arg0 @@ -121,7 +120,7 @@ ENTRY(__clone) ldi __NR_exit, %r20 /* We should not return from exit. - We do not restore r4, or the stack state. */ + We do not restore the stack state. */ iitlbp %r0, (%sr0, %r0) PSEUDO_END(__clone) commit 473597d8167f86afee3544215db108b170ec13c0 Author: Andreas Schwab Date: Wed Sep 25 11:49:30 2024 +0200 Fix missing randomness in __gen_tempname (bug 32214) Make sure to update the random value also if getrandom fails. 
Fixes: 686d542025 ("posix: Sync tempname with gnulib") (cherry picked from commit 5f62cf88c4530c11904482775b7582bd7f6d80d2) diff --git a/NEWS b/NEWS index 97a1e1f5d4..57feba81cd 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,7 @@ The following bugs are resolved with this release: [32026] strerror/strsignal TLS not handled correctly for secondary namespaces [32052] Name space violation in fortify wrappers [32137] libio: Attempt wide backup free only for non-legacy code + [32214] Fix missing randomness in __gen_tempname [32231] elf: Change ldconfig auxcache magic number [32245] glibc -Wstringop-overflow= build failure on hppa [32470] x86: Avoid integer truncation with large cache sizes diff --git a/sysdeps/posix/tempname.c b/sysdeps/posix/tempname.c index c00fe0c181..fc30958a0c 100644 --- a/sysdeps/posix/tempname.c +++ b/sysdeps/posix/tempname.c @@ -117,6 +117,8 @@ random_bits (random_value *r, random_value s) succeed. */ #if !_LIBC *r = mix_random_values (v, clock ()); +#else + *r = v; #endif return false; } commit 7d4b6bcae91f29d7b4daf15bab06b66cf1d2217c Author: Siddhesh Poyarekar Date: Tue Jan 21 16:11:06 2025 -0500 Fix underallocation of abort_msg_s struct (CVE-2025-0395) Include the space needed to store the length of the message itself, in addition to the message string. This resolves BZ #32582. 
Signed-off-by: Siddhesh Poyarekar Reviewed: Adhemerval Zanella (cherry picked from commit 68ee0f704cb81e9ad0a78c644a83e1e9cd2ee578) diff --git a/assert/assert.c b/assert/assert.c index c29629f5f6..b6e37d694c 100644 --- a/assert/assert.c +++ b/assert/assert.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,8 @@ __assert_fail_base (const char *fmt, const char *assertion, const char *file, (void) __fxprintf (NULL, "%s", str); (void) fflush (stderr); - total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1); + total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1, + GLRO(dl_pagesize)); struct abort_msg_s *buf = __mmap (NULL, total, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); if (__glibc_likely (buf != MAP_FAILED)) diff --git a/sysdeps/posix/libc_fatal.c b/sysdeps/posix/libc_fatal.c index f9e3425e04..089c47b04b 100644 --- a/sysdeps/posix/libc_fatal.c +++ b/sysdeps/posix/libc_fatal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -105,7 +106,8 @@ __libc_message_impl (const char *fmt, ...) { WRITEV_FOR_FATAL (fd, iov, iovcnt, total); - total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1); + total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1, + GLRO(dl_pagesize)); struct abort_msg_s *buf = __mmap (NULL, total, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); commit aef8f8d6a947b290162393e1d717c7aee96fef8e Author: H.J. 
Lu Date: Tue Dec 17 18:41:45 2024 +0800 Hide all malloc functions from compiler [BZ #32366] Since -1 isn't a power of two, compiler may reject it, hide memalign from Clang 19 which issues an error: tst-memalign.c:86:31: error: requested alignment is not a power of 2 [-Werror,-Wnon-power-of-two-alignment] 86 | p = memalign (-1, pagesize); | ^~ tst-memalign.c:86:31: error: requested alignment must be 4294967296 bytes or smaller; maximum alignment assumed [-Werror,-Wbuiltin-assume-aligned-alignment] 86 | p = memalign (-1, pagesize); | ^~ Update tst-malloc-aux.h to hide all malloc functions and include it in all malloc tests to prevent compiler from optimizing out any malloc functions. Tested with Clang 19.1.5 and GCC 15 20241206 for BZ #32366. Signed-off-by: H.J. Lu Reviewed-by: Sam James (cherry picked from commit f9493a15ea9cfb63a815c00c23142369ec09d8ce) diff --git a/malloc/tst-mallinfo2.c b/malloc/tst-mallinfo2.c index 2c02f5f700..f072b9f24b 100644 --- a/malloc/tst-mallinfo2.c +++ b/malloc/tst-mallinfo2.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + /* This is not specifically needed for the test, but (1) does something to the data so gcc doesn't optimize it away, and (2) may help when developing future tests. 
*/ diff --git a/malloc/tst-malloc-aux.h b/malloc/tst-malloc-aux.h index 54908b4a24..3e1b61ce34 100644 --- a/malloc/tst-malloc-aux.h +++ b/malloc/tst-malloc-aux.h @@ -22,20 +22,35 @@ #include #include - -static void *(*volatile aligned_alloc_indirect)(size_t, size_t) = aligned_alloc; -static void *(*volatile calloc_indirect)(size_t, size_t) = calloc; -static void *(*volatile malloc_indirect)(size_t) = malloc; -static void *(*volatile realloc_indirect)(void*, size_t) = realloc; +#include + +static __typeof (aligned_alloc) * volatile aligned_alloc_indirect + = aligned_alloc; +static __typeof (calloc) * volatile calloc_indirect = calloc; +static __typeof (malloc) * volatile malloc_indirect = malloc; +static __typeof (memalign) * volatile memalign_indirect = memalign; +static __typeof (posix_memalign) * volatile posix_memalign_indirect + = posix_memalign; +static __typeof (pvalloc) * volatile pvalloc_indirect = pvalloc; +static __typeof (realloc) * volatile realloc_indirect = realloc; +static __typeof (valloc) * volatile valloc_indirect = valloc; #undef aligned_alloc #undef calloc #undef malloc +#undef memalign +#undef posix_memalign +#undef pvalloc #undef realloc +#undef valloc #define aligned_alloc aligned_alloc_indirect #define calloc calloc_indirect #define malloc malloc_indirect +#define memalign memalign_indirect +#define posix_memalign posix_memalign_indirect +#define pvalloc pvalloc_indirect #define realloc realloc_indirect +#define valloc valloc_indirect #endif /* TST_MALLOC_AUX_H */ diff --git a/malloc/tst-malloc-backtrace.c b/malloc/tst-malloc-backtrace.c index c7b1d65e5c..65fa91f6fd 100644 --- a/malloc/tst-malloc-backtrace.c +++ b/malloc/tst-malloc-backtrace.c @@ -22,6 +22,8 @@ #include #include +#include "tst-malloc-aux.h" + #define SIZE 4096 /* Wrap free with a function to prevent gcc from optimizing it out. 
*/ diff --git a/malloc/tst-memalign.c b/malloc/tst-memalign.c index 563f6413d2..ac9770d3f9 100644 --- a/malloc/tst-memalign.c +++ b/malloc/tst-memalign.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void diff --git a/malloc/tst-safe-linking.c b/malloc/tst-safe-linking.c index 01dd07004d..63a7e2bc8e 100644 --- a/malloc/tst-safe-linking.c +++ b/malloc/tst-safe-linking.c @@ -26,6 +26,8 @@ #include #include +#include "tst-malloc-aux.h" + /* Run CALLBACK and check that the data on standard error equals EXPECTED. */ static void diff --git a/malloc/tst-valloc.c b/malloc/tst-valloc.c index 9bab8c6470..0243d3dfd4 100644 --- a/malloc/tst-valloc.c +++ b/malloc/tst-valloc.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void commit be48b8f6ad0ec6d0d6b1d2f45eb59bf8e8c67dd7 Author: Sam James Date: Fri Jan 10 03:03:47 2025 +0000 malloc: obscure calloc use in tst-calloc Similar to a9944a52c967ce76a5894c30d0274b824df43c7a and f9493a15ea9cfb63a815c00c23142369ec09d8ce, we need to hide calloc use from the compiler to accommodate GCC's r15-6566-g804e9d55d9e54c change. First, include tst-malloc-aux.h, but then use `volatile` variables for size. The test passes without the tst-malloc-aux.h change but IMO we want it there for consistency and to avoid future problems (possibly silent). Reviewed-by: H.J. Lu (cherry picked from commit c3d1dac96bdd10250aa37bb367d5ef8334a093a1) diff --git a/malloc/tst-calloc.c b/malloc/tst-calloc.c index 01f17f9e65..5a8c7ab121 100644 --- a/malloc/tst-calloc.c +++ b/malloc/tst-calloc.c @@ -23,6 +23,7 @@ #include #include +#include "tst-malloc-aux.h" /* Number of samples per size. */ #define N 50000 @@ -94,16 +95,19 @@ random_test (void) static void null_test (void) { + /* Obscure allocation size from the compiler. */ + volatile size_t max_size = UINT_MAX; + volatile size_t zero_size = 0; /* If the size is 0 the result is implementation defined. 
Just make sure the program doesn't crash. The result of calloc is deliberately ignored, so do not warn about that. */ DIAG_PUSH_NEEDS_COMMENT; DIAG_IGNORE_NEEDS_COMMENT (10, "-Wunused-result"); calloc (0, 0); - calloc (0, UINT_MAX); - calloc (UINT_MAX, 0); - calloc (0, ~((size_t) 0)); - calloc (~((size_t) 0), 0); + calloc (0, max_size); + calloc (max_size, 0); + calloc (0, ~((size_t) zero_size)); + calloc (~((size_t) zero_size), 0); DIAG_POP_NEEDS_COMMENT; } commit 85668221974db44459527e04d04f77ca8f8e3115 Author: H.J. Lu Date: Fri Jan 24 18:53:13 2025 +0800 stdlib: Test using setenv with updated environ [BZ #32588] Add a test for setenv with updated environ. Verify that BZ #32588 is fixed. Signed-off-by: H.J. Lu Reviewed-by: Florian Weimer (cherry picked from commit 8ab34497de14e35aff09b607222fe1309ef156da) diff --git a/stdlib/Makefile b/stdlib/Makefile index 8213fa83ef..d3a84fa641 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -307,6 +307,7 @@ tests := \ tst-setcontext9 \ tst-setcontext10 \ tst-setcontext11 \ + tst-setenv-environ \ tst-stdbit-Wconversion \ tst-stdbit-builtins \ tst-stdc_bit_ceil \ diff --git a/stdlib/tst-setenv-environ.c b/stdlib/tst-setenv-environ.c new file mode 100644 index 0000000000..02fcef96d0 --- /dev/null +++ b/stdlib/tst-setenv-environ.c @@ -0,0 +1,36 @@ +/* Test using setenv with updated environ. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +extern char **environ; + +int +do_test (void) +{ + char *valp; + static char *dummy_environ[] = { NULL }; + environ = dummy_environ; + setenv ("A", "1", 0); + valp = getenv ("A"); + TEST_VERIFY_EXIT (valp[0] == '1' && valp[1] == '\0'); + return 0; +} + +#include commit e899ca3651f8c5e01bf3420cfb34aad97d093f74 Author: John David Anglin Date: Wed Jan 29 16:51:16 2025 -0500 nptl: Correct stack size attribute when stack grows up [BZ #32574] Set stack size attribute to the size of the mmap'd region only when the size of the remaining stack space is less than the size of the mmap'd region. This was reversed. As a result, the initial stack size was only 135168 bytes. On architectures where the stack grows down, the initial stack size is approximately 8384512 bytes with the default rlimit settings. The small main stack size on hppa broke applications like ruby that check for stack overflows. Signed-off-by: John David Anglin diff --git a/nptl/pthread_getattr_np.c b/nptl/pthread_getattr_np.c index 1e91874767..3ce34437bc 100644 --- a/nptl/pthread_getattr_np.c +++ b/nptl/pthread_getattr_np.c @@ -145,9 +145,9 @@ __pthread_getattr_np (pthread_t thread_id, pthread_attr_t *attr) > (size_t) iattr->stackaddr - last_to) iattr->stacksize = (size_t) iattr->stackaddr - last_to; #else - /* The limit might be too high. */ + /* The limit might be too low. */ if ((size_t) iattr->stacksize - > to - (size_t) iattr->stackaddr) + < to - (size_t) iattr->stackaddr) iattr->stacksize = to - (size_t) iattr->stackaddr; #endif /* We succeed and no need to look further. */ commit d6c156c326999f144cb5b73d29982108d549ad8a Author: Siddhesh Poyarekar Date: Fri Jan 31 12:16:30 2025 -0500 assert: Add test for CVE-2025-0395 Use the __progname symbol to override the program name to induce the failure that CVE-2025-0395 describes. 
This is related to BZ #32582 Signed-off-by: Siddhesh Poyarekar Reviewed-by: Adhemerval Zanella (cherry picked from commit cdb9ba84191ce72e86346fb8b1d906e7cd930ea2) diff --git a/assert/Makefile b/assert/Makefile index 35dc908ddb..c0fe660bd6 100644 --- a/assert/Makefile +++ b/assert/Makefile @@ -38,6 +38,7 @@ tests := \ test-assert-perr \ tst-assert-c++ \ tst-assert-g++ \ + tst-assert-sa-2025-0001 \ # tests ifeq ($(have-cxx-thread_local),yes) diff --git a/assert/tst-assert-sa-2025-0001.c b/assert/tst-assert-sa-2025-0001.c new file mode 100644 index 0000000000..102cb0078d --- /dev/null +++ b/assert/tst-assert-sa-2025-0001.c @@ -0,0 +1,92 @@ +/* Test for CVE-2025-0395. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Test that a large enough __progname does not result in a buffer overflow + when printing an assertion failure. This was CVE-2025-0395. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char *__progname; + +int +do_test (int argc, char **argv) +{ + + support_need_proc ("Reads /proc/self/maps to add guards to writable maps."); + ignore_stderr (); + + /* XXX assumes that the assert is on a 2 digit line number. 
*/ + const char *prompt = ": %s:99: do_test: Assertion `argc < 1' failed.\n"; + + int ret = fprintf (stderr, prompt, __FILE__); + if (ret < 0) + FAIL_EXIT1 ("fprintf failed: %m\n"); + + size_t pagesize = getpagesize (); + size_t namesize = pagesize - 1 - ret; + + /* Alter the progname so that the assert message fills the entire page. */ + char progname[namesize]; + memset (progname, 'A', namesize - 1); + progname[namesize - 1] = '\0'; + __progname = progname; + + FILE *f = xfopen ("/proc/self/maps", "r"); + char *line = NULL; + size_t len = 0; + uintptr_t prev_to = 0; + + /* Pad the beginning of every writable mapping with a PROT_NONE map. This + ensures that the mmap in the assert_fail path never ends up below a + writable map and will terminate immediately in case of a buffer + overflow. */ + while (xgetline (&line, &len, f)) + { + uintptr_t from, to; + char perm[4]; + + sscanf (line, "%" SCNxPTR "-%" SCNxPTR " %c%c%c%c ", + &from, &to, + &perm[0], &perm[1], &perm[2], &perm[3]); + + bool writable = (memchr (perm, 'w', 4) != NULL); + + if (prev_to != 0 && from - prev_to > pagesize && writable) + xmmap ((void *) from - pagesize, pagesize, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + + prev_to = to; + } + + xfclose (f); + + assert (argc < 1); + return 0; +} + +#define EXPECTED_SIGNAL SIGABRT +#define TEST_FUNCTION_ARGV do_test +#include commit 523f85558152a1b9cced6d669f758c27677775ba Author: John David Anglin Date: Tue Feb 25 15:57:53 2025 -0500 math: Add optimization barrier to ensure a1 + u.d is not reused [BZ #30664] A number of fma tests started to fail on hppa when gcc was changed to use Ranger rather than EVRP. Eventually I found that the value of a1 + u.d in this block of code was being computed in FE_TOWARDZERO mode and not the original rounding mode: if (TININESS_AFTER_ROUNDING) { w.d = a1 + u.d; if (w.ieee.exponent == 109) return w.d * 0x1p-108; } This caused the exponent value to be wrong and the wrong return path to be used.
Here we add an optimization barrier after the rounding mode is reset to ensure that the previous value of a1 + u.d is not reused. Signed-off-by: John David Anglin diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index c5f5abdc68..79a3cd721d 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -244,6 +244,9 @@ __fma (double x, double y, double z) /* Reset rounding mode and test for inexact simultaneously. */ int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0; + /* Ensure value of a1 + u.d is not reused. */ + a1 = math_opt_barrier (a1); + if (__glibc_likely (adjust == 0)) { if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) commit ff10623706ea0096f3af7b38a3330ffb7fb15ae7 Author: Joe Ramsay Date: Mon Sep 9 13:00:01 2024 +0100 aarch64: Avoid redundant MOVs in AdvSIMD F32 logs Since the last operation is destructive, the first argument to the FMA also has to be the first argument to the special-case in order to avoid unnecessary MOVs. Reorder arguments and adjust special-case bounds to facilitate this. Reviewed-by: Wilco Dijkstra (cherry picked from commit 8b09af572b208bfde4d31c6abbae047dcc217675) diff --git a/sysdeps/aarch64/fpu/log10f_advsimd.c b/sysdeps/aarch64/fpu/log10f_advsimd.c index 9347422a77..82228b599a 100644 --- a/sysdeps/aarch64/fpu/log10f_advsimd.c +++ b/sysdeps/aarch64/fpu/log10f_advsimd.c @@ -22,11 +22,11 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; + uint32x4_t mantissa_mask; float32x4_t poly[8]; float32x4_t inv_ln10, ln2; - uint32x4_t off, mantissa_mask; } data = { /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. 
*/ @@ -35,18 +35,22 @@ static const struct data V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) }, .ln2 = V4 (0x1.62e43p-1f), .inv_ln10 = V4 (0x1.bcb7b2p-2f), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. */ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, - uint16x4_t cmp) +special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); + return v_call_f32 (log10f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); } /* Fast implementation of AdvSIMD log10f, @@ -58,15 +62,21 @@ special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend.
*/ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log10(1+r) + n * log10(2). */ @@ -77,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) y = vmulq_f32 (y, d->inv_ln10); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, poly, r2, special); + return special_case (y, u_off, poly, r2, special, d); return vfmaq_f32 (y, poly, r2); } libmvec_hidden_def (V_NAME_F1 (log10)) diff --git a/sysdeps/aarch64/fpu/log2f_advsimd.c b/sysdeps/aarch64/fpu/log2f_advsimd.c index db21836749..84effe4fe9 100644 --- a/sysdeps/aarch64/fpu/log2f_advsimd.c +++ b/sysdeps/aarch64/fpu/log2f_advsimd.c @@ -22,9 +22,9 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; - uint32x4_t off, mantissa_mask; + uint32x4_t mantissa_mask; float32x4_t poly[9]; } data = { /* Coefficients generated using Remez algorithm approximate @@ -34,18 +34,22 @@ static const struct data V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f), V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f), V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) }, - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667.
*/ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, - uint16x4_t cmp) +special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); + return v_call_f32 (log2f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); } /* Fast implementation for single precision AdvSIMD log2, @@ -56,15 +60,21 @@ special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log2(1+r) + n. 
*/ @@ -72,7 +82,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, n, p, r, special); + return special_case (n, u_off, p, r, special, d); return vfmaq_f32 (n, p, r); } libmvec_hidden_def (V_NAME_F1 (log2)) diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c index 3c0d0fcdc7..c20dbfd6c0 100644 --- a/sysdeps/aarch64/fpu/logf_advsimd.c +++ b/sysdeps/aarch64/fpu/logf_advsimd.c @@ -21,20 +21,22 @@ static const struct data { - uint32x4_t min_norm; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; + uint32x4_t mantissa_mask; float32x4_t poly[7]; - float32x4_t ln2, tiny_bound; - uint32x4_t off, mantissa_mask; + float32x4_t ln2; } data = { /* 3.34 ulp error. */ .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), V4 (-0x1.ffffc8p-2f) }, .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x1p-126), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. */ .mantissa_mask = V4 (0x007fffff) }; @@ -42,32 +44,37 @@ static const struct data #define P(i) d->poly[7 - i] static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p, - uint16x4_t cmp) +special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code.
*/ - return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); + return v_call_f32 (logf, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); } float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) { const struct data *d = ptr_barrier (&data); float32x4_t n, p, q, r, r2, y; - uint32x4_t u; + uint32x4_t u, u_off; uint16x4_t cmp; - u = vreinterpretq_u32_f32 (x); - cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vandq_u32 (u, d->mantissa_mask); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + u = vandq_u32 (u_off, d->mantissa_mask); u = vaddq_u32 (u, d->off); r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + /* y = log(1+r) + n*ln2. */ r2 = vmulq_f32 (r, r); /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ @@ -80,7 +87,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) p = vfmaq_f32 (r, d->ln2, n); if (__glibc_unlikely (v_any_u16h (cmp))) - return special_case (x, y, r2, p, cmp); + return special_case (p, u_off, y, r2, cmp, d); return vfmaq_f32 (p, y, r2); } libmvec_hidden_def (V_NAME_F1 (log)) commit a991a0fc7c051d7ef2ea7778e0a699f22d4e53d7 Author: Joe Ramsay Date: Thu Sep 19 17:34:02 2024 +0100 AArch64: Add vector logp1 alias for log1p This enables vectorisation of C23 logp1, which is an alias for log1p. There are no new tests or ulp entries because the new symbols are simply aliases. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit 751a5502bea1d13551c62c47bb9bd25bff870cda) diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index 08a41c46ad..5019e8e25c 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -253,6 +253,17 @@ #define __DECL_SIMD_log1pf64x #define __DECL_SIMD_log1pf128x +#define __DECL_SIMD_logp1 +#define __DECL_SIMD_logp1f +#define __DECL_SIMD_logp1l +#define __DECL_SIMD_logp1f16 +#define __DECL_SIMD_logp1f32 +#define __DECL_SIMD_logp1f64 +#define __DECL_SIMD_logp1f128 +#define __DECL_SIMD_logp1f32x +#define __DECL_SIMD_logp1f64x +#define __DECL_SIMD_logp1f128x + #define __DECL_SIMD_atanh #define __DECL_SIMD_atanhf #define __DECL_SIMD_atanhl diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 6cb594b6ff..92856becc4 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -126,7 +126,7 @@ __MATHCALL (log2p1,, (_Mdouble_ __x)); __MATHCALL (log10p1,, (_Mdouble_ __x)); /* Return log(1 + X). 
*/ -__MATHCALL (logp1,, (_Mdouble_ __x)); +__MATHCALL_VEC (logp1,, (_Mdouble_ __x)); #endif #if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index cc15ce2d1e..015211f5f4 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -135,4 +135,11 @@ libmvec { _ZGVsMxv_tanh; _ZGVsMxv_tanhf; } + GLIBC_2.41 { + _ZGVnN2v_logp1; + _ZGVnN2v_logp1f; + _ZGVnN4v_logp1f; + _ZGVsMxv_logp1; + _ZGVsMxv_logp1f; + } } diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h index 097d403ffe..5909bb4ce9 100644 --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h @@ -36,6 +36,7 @@ libmvec_hidden_proto (V_NAME_F2(hypot)); libmvec_hidden_proto (V_NAME_F1(log10)); libmvec_hidden_proto (V_NAME_F1(log1p)); libmvec_hidden_proto (V_NAME_F1(log2)); +libmvec_hidden_proto (V_NAME_F1(logp1)); libmvec_hidden_proto (V_NAME_F1(log)); libmvec_hidden_proto (V_NAME_F2(pow)); libmvec_hidden_proto (V_NAME_F1(sin)); diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index 7484150131..f295fe185d 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -113,6 +113,10 @@ # define __DECL_SIMD_log2 __DECL_SIMD_aarch64 # undef __DECL_SIMD_log2f # define __DECL_SIMD_log2f __DECL_SIMD_aarch64 +# undef __DECL_SIMD_logp1 +# define __DECL_SIMD_logp1 __DECL_SIMD_aarch64 +# undef __DECL_SIMD_logp1f +# define __DECL_SIMD_logp1f __DECL_SIMD_aarch64 # undef __DECL_SIMD_pow # define __DECL_SIMD_pow __DECL_SIMD_aarch64 # undef __DECL_SIMD_powf @@ -180,6 +184,7 @@ __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); __vpcs __f32x4_t 
_ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); @@ -207,6 +212,7 @@ __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); @@ -239,6 +245,7 @@ __sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t); @@ -266,6 +273,7 @@ __sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t); diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c index ffc418fc9c..114064c696 100644 --- a/sysdeps/aarch64/fpu/log1p_advsimd.c +++ b/sysdeps/aarch64/fpu/log1p_advsimd.c @@ -127,3 +127,5 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) return vfmaq_f64 (y, f2, p); } + +strong_alias (V_NAME_D1 (log1p), V_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c index 04f7e5720e..b21cfb2c90 100644 --- a/sysdeps/aarch64/fpu/log1p_sve.c +++ b/sysdeps/aarch64/fpu/log1p_sve.c @@ -116,3 +116,5 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, 
svbool_t pg) return y; } + +strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c index dc15334a85..8cfa28fb8a 100644 --- a/sysdeps/aarch64/fpu/log1pf_advsimd.c +++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c @@ -128,3 +128,6 @@ VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) } libmvec_hidden_def (V_NAME_F1 (log1p)) HALF_WIDTH_ALIAS_F1 (log1p) +strong_alias (V_NAME_F1 (log1p), V_NAME_F1 (logp1)) +libmvec_hidden_def (V_NAME_F1 (logp1)) +HALF_WIDTH_ALIAS_F1 (logp1) diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c index f645cc997e..5256d5e94c 100644 --- a/sysdeps/aarch64/fpu/log1pf_sve.c +++ b/sysdeps/aarch64/fpu/log1pf_sve.c @@ -98,3 +98,5 @@ svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) return y; } + +strong_alias (SV_NAME_F1 (log1p), SV_NAME_F1 (logp1)) diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index b685106954..98687cae0d 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -128,3 +128,8 @@ GLIBC_2.40 _ZGVsMxvv_hypot F GLIBC_2.40 _ZGVsMxvv_hypotf F GLIBC_2.40 _ZGVsMxvv_pow F GLIBC_2.40 _ZGVsMxvv_powf F +GLIBC_2.41 _ZGVnN2v_logp1 F +GLIBC_2.41 _ZGVnN2v_logp1f F +GLIBC_2.41 _ZGVnN4v_logp1f F +GLIBC_2.41 _ZGVsMxv_logp1 F +GLIBC_2.41 _ZGVsMxv_logp1f F commit 354aeaf2130c1484007025563fe87c997f07324a Author: Joe Ramsay Date: Mon Sep 23 15:26:12 2024 +0100 AArch64: Improve codegen in SVE expf & related routines Reduce MOV and MOVPRFX by improving special-case handling. Use inline helper to duplicate the entire computation between the special- and non-special case branches, removing the contention for z0 between x and the return value. Also rearrange some MLAs and MLSs - by making the multiplicand the destination we can avoid a MOVPRFX in several cases. 
Also change which constants go in the vector used for lanewise ops - the last lane is no longer wasted. Spotted that shift was incorrect in exp2f and exp10f, w.r.t. the comment that explains it. Fixed - worst-case ULP for exp2f moves around but it doesn't change significantly for either routine. Worst-case error for coshf increases due to passing x to exp rather than abs(x) - updated the comment, but does not require regen-ulps. Reviewed-by: Wilco Dijkstra (cherry picked from commit 7b8c134b5460ed933d610fa92ed1227372b68fdc) diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c index e5d8a299c6..7ad6efa0fc 100644 --- a/sysdeps/aarch64/fpu/coshf_sve.c +++ b/sysdeps/aarch64/fpu/coshf_sve.c @@ -23,37 +23,42 @@ static const struct data { struct sv_expf_data expf_consts; - uint32_t special_bound; + float special_bound; } data = { .expf_consts = SV_EXPF_DATA, /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ - .special_bound = 0x42ad496c, + .special_bound = 0x1.5a92d8p+6, }; static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t pg) +special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e, + svbool_t pg) { - return sv_call_f32 (coshf, x, y, pg); + return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e), + pg); } /* Single-precision vector cosh, using vector expf. - Maximum error is 1.89 ULP: - _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127 - want 0x1.f00adcp+127. */ + Maximum error is 2.77 ULP: + _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2 + want 0x1.e4594cp+2. */ svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svfloat32_t ax = svabs_x (pg, x); - svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound); + svbool_t special = svacge (pg, x, d->special_bound); - /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.
*/ - svfloat32_t t = expf_inline (ax, pg, &d->expf_consts); - svfloat32_t half_t = svmul_x (pg, t, 0.5); - svfloat32_t half_over_t = svdivr_x (pg, t, 0.5); + /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. + Note that x is passed to exp here, rather than |x|. This is to avoid using + destructive unary ABS for better register usage. However it means the + routine is not exactly symmetrical, as the exp helper is slightly less + accurate in the negative range. */ + svfloat32_t e = expf_inline (x, pg, &d->expf_consts); + svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5); + svfloat32_t half_over_e = svdivr_x (pg, e, 0.5); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (x, half_e, half_over_e, special); - return svadd_x (pg, half_t, half_over_t); + return svadd_x (svptrue_b32 (), half_e, half_over_e); } diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c index e09b2f3b27..8aa3fa9c43 100644 --- a/sysdeps/aarch64/fpu/exp10f_sve.c +++ b/sysdeps/aarch64/fpu/exp10f_sve.c @@ -18,74 +18,83 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" -/* For x < -SpecialBound, the result is subnormal and not handled correctly by +/* For x < -Thres, the result is subnormal and not handled correctly by FEXPA. */ -#define SpecialBound 37.9 +#define Thres 37.9 static const struct data { - float poly[5]; - float shift, log10_2, log2_10_hi, log2_10_lo, special_bound; + float log2_10_lo, c0, c2, c4; + float c1, c3, log10_2; + float shift, log2_10_hi, thres; } data = { /* Coefficients generated using Remez algorithm with minimisation of relative error. rel error: 0x1.89dafa3p-24 abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] maxerr: 0.52 +0.5 ulp. 
*/ - .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f, - 0x1.12b41ap-1f }, + .c0 = 0x1.26bb16p+1f, + .c1 = 0x1.5350d2p+1f, + .c2 = 0x1.04744ap+1f, + .c3 = 0x1.2d8176p+0f, + .c4 = 0x1.12b41ap-1f, /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, .log10_2 = 0x1.a934fp+1, .log2_10_hi = 0x1.344136p-2, .log2_10_lo = -0x1.ec10cp-27, - .special_bound = SpecialBound, + .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +static inline svfloat32_t +sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - return sv_call_f32 (exp10f, x, y, special); -} - -/* Single-precision SVE exp10f routine. Implements the same algorithm - as AdvSIMD exp10f. - Worst case error is 1.02 ULPs. - _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 - want 0x1.ba5f9cp-1. */ -svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) -{ - const struct data *d = ptr_barrier (&data); /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */ - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo); /* n = round(x/(log10(2)/N)). */ svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0); - svfloat32_t n = svsub_x (pg, z, shift); + svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, shift); /* r = x - n*log10(2)/N. 
*/ - svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1); - r = svmls_lane (r, n, log10_2_and_inv, 2); + svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); - svbool_t special = svacgt (pg, x, d->special_bound); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp10(r)-1. */ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t poly - = svmla_x (pg, svmul_x (pg, r, d->poly[0]), - sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2); - - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p14 = svmla_x (pg, p12, p34, r2); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); + svfloat32_t poly = svmla_x (pg, p0, r2, p14); return svmla_x (pg, scale, scale, poly); } + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp10f routine. Implements the same algorithm + as AdvSIMD exp10f. + Worst case error is 1.02 ULPs. + _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 + want 0x1.ba5f9cp-1. 
*/ +svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp10f_inline (x, pg, d); +} diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c index 8a686e3e05..c6216bed9e 100644 --- a/sysdeps/aarch64/fpu/exp2f_sve.c +++ b/sysdeps/aarch64/fpu/exp2f_sve.c @@ -24,54 +24,64 @@ static const struct data { - float poly[5]; + float c0, c2, c4, c1, c3; float shift, thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. */ - .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, - 0x1.59977ap-10f }, + /* Coefficients copied from the polynomial in AdvSIMD variant. */ + .c0 = 0x1.62e422p-1f, + .c1 = 0x1.ebf9bcp-3f, + .c2 = 0x1.c6bd32p-5f, + .c3 = 0x1.3ce9e4p-7f, + .c4 = 0x1.59977ap-10f, /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled correctly by FEXPA. */ .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (exp2f, x, y, special); -} - -/* Single-precision SVE exp2f routine. Implements the same algorithm - as AdvSIMD exp2f. - Worst case error is 1.04 ULPs. - SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 - want 0x1.ba7ebp+0. */ -svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +static inline svfloat32_t +sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - const struct data *d = ptr_barrier (&data); /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = n + r, with r in [-1/2, 1/2]. 
*/ - svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svadd_x (pg, x, shift); - svfloat32_t n = svsub_x (pg, z, shift); - svfloat32_t r = svsub_x (pg, x, n); + svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift); + svfloat32_t r = svsub_x (svptrue_b32 (), x, n); - svbool_t special = svacgt (pg, x, d->thres); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp2(r)-1. Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for coefficients 1 to 4, and apply most significant coefficient directly. */ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1); - svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); + svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2); + svfloat32_t p14 = svmla_x (pg, p12, r2, p34); + svfloat32_t p0 = svmul_lane (r, even_coeffs, 0); svfloat32_t poly = svmla_x (pg, p0, r2, p14); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); - return svmla_x (pg, scale, scale, poly); } + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp2f routine. Implements the same algorithm + as AdvSIMD exp2f. + Worst case error is 1.04 ULPs. + _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1 + want 0x1.ba6a64p-1. 
*/ +svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp2f_inline (x, pg, d); +} diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c index 3ba79bc4f1..da93e01b87 100644 --- a/sysdeps/aarch64/fpu/expf_sve.c +++ b/sysdeps/aarch64/fpu/expf_sve.c @@ -18,33 +18,25 @@ . */ #include "sv_math.h" +#include "sv_expf_inline.h" + +/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ +#define Thres 0x1.5d5e2ap+6f static const struct data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift, thres; + struct sv_expf_data d; + float thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. */ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, - 0x1.0e4020p-7f }, - .inv_ln2 = 0x1.715476p+0f, - .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, - /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, - /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled - correctly by FEXPA. */ - .thres = 0x1.5d5e2ap+6f, + .d = SV_EXPF_DATA, + .thres = Thres, }; -#define C(i) sv_f32 (d->poly[i]) -#define ExponentBias 0x3f800000 - static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d) { - return sv_call_f32 (expf, x, y, special); + return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special); } /* Optimised single-precision SVE exp function. 
@@ -54,36 +46,8 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2); - - /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0); - svfloat32_t n = svsub_x (pg, z, d->shift); - - /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1); - r = svmls_lane (r, n, invln2_and_ln2, 2); - - /* scale = 2^(n/N). */ svbool_t is_special_case = svacgt (pg, x, d->thres); - svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_x (pg, C (3), C (4), r); - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_x (pg, r, C (0)); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - if (__glibc_unlikely (svptest_any (pg, is_special_case))) - return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case); - - return svmla_x (pg, scale, scale, poly); + return special_case (x, is_special_case, &d->d); + return expf_inline (x, pg, &d->d); } diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h index 23963b5f8e..6166df6553 100644 --- a/sysdeps/aarch64/fpu/sv_expf_inline.h +++ b/sysdeps/aarch64/fpu/sv_expf_inline.h @@ -24,19 +24,20 @@ struct sv_expf_data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift; + float c1, c3, inv_ln2; + float ln2_lo, c0, c2, c4; + float ln2_hi, shift; }; /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for 
compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */ #define SV_EXPF_DATA \ { \ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \ - 0x1.0e4020p-7f }, \ - \ - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ - .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \ + /* Coefficients copied from the polynomial in AdvSIMD variant. */ \ + .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f, \ + .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + .shift = 0x1.803f8p17f, \ } #define C(i) sv_f32 (d->poly[i]) @@ -47,26 +48,25 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - /* Load some constants in quad-word chunks to minimise memory access. */ - svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo); /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1); + svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift); svfloat32_t n = svsub_x (pg, z, d->shift); /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2); - r = svmls_lane (r, n, c4_invln2_and_ln2, 3); + svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x); + r = svmls_lane (r, n, lane_consts, 0); /* scale = 2^(n/N). */ - svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z)); + svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0); - svfloat32_t r2 = svmul_f32_x (pg, r, r); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_f32_x (pg, r, C (0)); + svfloat32_t p0 = svmul_lane (r, lane_consts, 1); svfloat32_t poly = svmla_x (pg, p0, r2, p14); return svmla_x (pg, scale, scale, poly); commit c4373426e3a85ec483a0f412c2a7c6cdfa32ccdb Author: Joe Ramsay Date: Mon Sep 23 15:30:20 2024 +0100 AArch64: Improve codegen in SVE F32 logs Reduce MOVPRFXs by using unpredicated (non-destructive) instructions where possible. Similar to the recent change to AdvSIMD F32 logs, adjust special-case arguments and bounds to allow for more optimal register usage. For all 3 routines one MOVPRFX remains in the reduction, which cannot be avoided as immediate AND and ASR are both destructive. Reviewed-by: Wilco Dijkstra (cherry picked from commit a15b1394b5eba98ffe28a02a392b587e4fe13c0d) diff --git a/sysdeps/aarch64/fpu/log10f_sve.c b/sysdeps/aarch64/fpu/log10f_sve.c index bdbb49cd32..7913679f67 100644 --- a/sysdeps/aarch64/fpu/log10f_sve.c +++ b/sysdeps/aarch64/fpu/log10f_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0246[4]; float poly_1357[4]; float ln2, inv_ln10; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -35,18 +36,23 @@ static const struct data -0x1.0fc92cp-4f }, .ln2 = 0x1.62e43p-1f, .inv_ln10 = 0x1.bcb7b2p-2f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around).
*/ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min 0x00800000 -#define Max 0x7f800000 -#define Thres 0x7f000000 /* Max - Min. */ -#define Offset 0x3f2aaaab /* 0.666667. */ +#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000. */ #define MantissaMask 0x007fffff static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log10f, x, y, special); + return sv_call_f32 ( + log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log10f using the same algorithm and @@ -57,23 +63,25 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t ix = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); + + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - ix = svsub_x (pg, ix, Offset); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend. */ - ix = svand_x (pg, ix, MantissaMask); - ix = svadd_x (pg, ix, Offset); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend. */ + svuint32_t ix = svand_x (pg, u_off, MantissaMask); + ix = svadd_x (pg, ix, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f); /* y = log10(1+r) + n*log10(2) log10(1+r) ~ r * InvLn(10) + P(r) where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t r4 = svmul_x (pg, r2, r2); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2); svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0); svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1); @@ -88,7 +96,6 @@ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) hi = svmul_x (pg, hi, d->inv_ln10); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); - return svmla_x (pg, hi, r2, y); + return special_case (u_off, hi, r2, y, special); + return svmla_x (svptrue_b32 (), hi, r2, y); } diff --git a/sysdeps/aarch64/fpu/log2f_sve.c b/sysdeps/aarch64/fpu/log2f_sve.c index 5031c42483..939d89bfb9 100644 --- a/sysdeps/aarch64/fpu/log2f_sve.c +++ b/sysdeps/aarch64/fpu/log2f_sve.c @@ -23,6 +23,7 @@ static const struct data { float poly_02468[5]; float poly_1357[4]; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -32,18 +33,23 @@ static const struct data }, .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f, 0x1.9d8ecap-3f, 0x1.9e495p-3f }, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thres (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define MantissaMask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667.
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log2f, x, y, cmp); + return sv_call_f32 ( + log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log2f, using the same algorithm @@ -55,19 +61,20 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, MantissaMask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + svuint32_t u = svand_x (pg, u_off, MantissaMask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log2(1+r) + n. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* Evaluate polynomial using pairwise Horner scheme. 
*/ svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); @@ -81,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) y = svmla_x (pg, q_01, r2, y); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special); - return svmla_x (pg, n, r, y); + return special_case (u_off, n, r, y, special); + return svmla_x (svptrue_b32 (), n, r, y); } diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c index d64e810cfe..5b9324678d 100644 --- a/sysdeps/aarch64/fpu/logf_sve.c +++ b/sysdeps/aarch64/fpu/logf_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0135[4]; float poly_246[3]; float ln2; + uint32_t off, lower; } data = { .poly_0135 = { /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so @@ -32,19 +33,24 @@ static const struct data -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f }, .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f }, - .ln2 = 0x1.62e43p-1f + .ln2 = 0x1.62e43p-1f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thresh (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define Mask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667.
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (logf, x, y, cmp); + return sv_call_f32 ( + logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE logf, using the same algorithm and @@ -55,19 +61,21 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, Mask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + + svuint32_t u = svand_x (pg, u_off, Mask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log(1+r) + n*ln2. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). 
*/ svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]); svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1); @@ -80,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) p = svmla_x (pg, r, n, d->ln2); if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp); + return special_case (u_off, p, r2, y, cmp); return svmla_x (pg, p, r2, y); } commit 520240173029fd03388ec01db9a5359291cbbd27 Author: Joe Ramsay Date: Mon Sep 23 15:32:14 2024 +0100 AArch64: Improve codegen in users of AdvSIMD log1pf helper log1pf is quite register-intensive - use fewer registers for the polynomial, and make various changes to shorten dependency chains in parent routines. There is now no spilling with GCC 14. Accuracy moves around a little - comments adjusted accordingly but does not require regen-ulps. Use the helper in log1pf as well, instead of having separate implementations. The more accurate polynomial means special-casing can be simplified, and the shorter dependency chain avoids the usual dance around v0, which is otherwise difficult. There is a small duplication of vectors containing 1.0f (or 0x3f800000) - GCC is not currently able to efficiently handle values which fit in FMOV but not MOVI, and are reinterpreted to integer. There may be potential for more optimisation if this is fixed. Reviewed-by: Wilco Dijkstra (cherry picked from commit 5bc100bd4b7e00db3009ae93d25d303341545d23) diff --git a/sysdeps/aarch64/fpu/acoshf_advsimd.c b/sysdeps/aarch64/fpu/acoshf_advsimd.c index 8916dcbf40..004474acf9 100644 --- a/sysdeps/aarch64/fpu/acoshf_advsimd.c +++ b/sysdeps/aarch64/fpu/acoshf_advsimd.c @@ -25,35 +25,32 @@ const static struct data { struct v_log1pf_data log1pf_consts; uint32x4_t one; - uint16x4_t thresh; -} data = { - .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, - .one = V4 (0x3f800000), - .thresh = V4 (0x2000) /* top(asuint(SquareLim) - asuint(1)). 
*/ -}; +} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) }; + +#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ static float32x4_t NOINLINE VPCS_ATTR special_case (float32x4_t x, float32x4_t y, uint16x4_t special, - const struct v_log1pf_data d) + const struct v_log1pf_data *d) { return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special)); } /* Vector approximation for single-precision acosh, based on log1p. Maximum error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it - is 2.78 ULP: - __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 - want 0x1.ef9ea2p-3. + is 3.00 ULP: + _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4 + want 0x1.ef0a7cp-4. With exceptions disabled, we can compute u with a shorter dependency chain, - which gives maximum error of 3.07 ULP: - __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 - want 0x1.fbc7f4p-4. */ + which gives maximum error of 3.22 ULP: + _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5 + want 0x1.fdcdd2p-5. */ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh); + uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh); #if WANT_SIMD_EXCEPT /* Mask special lanes with 1 to side-step spurious invalid or overflow. 
Use @@ -64,15 +61,16 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p); float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1); #else - float32x4_t xm1 = vsubq_f32 (x, v_f32 (1)); - float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f))); + float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one)); + float32x4_t u + = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one))); #endif float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u)); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, special, d->log1pf_consts); - return log1pf_inline (y, d->log1pf_consts); + return special_case (x, y, special, &d->log1pf_consts); + return log1pf_inline (y, &d->log1pf_consts); } libmvec_hidden_def (V_NAME_F1 (acosh)) HALF_WIDTH_ALIAS_F1 (acosh) diff --git a/sysdeps/aarch64/fpu/asinhf_advsimd.c b/sysdeps/aarch64/fpu/asinhf_advsimd.c index 09fd8a6143..eb789b91b6 100644 --- a/sysdeps/aarch64/fpu/asinhf_advsimd.c +++ b/sysdeps/aarch64/fpu/asinhf_advsimd.c @@ -20,16 +20,16 @@ #include "v_math.h" #include "v_log1pf_inline.h" -#define SignMask v_u32 (0x80000000) - const static struct data { struct v_log1pf_data log1pf_consts; + float32x4_t one; uint32x4_t big_bound; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound; #endif } data = { + .one = V4 (1), .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .big_bound = V4 (0x5f800000), /* asuint(0x1p64). 
*/ #if WANT_SIMD_EXCEPT @@ -38,20 +38,27 @@ const static struct data }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t sign, float32x4_t y, + uint32x4_t special, const struct data *d) { - return v_call_f32 (asinhf, x, y, special); + return v_call_f32 ( + asinhf, x, + vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))), + special); } /* Single-precision implementation of vector asinh(x), using vector log1p. - Worst-case error is 2.66 ULP, at roughly +/-0.25: - __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ + Worst-case error is 2.59 ULP: + _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3 + want 0x1.d449c4p-3. */ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) { const struct data *dat = ptr_barrier (&data); - uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask); - float32x4_t ax = vreinterpretq_f32_u32 (iax); + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t special = vcgeq_u32 (iax, dat->big_bound); + uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax); float32x4_t special_arg = x; #if WANT_SIMD_EXCEPT @@ -68,13 +75,13 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) /* asinh(x) = log(x + sqrt(x * x + 1)). For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). 
*/ float32x4_t d - = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x))); - float32x4_t y = log1pf_inline ( - vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts); + = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax))); + float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)); if (__glibc_unlikely (v_any_u32 (special))) - return special_case (special_arg, vbslq_f32 (SignMask, x, y), special); - return vbslq_f32 (SignMask, x, y); + return special_case (special_arg, sign, y, special, dat); + return vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts)))); } libmvec_hidden_def (V_NAME_F1 (asinh)) HALF_WIDTH_ALIAS_F1 (asinh) diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c index ae488f7b54..818b6c92ad 100644 --- a/sysdeps/aarch64/fpu/atanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c @@ -40,15 +40,17 @@ const static struct data #define Half v_u32 (0x3f000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y, + uint32x4_t special) { - return v_call_f32 (atanhf, x, y, special); + return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign), + vmulq_f32 (halfsign, y), special); } /* Approximation for vector single-precision atanh(x) using modified log1p. - The maximum error is 3.08 ULP: - __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5 - want 0x1.ffcb82p-5. */ + The maximum error is 2.93 ULP: + _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5 + want 0x1.f4dcf8p-5. 
*/ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); @@ -68,11 +70,19 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) uint32x4_t special = vcgeq_u32 (iax, d->one); #endif - float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax)); - y = log1pf_inline (y, d->log1pf_consts); + float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), + vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax)); + y = log1pf_inline (y, &d->log1pf_consts); + /* If exceptions not required, pass ax to special-case for shorter dependency + chain. If exceptions are required ax will have been zerofied, so have to + pass x. */ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (halfsign, y), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special); +#else + return special_case (ax, halfsign, y, special); +#endif return vmulq_f32 (halfsign, y); } libmvec_hidden_def (V_NAME_F1 (atanh)) diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c index 8cfa28fb8a..00006fc703 100644 --- a/sysdeps/aarch64/fpu/log1pf_advsimd.c +++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c @@ -18,114 +18,79 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_log1pf_inline.h" + +#if WANT_SIMD_EXCEPT const static struct data { - float32x4_t poly[8], ln2; - uint32x4_t tiny_bound, minus_one, four, thresh; - int32x4_t three_quarters; + uint32x4_t minus_one, thresh; + struct v_log1pf_data d; } data = { - .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients - (1, -0.5) are not stored as they can be generated more - efficiently. */ - V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, - .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. 
*/ - .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound. */ + .d = V_LOG1PF_CONSTANTS_TABLE, + .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound. */ .minus_one = V4 (0xbf800000), - .four = V4 (0x40800000), - .three_quarters = V4 (0x3f400000) }; -static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *p) -{ - /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme. */ - float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]); - float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]); - float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]); - float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]); - - float32x4_t m2 = vmulq_f32 (m, m); - float32x4_t p_02 = vfmaq_f32 (m, m2, p_12); - float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56); - float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]); - - float32x4_t m4 = vmulq_f32 (m2, m2); - float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36); - return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79)); -} +/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */ +# define TinyBound v_u32 (0x34000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t cmp, const struct data *d) { - return v_call_f32 (log1pf, x, y, special); + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float32x4_t x_nospecial = v_zerofy_f32 (x, cmp); + return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp); } -/* Vector log1pf approximation using polynomial on reduced interval. Accuracy - is roughly 2.02 ULP: - log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */ +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.69 ULP: + _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3 + want 0x1.cfcbdcp-3. 
*/ VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); + uint32x4_t special_cases - = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh), + = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh), vcgeq_u32 (ix, d->minus_one)); - float32x4_t special_arg = x; -#if WANT_SIMD_EXCEPT if (__glibc_unlikely (v_any_u32 (special_cases))) - /* Side-step special lanes so fenv exceptions are not triggered - inadvertently. */ - x = v_zerofy_f32 (x, special_cases); -#endif + return special_case (x, special_cases, d); - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. */ + return log1pf_inline (x, &d->d); +} - float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); +#else - /* Choose k to scale x to the range [-1/4, 1/2]. */ - int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), - v_s32 (0xff800000)); - uint32x4_t ku = vreinterpretq_u32_s32 (k); +const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE; - /* Scale x by exponent manipulation. */ - float32x4_t m_scale - = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); +static float32x4_t NOINLINE VPCS_ATTR +special_case (float32x4_t x, uint32x4_t cmp) +{ + return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp); +} - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. 
*/ - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); - m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.63 ULP: + _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3 + want 0x1.fdcb16p-3. */ +VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) +{ + uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)), + vcaleq_f32 (x, v_f32 (0x1p127f))); - /* Evaluate polynomial on the reduced interval. */ - float32x4_t p = eval_poly (m_scale, d->poly); + if (__glibc_unlikely (v_any_u32 (special_cases))) + return special_case (x, special_cases); - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23)); + return log1pf_inline (x, ptr_barrier (&data)); +} - /* Apply the scaling back. */ - float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2); +#endif - if (__glibc_unlikely (v_any_u32 (special_cases))) - return special_case (special_arg, y, special_cases); - return y; -} libmvec_hidden_def (V_NAME_F1 (log1p)) HALF_WIDTH_ALIAS_F1 (log1p) strong_alias (V_NAME_F1 (log1p), V_NAME_F1 (logp1)) diff --git a/sysdeps/aarch64/fpu/v_log1pf_inline.h b/sysdeps/aarch64/fpu/v_log1pf_inline.h index 643a6cdcfc..73e45a942e 100644 --- a/sysdeps/aarch64/fpu/v_log1pf_inline.h +++ b/sysdeps/aarch64/fpu/v_log1pf_inline.h @@ -25,54 +25,81 @@ struct v_log1pf_data { - float32x4_t poly[8], ln2; uint32x4_t four; int32x4_t three_quarters; + float c0, c3, c5, c7; + float32x4_t c4, c6, c1, c2, ln2; }; /* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients (1, -0.5) are not stored as they can be generated more efficiently. 
*/ #define V_LOG1PF_CONSTANTS_TABLE \ { \ - .poly \ - = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \ - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \ - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \ - .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ - .three_quarters = V4 (0x3f400000) \ + .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f), \ + .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f, \ + .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f, \ + .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f, \ + .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ + .three_quarters = V4 (0x3f400000) \ } static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *c) +eval_poly (float32x4_t m, const struct v_log1pf_data *d) { - /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine - uses split Estrin, but this way reduces register pressure in the calling - routine). */ - float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]); + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. */ + float32x4_t c0357 = vld1q_f32 (&d->c0); + float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0); float32x4_t m2 = vmulq_f32 (m, m); - q = vfmaq_f32 (m, m2, q); - float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1); + float32x4_t p = vfmaq_f32 (p45, m2, p67); + p = vfmaq_f32 (p23, m2, p); + p = vfmaq_f32 (d->c1, m, p); p = vmulq_f32 (m2, p); - return vfmaq_f32 (q, m2, p); + p = vfmaq_f32 (m, m2, p); + return vfmaq_f32 (p, m2, q); } static inline float32x4_t -log1pf_inline (float32x4_t x, const struct v_log1pf_data d) +log1pf_inline (float32x4_t x, const struct v_log1pf_data *d) { - /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no - special-case handling. See that file for details of the algorithm. 
*/ + /* Helper for calculating log(x + 1). */ + + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); + + /* Choose k to scale x to the range [-1/4, 1/2]. */ int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters), + = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), v_s32 (0xff800000)); uint32x4_t ku = vreinterpretq_u32_s32 (k); - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku)); + + /* Scale up to ensure that the scale factor is representable as normalised + fp32 number, and scale m down accordingly. */ + float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); + + /* Scale x by exponent manipulation. */ float32x4_t m_scale = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); - float32x4_t p = eval_poly (m_scale, d.poly); + + /* Evaluate polynomial on the reduced interval. */ + float32x4_t p = eval_poly (m_scale, d); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f)); - return vfmaq_f32 (p, scale_back, d.ln2); + + /* Apply the scaling back. */ + return vfmaq_f32 (p, scale_back, d->ln2); } #endif commit a947a43b95bbea53ec50df058b42392fd5ea52b6 Author: Joe Ramsay Date: Mon Sep 23 15:32:53 2024 +0100 AArch64: Improve codegen in users of ADVSIMD expm1f helper Rearrange operations so MOV is not necessary in reduction or around the special-case handler. 
Reduce memory access by using more indexed MLAs in polynomial. Reviewed-by: Wilco Dijkstra (cherry picked from commit 7900ac490db32f6bccff812733f00280dde34e27) diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c index a0616ec754..8303ca296e 100644 --- a/sysdeps/aarch64/fpu/expm1f_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c @@ -18,27 +18,18 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_expm1f_inline.h" static const struct data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; - int32x4_t exponent_bias; + struct v_expm1f_data d; #if WANT_SIMD_EXCEPT uint32x4_t thresh; #else float32x4_t oflow_bound; #endif } data = { - /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, - /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, - .shift = V4 (0x1.8p23f), - .exponent_bias = V4 (0x3f800000), + .d = V_EXPM1F_DATA, #if !WANT_SIMD_EXCEPT /* Value above which expm1f(x) should overflow. Absolute value of the underflow bound is greater than this, so it catches both cases - there is @@ -55,67 +46,38 @@ static const struct data #define TinyBound v_u32 (0x34000000 << 1) static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t special, const struct data *d) { - return v_call_f32 (expm1f, x, y, special); + return v_call_f32 ( + expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special); } /* Single-precision vector exp(x) - 1 function. - The maximum error is 1.51 ULP: - _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2 - want 0x1.e2fb94p-2. */ + The maximum error is 1.62 ULP: + _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2 + want 0x1.da9f44p-2. 
*/ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); #if WANT_SIMD_EXCEPT + uint32x4_t ix = vreinterpretq_u32_f32 (x); /* If fp exceptions are to be triggered correctly, fall back to scalar for |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for shift-left by 1, and compare with thresh which was left-shifted offline - this is effectively an absolute compare. */ uint32x4_t special = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh); - if (__glibc_unlikely (v_any_u32 (special))) - x = v_zerofy_f32 (x, special); #else /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); #endif - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. */ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); - int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... - So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - float32x4_t p = v_horner_4_f32 (f, d->poly); - p = vfmaq_f32 (f, vmulq_f32 (f, f), p); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. 
*/ - int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias); - float32x4_t t = vreinterpretq_f32_s32 (u); - if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t), - special); + return special_case (x, special, d); /* expm1(x) ~= p * t + (t - 1). */ - return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t); + return expm1f_inline (x, &d->d); } libmvec_hidden_def (V_NAME_F1 (expm1)) HALF_WIDTH_ALIAS_F1 (expm1) diff --git a/sysdeps/aarch64/fpu/sinhf_advsimd.c b/sysdeps/aarch64/fpu/sinhf_advsimd.c index 6bb7482dc2..c6ed7598e7 100644 --- a/sysdeps/aarch64/fpu/sinhf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinhf_advsimd.c @@ -23,15 +23,13 @@ static const struct data { struct v_expm1f_data expm1f_consts; - uint32x4_t halff; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound, thresh; #else - uint32x4_t oflow_bound; + float32x4_t oflow_bound; #endif } data = { .expm1f_consts = V_EXPM1F_DATA, - .halff = V4 (0x3f000000), #if WANT_SIMD_EXCEPT /* 0x1.6a09e8p-32, below which expm1f underflows. */ .tiny_bound = V4 (0x2fb504f4), @@ -39,14 +37,15 @@ static const struct data .thresh = V4 (0x12fbbbb3), #else /* 0x1.61814ep+6, above which expm1f helper overflows. */ - .oflow_bound = V4 (0x42b0c0a7), + .oflow_bound = V4 (0x1.61814ep+6), #endif }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign, + uint32x4_t special) { - return v_call_f32 (sinhf, x, y, special); + return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special); } /* Approximation for vector single-precision sinh(x) using expm1. 
@@ -60,15 +59,15 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) uint32x4_t ix = vreinterpretq_u32_f32 (x); float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t sign = veorq_u32 (ix, iax); - float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff)); + float32x4_t halfsign = vreinterpretq_f32_u32 ( + vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5)))); #if WANT_SIMD_EXCEPT - uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh); + uint32x4_t special = vcgeq_u32 ( + vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh); ax = v_zerofy_f32 (ax, special); #else - uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound); + uint32x4_t special = vcageq_f32 (x, d->oflow_bound); #endif /* Up to the point that expm1f overflows, we can use it to calculate sinhf @@ -80,7 +79,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) /* Fall back to the scalar variant for any lanes that should trigger an exception. */ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (t, halfsign), special); + return special_case (x, t, halfsign, special); return vmulq_f32 (t, halfsign); } diff --git a/sysdeps/aarch64/fpu/tanhf_advsimd.c b/sysdeps/aarch64/fpu/tanhf_advsimd.c index 50defd6ef0..3ced9b7a41 100644 --- a/sysdeps/aarch64/fpu/tanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/tanhf_advsimd.c @@ -28,13 +28,16 @@ static const struct data /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). 
*/ .boring_bound = V4 (0x41102cb3), .large_bound = V4 (0x7f800000), - .onef = V4 (0x3f800000), }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring, + float32x4_t q, uint32x4_t special) { - return v_call_f32 (tanhf, x, y, special); + return v_call_f32 ( + tanhf, x, + vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))), + special); } /* Approximation for single-precision vector tanh(x), using a simplified @@ -50,7 +53,9 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t sign = veorq_u32 (ix, iax); uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound); - float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef)); + /* expm1 exponent bias is 1.0f reinterpreted to int. */ + float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 ( + sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias))); #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered properly, set all special and boring @@ -66,10 +71,12 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) /* tanh(x) = (e^2x - 1) / (e^2x + 1). 
*/ float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts); - float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); + if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vbslq_f32 (is_boring, boring, y), special); + return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q, + special); + + float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); return vbslq_f32 (is_boring, boring, y); } libmvec_hidden_def (V_NAME_F1 (tanh)) diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h index 59b552da6b..1daedfdd51 100644 --- a/sysdeps/aarch64/fpu/v_expm1f_inline.h +++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h @@ -21,48 +21,47 @@ #define AARCH64_FPU_V_EXPM1F_INLINE_H #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "math_config.h" struct v_expm1f_data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; + float32x4_t c0, c2; int32x4_t exponent_bias; + float c1, c3, inv_ln2, c4; + float ln2_hi, ln2_lo; }; /* Coefficients generated using fpminimax with degree=5 in [-log(2)/2, - log(2)/2]. Exponent bias is asuint(1.0f). - invln2_and_ln2 Stores constants: invln2, ln2_lo, ln2_hi, 0. */ + log(2)/2]. Exponent bias is asuint(1.0f). */ #define V_EXPM1F_DATA \ { \ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \ - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \ - .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ + .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5), \ + .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ } static inline float32x4_t expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) { - /* Helper routine for calculating exp(x) - 1. 
- Copied from v_expm1f_1u6.c, with all special-case handling removed - the - calling routine should handle special values if required. */ + /* Helper routine for calculating exp(x) - 1. */ + + float32x2_t ln2 = vld1_f32 (&d->ln2_hi); + float32x4_t lane_consts = vld1q_f32 (&d->c1); /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); + float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2)); int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); + float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0); + f = vfmsq_lane_f32 (f, j, ln2, 1); - /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). - Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses - Horner. */ + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). */ float32x4_t f2 = vmulq_f32 (f, f); float32x4_t f4 = vmulq_f32 (f2, f2); - float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1); + float32x4_t p = vfmaq_f32 (p01, f2, p23); + p = vfmaq_laneq_f32 (p, f4, lane_consts, 3); p = vfmaq_f32 (f, f2, p); /* t = 2^i. */ commit 68f2eb20de698675ddc74068c2cd03fee29207df Author: Joe Ramsay Date: Mon Sep 23 15:33:31 2024 +0100 AArch64: Simplify rounding-multiply pattern in several AdvSIMD routines This operation can be simplified to use simpler multiply-round-convert sequence, which uses fewer instructions and constants. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit 16a59571e4e9fd019d3fc23a2e7d73c1df8bb5cb) diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c index 3924c9ce44..11a89b1530 100644 --- a/sysdeps/aarch64/fpu/cos_advsimd.c +++ b/sysdeps/aarch64/fpu/cos_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* Worst-case error is 3.3 ulp in [-pi/2, pi/2]. */ .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), @@ -30,11 +30,9 @@ static const struct data V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), V2 (-0x1.9e9540300a1p-41) }, .inv_pi = V2 (0x1.45f306dc9c883p-2), - .half_pi = V2 (0x1.921fb54442d18p+0), .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), .range_val = V2 (0x1p23) }; @@ -68,10 +66,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi)); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); - n = vsubq_f64 (n, v_f64 (0.5)); + n = vrndaq_f64 (vfmaq_f64 (v_f64 (0.5), r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); + n = vsubq_f64 (n, v_f64 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c index d0c285b03a..85a1b37373 100644 --- a/sysdeps/aarch64/fpu/cosf_advsimd.c +++ b/sysdeps/aarch64/fpu/cosf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. 
*/ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,8 +33,6 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), - .half_pi = V4 (0x1.921fb6p0f), .range_val = V4 (0x1p20f) }; @@ -69,9 +67,8 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi)); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + n = vrndaq_f32 (vfmaq_f32 (v_f32 (0.5), r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); n = vsubq_f32 (n, v_f32 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c index 99d2e647aa..5c9cb72620 100644 --- a/sysdeps/aarch64/fpu/expf_advsimd.c +++ b/sysdeps/aarch64/fpu/expf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[5]; - float32x4_t shift, inv_ln2, ln2_hi, ln2_lo; + float32x4_t inv_ln2, ln2_hi, ln2_lo; uint32x4_t exponent_bias; #if !WANT_SIMD_EXCEPT float32x4_t special_bound, scale_thresh; @@ -31,7 +31,6 @@ static const struct data /* maxerr: 1.45358 +0.5 ulp. 
*/ .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, - .shift = V4 (0x1.8p23f), .inv_ln2 = V4 (0x1.715476p+0f), .ln2_hi = V4 (0x1.62e4p-1f), .ln2_lo = V4 (0x1.7f7d1cp-20f), @@ -85,7 +84,7 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly, z; + float32x4_t n, r, r2, scale, p, q, poly; uint32x4_t cmp, e; #if WANT_SIMD_EXCEPT @@ -104,11 +103,10 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - z = vfmaq_f32 (d->shift, x, d->inv_ln2); - n = vsubq_f32 (z, d->shift); + n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); r = vfmsq_f32 (x, n, d->ln2_hi); r = vfmsq_f32 (r, n, d->ln2_lo); - e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c index a0d9d3b819..718125cbad 100644 --- a/sysdeps/aarch64/fpu/sin_advsimd.c +++ b/sysdeps/aarch64/fpu/sin_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), @@ -34,12 +34,13 @@ static const struct data .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255). 
*/ -# define Thresh v_u64 (0x1160000000000000) /* RangeVal - TinyBound. */ +/* asuint64(0x1p-253)), below which multiply by inv_pi underflows. */ +# define TinyBound v_u64 (0x3020000000000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u64 (0x1160000000000000) #endif #define C(i) d->poly[i] @@ -72,16 +73,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) fenv). These lanes will be fixed by special-case handler later. */ uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh); - r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); + r = vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), cmp)); #else r = x; cmp = vcageq_f64 (x, d->range_val); #endif /* n = rint(|x|/pi). */ - n = vfmaq_f64 (d->shift, d->inv_pi, r); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); + n = vrndaq_f64 (vmulq_f64 (r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c index 375dfc3331..6ee9a23d5b 100644 --- a/sysdeps/aarch64/fpu/sinf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. */ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,13 +33,14 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), .range_val = V4 (0x1p20f) }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f). */ -# define Thresh v_u32 (0x28800000) /* RangeVal - TinyBound. */ +/* asuint32(0x1p-59f), below which multiply by inv_pi underflows. 
*/ +# define TinyBound v_u32 (0x22000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u32 (0x27800000) #endif #define C(i) d->poly[i] @@ -64,23 +65,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) /* If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ - r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); + r = vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), cmp)); #else r = x; cmp = vcageq_f32 (x, d->range_val); #endif - /* n = rint(|x|/pi) */ - n = vfmaq_f32 (d->shift, d->inv_pi, r); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + /* n = rint(|x|/pi). */ + n = vrndaq_f32 (vmulq_f32 (r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); - /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f32 (r, d->pi_1, n); r = vfmsq_f32 (r, d->pi_2, n); r = vfmsq_f32 (r, d->pi_3, n); - /* y = sin(r) */ + /* y = sin(r). */ r2 = vmulq_f32 (r, r); y = vfmaq_f32 (C (2), C (3), r2); y = vfmaq_f32 (C (1), y, r2); commit 9ff7559b274eb0dbce2cbcf87284c1d30d47a2d6 Author: Joe Ramsay Date: Mon Oct 28 14:58:35 2024 +0000 AArch64: Small optimisation in AdvSIMD erf and erfc In both routines, reduce register pressure such that GCC 14 emits no spills for erf and fewer spills for erfc. Also use more efficient comparison for the special-case in erf. Benchtests show erf improves by 6.4%, erfc by 1.0%. 
(cherry picked from commit 1cf29fbc5be23db775d1dfa6b332ded6e6554252) diff --git a/sysdeps/aarch64/fpu/erf_advsimd.c b/sysdeps/aarch64/fpu/erf_advsimd.c index 19cbb7d0f4..c0116735e4 100644 --- a/sysdeps/aarch64/fpu/erf_advsimd.c +++ b/sysdeps/aarch64/fpu/erf_advsimd.c @@ -22,19 +22,21 @@ static const struct data { float64x2_t third; - float64x2_t tenth, two_over_five, two_over_fifteen; - float64x2_t two_over_nine, two_over_fortyfive; + float64x2_t tenth, two_over_five, two_over_nine; + double two_over_fifteen, two_over_fortyfive; float64x2_t max, shift; + uint64x2_t max_idx; #if WANT_SIMD_EXCEPT float64x2_t tiny_bound, huge_bound, scale_minus_one; #endif } data = { + .max_idx = V2 (768), .third = V2 (0x1.5555555555556p-2), /* used to compute 2/3 and 1/6 too. */ - .two_over_fifteen = V2 (0x1.1111111111111p-3), + .two_over_fifteen = 0x1.1111111111111p-3, .tenth = V2 (-0x1.999999999999ap-4), .two_over_five = V2 (-0x1.999999999999ap-2), .two_over_nine = V2 (-0x1.c71c71c71c71cp-3), - .two_over_fortyfive = V2 (0x1.6c16c16c16c17p-5), + .two_over_fortyfive = 0x1.6c16c16c16c17p-5, .max = V2 (5.9921875), /* 6 - 1/128. */ .shift = V2 (0x1p45), #if WANT_SIMD_EXCEPT @@ -87,8 +89,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) float64x2_t a = vabsq_f64 (x); /* Reciprocal conditions that do not catch NaNs so they can be used in BSLs to return expected results. */ - uint64x2_t a_le_max = vcleq_f64 (a, dat->max); - uint64x2_t a_gt_max = vcgtq_f64 (a, dat->max); + uint64x2_t a_le_max = vcaleq_f64 (x, dat->max); + uint64x2_t a_gt_max = vcagtq_f64 (x, dat->max); #if WANT_SIMD_EXCEPT /* |x| huge or tiny. */ @@ -115,7 +117,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) segfault. 
*/ uint64x2_t i = vsubq_u64 (vreinterpretq_u64_f64 (z), vreinterpretq_u64_f64 (shift)); - i = vbslq_u64 (a_le_max, i, v_u64 (768)); + i = vbslq_u64 (a_le_max, i, dat->max_idx); struct entry e = lookup (i); float64x2_t r = vsubq_f64 (z, shift); @@ -125,14 +127,19 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) float64x2_t d2 = vmulq_f64 (d, d); float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t two_over_fifteen_and_fortyfive + = vld1q_f64 (&dat->two_over_fifteen); + /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. */ float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->third, r2, vaddq_f64 (dat->third, dat->third)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->third)); - float64x2_t p4 = vfmaq_f64 (dat->two_over_five, r2, dat->two_over_fifteen); + float64x2_t p4 = vfmaq_laneq_f64 (dat->two_over_five, r2, + two_over_fifteen_and_fortyfive, 0); p4 = vfmsq_f64 (dat->tenth, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->two_over_nine, r2, dat->two_over_fortyfive); + float64x2_t p5 = vfmaq_laneq_f64 (dat->two_over_nine, r2, + two_over_fifteen_and_fortyfive, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->third), r2, p5)); float64x2_t p34 = vfmaq_f64 (p3, d, p4); diff --git a/sysdeps/aarch64/fpu/erfc_advsimd.c b/sysdeps/aarch64/fpu/erfc_advsimd.c index f1b3bfe830..2f2f755c46 100644 --- a/sysdeps/aarch64/fpu/erfc_advsimd.c +++ b/sysdeps/aarch64/fpu/erfc_advsimd.c @@ -24,8 +24,8 @@ static const struct data { uint64x2_t offset, table_scale; float64x2_t max, shift; - float64x2_t p20, p40, p41, p42; - float64x2_t p51, p52; + float64x2_t p20, p40, p41, p51; + double p42, p52; double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2]; #if WANT_SIMD_EXCEPT float64x2_t uflow_bound; @@ -41,9 +41,9 @@ static const struct data .p20 = V2 (0x1.5555555555555p-2), /* 1/3, used to compute 2/3 and 1/6. */ .p40 = V2 (-0x1.999999999999ap-4), /* 1/10. */ .p41 = V2 (-0x1.999999999999ap-2), /* 2/5. */ - .p42 = V2 (0x1.1111111111111p-3), /* 2/15. 
*/ + .p42 = 0x1.1111111111111p-3, /* 2/15. */ .p51 = V2 (-0x1.c71c71c71c71cp-3), /* 2/9. */ - .p52 = V2 (0x1.6c16c16c16c17p-5), /* 2/45. */ + .p52 = 0x1.6c16c16c16c17p-5, /* 2/45. */ /* Qi = (i+1) / i, Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */ .qr5 = { 0x1.3333333333333p0, -0x1.e79e79e79e79ep-3 }, .qr6 = { 0x1.2aaaaaaaaaaabp0, -0x1.b6db6db6db6dbp-3 }, @@ -157,9 +157,10 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x) float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->p20, r2, vaddq_f64 (dat->p20, dat->p20)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->p20)); - float64x2_t p4 = vfmaq_f64 (dat->p41, r2, dat->p42); + float64x2_t p42_p52 = vld1q_f64 (&dat->p42); + float64x2_t p4 = vfmaq_laneq_f64 (dat->p41, r2, p42_p52, 0); p4 = vfmsq_f64 (dat->p40, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->p51, r2, dat->p52); + float64x2_t p5 = vfmaq_laneq_f64 (dat->p51, r2, p42_p52, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5)); /* Compute p_i using recurrence relation: p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */ commit 76c923fe9d09befc8131205659d99cb9ac97460a Author: Joe Ramsay Date: Fri Nov 1 15:48:54 2024 +0000 AArch64: Remove SVE erf and erfc tables By using a combination of mask-and-add instead of the shift-based index calculation the routines can share the same table as other variants with no performance degradation. The tables change name because of other changes in downstream AOR. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit 2d82d781a539ce8e82178fc1fa2c99ae1884e7fe) diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index 234a6c457c..be8541f649 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -41,8 +41,6 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \ v_log10_data \ erf_data \ erff_data \ - sv_erf_data \ - sv_erff_data \ v_exp_tail_data \ erfc_data \ erfcf_data \ diff --git a/sysdeps/aarch64/fpu/erf_advsimd.c b/sysdeps/aarch64/fpu/erf_advsimd.c index c0116735e4..a48092e838 100644 --- a/sysdeps/aarch64/fpu/erf_advsimd.c +++ b/sysdeps/aarch64/fpu/erf_advsimd.c @@ -58,8 +58,8 @@ static inline struct entry lookup (uint64x2_t i) { struct entry e; - float64x2_t e1 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 0)].erf), - e2 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 1)].erf); + float64x2_t e1 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 0)].erf), + e2 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 1)].erf); e.erf = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; diff --git a/sysdeps/aarch64/fpu/erf_data.c b/sysdeps/aarch64/fpu/erf_data.c index 6d2dcd235c..ea01fad7ca 100644 --- a/sysdeps/aarch64/fpu/erf_data.c +++ b/sysdeps/aarch64/fpu/erf_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erf. +/* Lookup table used in vector erf. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 6.0 (769 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. 
*/ -const struct erf_data __erf_data = { +const struct v_erf_data __v_erf_data = { .tab = { { 0x0.0000000000000p+0, 0x1.20dd750429b6dp+0 }, { 0x1.20dbf3deb1340p-7, 0x1.20d8f1975c85dp+0 }, { 0x1.20d77083f17a0p-6, 0x1.20cb67bd452c7p+0 }, diff --git a/sysdeps/aarch64/fpu/erf_sve.c b/sysdeps/aarch64/fpu/erf_sve.c index 7d51417406..671d55a02b 100644 --- a/sysdeps/aarch64/fpu/erf_sve.c +++ b/sysdeps/aarch64/fpu/erf_sve.c @@ -67,14 +67,16 @@ svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg) svfloat64_t a = svabs_x (pg, x); svfloat64_t shift = sv_f64 (dat->shift); svfloat64_t z = svadd_x (pg, a, shift); - svuint64_t i - = svsub_x (pg, svreinterpret_u64 (z), svreinterpret_u64 (shift)); + svuint64_t i = svand_x (pg, svreinterpret_u64 (z), 0xfff); + i = svadd_x (pg, i, i); /* Lookup without shortcut for small values but with predicate to avoid segfault for large values and NaNs. */ svfloat64_t r = svsub_x (pg, z, shift); - svfloat64_t erfr = svld1_gather_index (a_lt_max, __sv_erf_data.erf, i); - svfloat64_t scale = svld1_gather_index (a_lt_max, __sv_erf_data.scale, i); + svfloat64_t erfr + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].erf, i); + svfloat64_t scale + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].scale, i); /* erf(x) ~ erf(r) + scale * d * poly (r, d). 
*/ svfloat64_t d = svsub_x (pg, a, r); diff --git a/sysdeps/aarch64/fpu/erfc_advsimd.c b/sysdeps/aarch64/fpu/erfc_advsimd.c index 2f2f755c46..d05eac61a2 100644 --- a/sysdeps/aarch64/fpu/erfc_advsimd.c +++ b/sysdeps/aarch64/fpu/erfc_advsimd.c @@ -69,9 +69,9 @@ lookup (uint64x2_t i) { struct entry e; float64x2_t e1 - = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); float64x2_t e2 - = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); e.erfc = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; diff --git a/sysdeps/aarch64/fpu/erfc_data.c b/sysdeps/aarch64/fpu/erfc_data.c index 76a94e4681..8dc6a8c42c 100644 --- a/sysdeps/aarch64/fpu/erfc_data.c +++ b/sysdeps/aarch64/fpu/erfc_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erfc. +/* Lookup table used in vector erfc. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = ~27.0 (3488 values): - - the first entry __erfc_data.tab.erfc contains the values of erfc(r), - - the second entry __erfc_data.tab.scale contains the values of + - the first entry __v_erfc_data.tab.erfc contains the values of erfc(r), + - the second entry __v_erfc_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore they are scaled by a large enough value 2^128 (fits in 8bit). 
*/ -const struct erfc_data __erfc_data = { +const struct v_erfc_data __v_erfc_data = { .tab = { { 0x1p128, 0x1.20dd750429b6dp128 }, { 0x1.fb7c9030853b3p127, 0x1.20d8f1975c85dp128 }, { 0x1.f6f9447be0743p127, 0x1.20cb67bd452c7p128 }, diff --git a/sysdeps/aarch64/fpu/erfc_sve.c b/sysdeps/aarch64/fpu/erfc_sve.c index c17d3e4484..703926ee41 100644 --- a/sysdeps/aarch64/fpu/erfc_sve.c +++ b/sysdeps/aarch64/fpu/erfc_sve.c @@ -104,7 +104,7 @@ svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg) /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */ i = svadd_x (pg, i, i); - const float64_t *p = &__erfc_data.tab[0].erfc - 2 * dat->off_arr; + const float64_t *p = &__v_erfc_data.tab[0].erfc - 2 * dat->off_arr; svfloat64_t erfcr = svld1_gather_index (pg, p, i); svfloat64_t scale = svld1_gather_index (pg, p + 1, i); diff --git a/sysdeps/aarch64/fpu/erfcf_advsimd.c b/sysdeps/aarch64/fpu/erfcf_advsimd.c index ca5bc3ab33..59b0b0d64b 100644 --- a/sysdeps/aarch64/fpu/erfcf_advsimd.c +++ b/sysdeps/aarch64/fpu/erfcf_advsimd.c @@ -62,13 +62,13 @@ lookup (uint32x4_t i) { struct entry e; float32x2_t t0 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); float32x2_t t1 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); float32x2_t t2 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); float32x2_t t3 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); float32x4_t e1 = vcombine_f32 (t0, t1); float32x4_t e2 = vcombine_f32 (t2, t3); e.erfc = vuzp1q_f32 (e1, e2); diff --git a/sysdeps/aarch64/fpu/erfcf_data.c b/sysdeps/aarch64/fpu/erfcf_data.c index 77fb889a78..d45087bbb9 100644 --- a/sysdeps/aarch64/fpu/erfcf_data.c +++ 
b/sysdeps/aarch64/fpu/erfcf_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erfcf. +/* Lookup table used in vector erfcf. For each possible rounded input r (multiples of 1/64), between r = 0.0 and r = 10.0625 (645 values): - - the first entry __erfcf_data.tab.erfc contains the values of erfc(r), - - the second entry __erfcf_data.tab.scale contains the values of + - the first entry __v_erfcf_data.tab.erfc contains the values of erfc(r), + - the second entry __v_erfcf_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore they are scaled by a large enough value 2^47 (fits in 8 bits). */ -const struct erfcf_data __erfcf_data = { +const struct v_erfcf_data __v_erfcf_data = { .tab = { { 0x1p47, 0x1.20dd76p47 }, { 0x1.f6f944p46, 0x1.20cb68p47 }, { 0x1.edf3aap46, 0x1.209546p47 }, diff --git a/sysdeps/aarch64/fpu/erfcf_sve.c b/sysdeps/aarch64/fpu/erfcf_sve.c index 48d1677eb4..ecacb933ac 100644 --- a/sysdeps/aarch64/fpu/erfcf_sve.c +++ b/sysdeps/aarch64/fpu/erfcf_sve.c @@ -77,7 +77,7 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. 
*/ i = svmul_x (pg, i, 2); - const float32_t *p = &__erfcf_data.tab[0].erfc - 2 * dat->off_arr; + const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr; svfloat32_t erfcr = svld1_gather_index (pg, p, i); svfloat32_t scale = svld1_gather_index (pg, p + 1, i); diff --git a/sysdeps/aarch64/fpu/erff_advsimd.c b/sysdeps/aarch64/fpu/erff_advsimd.c index f2fe6ff236..db39e789b6 100644 --- a/sysdeps/aarch64/fpu/erff_advsimd.c +++ b/sysdeps/aarch64/fpu/erff_advsimd.c @@ -47,10 +47,10 @@ static inline struct entry lookup (uint32x4_t i) { struct entry e; - float32x2_t t0 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 0)].erf); - float32x2_t t1 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 1)].erf); - float32x2_t t2 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 2)].erf); - float32x2_t t3 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 3)].erf); + float32x2_t t0 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 0)].erf); + float32x2_t t1 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 1)].erf); + float32x2_t t2 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 2)].erf); + float32x2_t t3 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 3)].erf); float32x4_t e1 = vcombine_f32 (t0, t1); float32x4_t e2 = vcombine_f32 (t2, t3); e.erf = vuzp1q_f32 (e1, e2); diff --git a/sysdeps/aarch64/fpu/erff_data.c b/sysdeps/aarch64/fpu/erff_data.c index 9a32940915..da38aed205 100644 --- a/sysdeps/aarch64/fpu/erff_data.c +++ b/sysdeps/aarch64/fpu/erff_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erff. +/* Lookup table used in vector erff. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 4.0 (513 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). 
Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. */ -const struct erff_data __erff_data = { +const struct v_erff_data __v_erff_data = { .tab = { { 0x0.000000p+0, 0x1.20dd76p+0 }, { 0x1.20dbf4p-7, 0x1.20d8f2p+0 }, { 0x1.20d770p-6, 0x1.20cb68p+0 }, diff --git a/sysdeps/aarch64/fpu/erff_sve.c b/sysdeps/aarch64/fpu/erff_sve.c index 38f00db9be..0e382eb09a 100644 --- a/sysdeps/aarch64/fpu/erff_sve.c +++ b/sysdeps/aarch64/fpu/erff_sve.c @@ -62,18 +62,17 @@ svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg) svfloat32_t shift = sv_f32 (dat->shift); svfloat32_t z = svadd_x (pg, a, shift); - svuint32_t i - = svsub_x (pg, svreinterpret_u32 (z), svreinterpret_u32 (shift)); - - /* Saturate lookup index. */ - i = svsel (a_ge_max, sv_u32 (512), i); + svuint32_t i = svand_x (pg, svreinterpret_u32 (z), 0xfff); + i = svadd_x (pg, i, i); /* r and erf(r) set to 0 for |x| below min. */ svfloat32_t r = svsub_z (a_gt_min, z, shift); - svfloat32_t erfr = svld1_gather_index (a_gt_min, __sv_erff_data.erf, i); + svfloat32_t erfr + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].erf, i); /* scale set to 2/sqrt(pi) for |x| below min. */ - svfloat32_t scale = svld1_gather_index (a_gt_min, __sv_erff_data.scale, i); + svfloat32_t scale + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].scale, i); scale = svsel (a_gt_min, scale, sv_f32 (dat->scale)); /* erf(x) ~ erf(r) + scale * d * (1 - r * d + 1/3 * d^2). */ diff --git a/sysdeps/aarch64/fpu/sv_erf_data.c b/sysdeps/aarch64/fpu/sv_erf_data.c deleted file mode 100644 index a53878f893..0000000000 --- a/sysdeps/aarch64/fpu/sv_erf_data.c +++ /dev/null @@ -1,1570 +0,0 @@ -/* Table for SVE erf approximation - - Copyright (C) 2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include "vecmath_config.h" - -/* Lookup table used in vector erf. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 6.0 (769 values): - - the first entry __erf_data.tab.erf contains the values of erf(r), - - the second entry __erf_data.tab.scale contains the values of - 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the - algorithm, since lookup is performed only for x >= 1/64-1/512. 
*/ -const struct sv_erf_data __sv_erf_data = { - .erf = { 0x0.0000000000000p+0, - 0x1.20dbf3deb1340p-7, - 0x1.20d77083f17a0p-6, - 0x1.b137e0cf584dcp-6, - 0x1.20c5645dd2538p-5, - 0x1.68e5d3bbc9526p-5, - 0x1.b0fafef135745p-5, - 0x1.f902a77bd3821p-5, - 0x1.207d480e90658p-4, - 0x1.44703e87e8593p-4, - 0x1.68591a1e83b5dp-4, - 0x1.8c36beb8a8d23p-4, - 0x1.b0081148a873ap-4, - 0x1.d3cbf7e70a4b3p-4, - 0x1.f78159ec8bb50p-4, - 0x1.0d939005f65e5p-3, - 0x1.1f5e1a35c3b89p-3, - 0x1.311fc15f56d14p-3, - 0x1.42d7fc2f64959p-3, - 0x1.548642321d7c6p-3, - 0x1.662a0bdf7a89fp-3, - 0x1.77c2d2a765f9ep-3, - 0x1.895010fdbdbfdp-3, - 0x1.9ad142662e14dp-3, - 0x1.ac45e37fe2526p-3, - 0x1.bdad72110a648p-3, - 0x1.cf076d1233237p-3, - 0x1.e05354b96ff36p-3, - 0x1.f190aa85540e2p-3, - 0x1.015f78a3dcf3dp-2, - 0x1.09eed6982b948p-2, - 0x1.127631eb8de32p-2, - 0x1.1af54e232d609p-2, - 0x1.236bef825d9a2p-2, - 0x1.2bd9db0f7827fp-2, - 0x1.343ed6989b7d9p-2, - 0x1.3c9aa8b84bedap-2, - 0x1.44ed18d9f6462p-2, - 0x1.4d35ef3e5372ep-2, - 0x1.5574f4ffac98ep-2, - 0x1.5da9f415ff23fp-2, - 0x1.65d4b75b00471p-2, - 0x1.6df50a8dff772p-2, - 0x1.760aba57a76bfp-2, - 0x1.7e15944d9d3e4p-2, - 0x1.861566f5fd3c0p-2, - 0x1.8e0a01cab516bp-2, - 0x1.95f3353cbb146p-2, - 0x1.9dd0d2b721f39p-2, - 0x1.a5a2aca209394p-2, - 0x1.ad68966569a87p-2, - 0x1.b522646bbda68p-2, - 0x1.bccfec24855b8p-2, - 0x1.c4710406a65fcp-2, - 0x1.cc058392a6d2dp-2, - 0x1.d38d4354c3bd0p-2, - 0x1.db081ce6e2a48p-2, - 0x1.e275eaf25e458p-2, - 0x1.e9d68931ae650p-2, - 0x1.f129d471eabb1p-2, - 0x1.f86faa9428f9dp-2, - 0x1.ffa7ea8eb5fd0p-2, - 0x1.03693a371519cp-1, - 0x1.06f794ab2cae7p-1, - 0x1.0a7ef5c18edd2p-1, - 0x1.0dff4f247f6c6p-1, - 0x1.1178930ada115p-1, - 0x1.14eab43841b55p-1, - 0x1.1855a5fd3dd50p-1, - 0x1.1bb95c3746199p-1, - 0x1.1f15cb50bc4dep-1, - 0x1.226ae840d4d70p-1, - 0x1.25b8a88b6dd7fp-1, - 0x1.28ff0240d52cdp-1, - 0x1.2c3debfd7d6c1p-1, - 0x1.2f755ce9a21f4p-1, - 0x1.32a54cb8db67bp-1, - 0x1.35cdb3a9a144dp-1, - 0x1.38ee8a84beb71p-1, - 0x1.3c07ca9cb4f9ep-1, - 0x1.3f196dcd0f135p-1, 
- 0x1.42236e79a5fa6p-1, - 0x1.4525c78dd5966p-1, - 0x1.4820747ba2dc2p-1, - 0x1.4b13713ad3513p-1, - 0x1.4dfeba47f63ccp-1, - 0x1.50e24ca35fd2cp-1, - 0x1.53be25d016a4fp-1, - 0x1.569243d2b3a9bp-1, - 0x1.595ea53035283p-1, - 0x1.5c2348ecc4dc3p-1, - 0x1.5ee02e8a71a53p-1, - 0x1.61955607dd15dp-1, - 0x1.6442bfdedd397p-1, - 0x1.66e86d0312e82p-1, - 0x1.69865ee075011p-1, - 0x1.6c1c9759d0e5fp-1, - 0x1.6eab18c74091bp-1, - 0x1.7131e5f496a5ap-1, - 0x1.73b1021fc0cb8p-1, - 0x1.762870f720c6fp-1, - 0x1.78983697dc96fp-1, - 0x1.7b00578c26037p-1, - 0x1.7d60d8c979f7bp-1, - 0x1.7fb9bfaed8078p-1, - 0x1.820b1202f27fbp-1, - 0x1.8454d5f25760dp-1, - 0x1.8697120d92a4ap-1, - 0x1.88d1cd474a2e0p-1, - 0x1.8b050ef253c37p-1, - 0x1.8d30debfc572ep-1, - 0x1.8f5544bd00c04p-1, - 0x1.91724951b8fc6p-1, - 0x1.9387f53df5238p-1, - 0x1.959651980da31p-1, - 0x1.979d67caa6631p-1, - 0x1.999d4192a5715p-1, - 0x1.9b95e8fd26abap-1, - 0x1.9d8768656cc42p-1, - 0x1.9f71ca72cffb6p-1, - 0x1.a1551a16aaeafp-1, - 0x1.a331628a45b92p-1, - 0x1.a506af4cc00f4p-1, - 0x1.a6d50c20fa293p-1, - 0x1.a89c850b7d54dp-1, - 0x1.aa5d265064366p-1, - 0x1.ac16fc7143263p-1, - 0x1.adca142b10f98p-1, - 0x1.af767a741088bp-1, - 0x1.b11c3c79bb424p-1, - 0x1.b2bb679ead19cp-1, - 0x1.b4540978921eep-1, - 0x1.b5e62fce16095p-1, - 0x1.b771e894d602ep-1, - 0x1.b8f741ef54f83p-1, - 0x1.ba764a2af2b78p-1, - 0x1.bbef0fbde6221p-1, - 0x1.bd61a1453ab44p-1, - 0x1.bece0d82d1a5cp-1, - 0x1.c034635b66e23p-1, - 0x1.c194b1d49a184p-1, - 0x1.c2ef0812fc1bdp-1, - 0x1.c443755820d64p-1, - 0x1.c5920900b5fd1p-1, - 0x1.c6dad2829ec62p-1, - 0x1.c81de16b14cefp-1, - 0x1.c95b455cce69dp-1, - 0x1.ca930e0e2a825p-1, - 0x1.cbc54b476248dp-1, - 0x1.ccf20ce0c0d27p-1, - 0x1.ce1962c0e0d8bp-1, - 0x1.cf3b5cdaf0c39p-1, - 0x1.d0580b2cfd249p-1, - 0x1.d16f7dbe41ca0p-1, - 0x1.d281c49d818d0p-1, - 0x1.d38eefdf64fddp-1, - 0x1.d4970f9ce00d9p-1, - 0x1.d59a33f19ed42p-1, - 0x1.d6986cfa798e7p-1, - 0x1.d791cad3eff01p-1, - 0x1.d8865d98abe01p-1, - 0x1.d97635600bb89p-1, - 0x1.da61623cb41e0p-1, - 0x1.db47f43b2980dp-1, - 
0x1.dc29fb60715afp-1, - 0x1.dd0787a8bb39dp-1, - 0x1.dde0a90611a0dp-1, - 0x1.deb56f5f12d28p-1, - 0x1.df85ea8db188ep-1, - 0x1.e0522a5dfda73p-1, - 0x1.e11a3e8cf4eb8p-1, - 0x1.e1de36c75ba58p-1, - 0x1.e29e22a89d766p-1, - 0x1.e35a11b9b61cep-1, - 0x1.e4121370224ccp-1, - 0x1.e4c6372cd8927p-1, - 0x1.e5768c3b4a3fcp-1, - 0x1.e62321d06c5e0p-1, - 0x1.e6cc0709c8a0dp-1, - 0x1.e7714aec96534p-1, - 0x1.e812fc64db369p-1, - 0x1.e8b12a44944a8p-1, - 0x1.e94be342e6743p-1, - 0x1.e9e335fb56f87p-1, - 0x1.ea7730ed0bbb9p-1, - 0x1.eb07e27a133aap-1, - 0x1.eb9558e6b42cep-1, - 0x1.ec1fa258c4beap-1, - 0x1.eca6ccd709544p-1, - 0x1.ed2ae6489ac1ep-1, - 0x1.edabfc7453e63p-1, - 0x1.ee2a1d004692cp-1, - 0x1.eea5557137ae0p-1, - 0x1.ef1db32a2277cp-1, - 0x1.ef93436bc2daap-1, - 0x1.f006135426b26p-1, - 0x1.f0762fde45ee6p-1, - 0x1.f0e3a5e1a1788p-1, - 0x1.f14e8211e8c55p-1, - 0x1.f1b6d0fea5f4dp-1, - 0x1.f21c9f12f0677p-1, - 0x1.f27ff89525acfp-1, - 0x1.f2e0e9a6a8b09p-1, - 0x1.f33f7e43a706bp-1, - 0x1.f39bc242e43e6p-1, - 0x1.f3f5c1558b19ep-1, - 0x1.f44d870704911p-1, - 0x1.f4a31ebcd47dfp-1, - 0x1.f4f693b67bd77p-1, - 0x1.f547f10d60597p-1, - 0x1.f59741b4b97cfp-1, - 0x1.f5e4907982a07p-1, - 0x1.f62fe80272419p-1, - 0x1.f67952cff6282p-1, - 0x1.f6c0db3c34641p-1, - 0x1.f7068b7b10fd9p-1, - 0x1.f74a6d9a38383p-1, - 0x1.f78c8b812d498p-1, - 0x1.f7cceef15d631p-1, - 0x1.f80ba18636f07p-1, - 0x1.f848acb544e95p-1, - 0x1.f88419ce4e184p-1, - 0x1.f8bdf1fb78370p-1, - 0x1.f8f63e416ebffp-1, - 0x1.f92d077f8d56dp-1, - 0x1.f96256700da8ep-1, - 0x1.f99633a838a57p-1, - 0x1.f9c8a7989af0dp-1, - 0x1.f9f9ba8d3c733p-1, - 0x1.fa2974addae45p-1, - 0x1.fa57ddfe27376p-1, - 0x1.fa84fe5e05c8dp-1, - 0x1.fab0dd89d1309p-1, - 0x1.fadb831a9f9c3p-1, - 0x1.fb04f6868a944p-1, - 0x1.fb2d3f20f9101p-1, - 0x1.fb54641aebbc9p-1, - 0x1.fb7a6c834b5a2p-1, - 0x1.fb9f5f4739170p-1, - 0x1.fbc3433260ca5p-1, - 0x1.fbe61eef4cf6ap-1, - 0x1.fc07f907bc794p-1, - 0x1.fc28d7e4f9cd0p-1, - 0x1.fc48c1d033c7ap-1, - 0x1.fc67bcf2d7b8fp-1, - 0x1.fc85cf56ecd38p-1, - 0x1.fca2fee770c79p-1, - 
0x1.fcbf5170b578bp-1, - 0x1.fcdacca0bfb73p-1, - 0x1.fcf57607a6e7cp-1, - 0x1.fd0f5317f582fp-1, - 0x1.fd2869270a56fp-1, - 0x1.fd40bd6d7a785p-1, - 0x1.fd58550773cb5p-1, - 0x1.fd6f34f52013ap-1, - 0x1.fd85621b0876dp-1, - 0x1.fd9ae142795e3p-1, - 0x1.fdafb719e6a69p-1, - 0x1.fdc3e835500b3p-1, - 0x1.fdd7790ea5bc0p-1, - 0x1.fdea6e062d0c9p-1, - 0x1.fdfccb62e52d3p-1, - 0x1.fe0e9552ebdd6p-1, - 0x1.fe1fcfebe2083p-1, - 0x1.fe307f2b503d0p-1, - 0x1.fe40a6f70af4bp-1, - 0x1.fe504b1d9696cp-1, - 0x1.fe5f6f568b301p-1, - 0x1.fe6e1742f7cf6p-1, - 0x1.fe7c466dc57a1p-1, - 0x1.fe8a004c19ae6p-1, - 0x1.fe97483db8670p-1, - 0x1.fea4218d6594ap-1, - 0x1.feb08f7146046p-1, - 0x1.febc950b3fa75p-1, - 0x1.fec835695932ep-1, - 0x1.fed37386190fbp-1, - 0x1.fede5248e38f4p-1, - 0x1.fee8d486585eep-1, - 0x1.fef2fd00af31ap-1, - 0x1.fefcce6813974p-1, - 0x1.ff064b5afffbep-1, - 0x1.ff0f766697c76p-1, - 0x1.ff18520700971p-1, - 0x1.ff20e0a7ba8c2p-1, - 0x1.ff2924a3f7a83p-1, - 0x1.ff312046f2339p-1, - 0x1.ff38d5cc4227fp-1, - 0x1.ff404760319b4p-1, - 0x1.ff47772010262p-1, - 0x1.ff4e671a85425p-1, - 0x1.ff55194fe19dfp-1, - 0x1.ff5b8fb26f5f6p-1, - 0x1.ff61cc26c1578p-1, - 0x1.ff67d08401202p-1, - 0x1.ff6d9e943c231p-1, - 0x1.ff733814af88cp-1, - 0x1.ff789eb6130c9p-1, - 0x1.ff7dd41ce2b4dp-1, - 0x1.ff82d9e1a76d8p-1, - 0x1.ff87b1913e853p-1, - 0x1.ff8c5cad200a5p-1, - 0x1.ff90dcaba4096p-1, - 0x1.ff9532f846ab0p-1, - 0x1.ff9960f3eb327p-1, - 0x1.ff9d67f51ddbap-1, - 0x1.ffa14948549a7p-1, - 0x1.ffa506302ebaep-1, - 0x1.ffa89fe5b3625p-1, - 0x1.ffac17988ef4bp-1, - 0x1.ffaf6e6f4f5c0p-1, - 0x1.ffb2a5879f35ep-1, - 0x1.ffb5bdf67fe6fp-1, - 0x1.ffb8b8c88295fp-1, - 0x1.ffbb970200110p-1, - 0x1.ffbe599f4f9d9p-1, - 0x1.ffc10194fcb64p-1, - 0x1.ffc38fcffbb7cp-1, - 0x1.ffc60535dd7f5p-1, - 0x1.ffc862a501fd7p-1, - 0x1.ffcaa8f4c9beap-1, - 0x1.ffccd8f5c66d1p-1, - 0x1.ffcef371ea4d7p-1, - 0x1.ffd0f92cb6ba7p-1, - 0x1.ffd2eae369a07p-1, - 0x1.ffd4c94d29fdbp-1, - 0x1.ffd6951b33686p-1, - 0x1.ffd84ef9009eep-1, - 0x1.ffd9f78c7524ap-1, - 0x1.ffdb8f7605ee7p-1, - 
0x1.ffdd1750e1220p-1, - 0x1.ffde8fb314ebfp-1, - 0x1.ffdff92db56e5p-1, - 0x1.ffe1544d01ccbp-1, - 0x1.ffe2a1988857cp-1, - 0x1.ffe3e19349dc7p-1, - 0x1.ffe514bbdc197p-1, - 0x1.ffe63b8c8b5f7p-1, - 0x1.ffe7567b7b5e1p-1, - 0x1.ffe865fac722bp-1, - 0x1.ffe96a78a04a9p-1, - 0x1.ffea645f6d6dap-1, - 0x1.ffeb5415e7c44p-1, - 0x1.ffec39ff380b9p-1, - 0x1.ffed167b12ac2p-1, - 0x1.ffede9e5d3262p-1, - 0x1.ffeeb49896c6dp-1, - 0x1.ffef76e956a9fp-1, - 0x1.fff0312b010b5p-1, - 0x1.fff0e3ad91ec2p-1, - 0x1.fff18ebe2b0e1p-1, - 0x1.fff232a72b48ep-1, - 0x1.fff2cfb0453d9p-1, - 0x1.fff3661e9569dp-1, - 0x1.fff3f634b79f9p-1, - 0x1.fff48032dbe40p-1, - 0x1.fff50456dab8cp-1, - 0x1.fff582dc48d30p-1, - 0x1.fff5fbfc8a439p-1, - 0x1.fff66feee5129p-1, - 0x1.fff6dee89352ep-1, - 0x1.fff7491cd4af6p-1, - 0x1.fff7aebcff755p-1, - 0x1.fff80ff8911fdp-1, - 0x1.fff86cfd3e657p-1, - 0x1.fff8c5f702ccfp-1, - 0x1.fff91b102fca8p-1, - 0x1.fff96c717b695p-1, - 0x1.fff9ba420e834p-1, - 0x1.fffa04a7928b1p-1, - 0x1.fffa4bc63ee9ap-1, - 0x1.fffa8fc0e5f33p-1, - 0x1.fffad0b901755p-1, - 0x1.fffb0ecebee1bp-1, - 0x1.fffb4a210b172p-1, - 0x1.fffb82cd9dcbfp-1, - 0x1.fffbb8f1049c6p-1, - 0x1.fffbeca6adbe9p-1, - 0x1.fffc1e08f25f5p-1, - 0x1.fffc4d3120aa1p-1, - 0x1.fffc7a37857d2p-1, - 0x1.fffca53375ce3p-1, - 0x1.fffcce3b57bffp-1, - 0x1.fffcf564ab6b7p-1, - 0x1.fffd1ac4135f9p-1, - 0x1.fffd3e6d5cd87p-1, - 0x1.fffd607387b07p-1, - 0x1.fffd80e8ce0dap-1, - 0x1.fffd9fdeabccep-1, - 0x1.fffdbd65e5ad0p-1, - 0x1.fffdd98e903b2p-1, - 0x1.fffdf46816833p-1, - 0x1.fffe0e0140857p-1, - 0x1.fffe26683972ap-1, - 0x1.fffe3daa95b18p-1, - 0x1.fffe53d558ae9p-1, - 0x1.fffe68f4fa777p-1, - 0x1.fffe7d156d244p-1, - 0x1.fffe904222101p-1, - 0x1.fffea2860ee1ep-1, - 0x1.fffeb3ebb267bp-1, - 0x1.fffec47d19457p-1, - 0x1.fffed443e2787p-1, - 0x1.fffee34943b15p-1, - 0x1.fffef1960d85dp-1, - 0x1.fffeff32af7afp-1, - 0x1.ffff0c273bea2p-1, - 0x1.ffff187b6bc0ep-1, - 0x1.ffff2436a21dcp-1, - 0x1.ffff2f5fefcaap-1, - 0x1.ffff39fe16963p-1, - 0x1.ffff44178c8d2p-1, - 0x1.ffff4db27f146p-1, - 
0x1.ffff56d4d5e5ep-1, - 0x1.ffff5f8435efcp-1, - 0x1.ffff67c604180p-1, - 0x1.ffff6f9f67e55p-1, - 0x1.ffff77154e0d6p-1, - 0x1.ffff7e2c6aea2p-1, - 0x1.ffff84e93cd75p-1, - 0x1.ffff8b500e77cp-1, - 0x1.ffff9164f8e46p-1, - 0x1.ffff972be5c59p-1, - 0x1.ffff9ca891572p-1, - 0x1.ffffa1de8c582p-1, - 0x1.ffffa6d13de73p-1, - 0x1.ffffab83e54b8p-1, - 0x1.ffffaff99bac4p-1, - 0x1.ffffb43555b5fp-1, - 0x1.ffffb839e52f3p-1, - 0x1.ffffbc09fa7cdp-1, - 0x1.ffffbfa82616bp-1, - 0x1.ffffc316d9ed0p-1, - 0x1.ffffc6586abf6p-1, - 0x1.ffffc96f1165ep-1, - 0x1.ffffcc5cec0c1p-1, - 0x1.ffffcf23ff5fcp-1, - 0x1.ffffd1c637b2bp-1, - 0x1.ffffd4456a10dp-1, - 0x1.ffffd6a3554a1p-1, - 0x1.ffffd8e1a2f22p-1, - 0x1.ffffdb01e8546p-1, - 0x1.ffffdd05a75eap-1, - 0x1.ffffdeee4f810p-1, - 0x1.ffffe0bd3e852p-1, - 0x1.ffffe273c15b7p-1, - 0x1.ffffe41314e06p-1, - 0x1.ffffe59c6698bp-1, - 0x1.ffffe710d565ep-1, - 0x1.ffffe8717232dp-1, - 0x1.ffffe9bf4098cp-1, - 0x1.ffffeafb377d5p-1, - 0x1.ffffec2641a9ep-1, - 0x1.ffffed413e5b7p-1, - 0x1.ffffee4d01cd6p-1, - 0x1.ffffef4a55bd4p-1, - 0x1.fffff039f9e8fp-1, - 0x1.fffff11ca4876p-1, - 0x1.fffff1f302bc1p-1, - 0x1.fffff2bdb904dp-1, - 0x1.fffff37d63a36p-1, - 0x1.fffff43297019p-1, - 0x1.fffff4dde0118p-1, - 0x1.fffff57fc4a95p-1, - 0x1.fffff618c3da6p-1, - 0x1.fffff6a956450p-1, - 0x1.fffff731ee681p-1, - 0x1.fffff7b2f8ed6p-1, - 0x1.fffff82cdcf1bp-1, - 0x1.fffff89ffc4aap-1, - 0x1.fffff90cb3c81p-1, - 0x1.fffff9735b73bp-1, - 0x1.fffff9d446cccp-1, - 0x1.fffffa2fc5015p-1, - 0x1.fffffa8621251p-1, - 0x1.fffffad7a2652p-1, - 0x1.fffffb248c39dp-1, - 0x1.fffffb6d1e95dp-1, - 0x1.fffffbb196132p-1, - 0x1.fffffbf22c1e2p-1, - 0x1.fffffc2f171e3p-1, - 0x1.fffffc688a9cfp-1, - 0x1.fffffc9eb76acp-1, - 0x1.fffffcd1cbc28p-1, - 0x1.fffffd01f36afp-1, - 0x1.fffffd2f57d68p-1, - 0x1.fffffd5a2041fp-1, - 0x1.fffffd8271d12p-1, - 0x1.fffffda86faa9p-1, - 0x1.fffffdcc3b117p-1, - 0x1.fffffdedf37edp-1, - 0x1.fffffe0db6b91p-1, - 0x1.fffffe2ba0ea5p-1, - 0x1.fffffe47ccb60p-1, - 0x1.fffffe62534d4p-1, - 0x1.fffffe7b4c81ep-1, - 
0x1.fffffe92ced93p-1, - 0x1.fffffea8ef9cfp-1, - 0x1.fffffebdc2ec6p-1, - 0x1.fffffed15bcbap-1, - 0x1.fffffee3cc32cp-1, - 0x1.fffffef5251c2p-1, - 0x1.ffffff0576917p-1, - 0x1.ffffff14cfb92p-1, - 0x1.ffffff233ee1dp-1, - 0x1.ffffff30d18e8p-1, - 0x1.ffffff3d9480fp-1, - 0x1.ffffff4993c46p-1, - 0x1.ffffff54dab72p-1, - 0x1.ffffff5f74141p-1, - 0x1.ffffff6969fb8p-1, - 0x1.ffffff72c5fb6p-1, - 0x1.ffffff7b91176p-1, - 0x1.ffffff83d3d07p-1, - 0x1.ffffff8b962bep-1, - 0x1.ffffff92dfba2p-1, - 0x1.ffffff99b79d2p-1, - 0x1.ffffffa0248e8p-1, - 0x1.ffffffa62ce54p-1, - 0x1.ffffffabd69b4p-1, - 0x1.ffffffb127525p-1, - 0x1.ffffffb624592p-1, - 0x1.ffffffbad2affp-1, - 0x1.ffffffbf370cdp-1, - 0x1.ffffffc355dfdp-1, - 0x1.ffffffc733572p-1, - 0x1.ffffffcad3626p-1, - 0x1.ffffffce39b67p-1, - 0x1.ffffffd169d0cp-1, - 0x1.ffffffd466fa5p-1, - 0x1.ffffffd7344aap-1, - 0x1.ffffffd9d4aabp-1, - 0x1.ffffffdc4ad7ap-1, - 0x1.ffffffde9964ep-1, - 0x1.ffffffe0c2bf0p-1, - 0x1.ffffffe2c92dbp-1, - 0x1.ffffffe4aed5ep-1, - 0x1.ffffffe675bbdp-1, - 0x1.ffffffe81fc4ep-1, - 0x1.ffffffe9aeb97p-1, - 0x1.ffffffeb24467p-1, - 0x1.ffffffec81ff2p-1, - 0x1.ffffffedc95e7p-1, - 0x1.ffffffeefbc85p-1, - 0x1.fffffff01a8b6p-1, - 0x1.fffffff126e1ep-1, - 0x1.fffffff221f30p-1, - 0x1.fffffff30cd3fp-1, - 0x1.fffffff3e8892p-1, - 0x1.fffffff4b606fp-1, - 0x1.fffffff57632dp-1, - 0x1.fffffff629e44p-1, - 0x1.fffffff6d1e56p-1, - 0x1.fffffff76ef3fp-1, - 0x1.fffffff801c1fp-1, - 0x1.fffffff88af67p-1, - 0x1.fffffff90b2e3p-1, - 0x1.fffffff982fc1p-1, - 0x1.fffffff9f2e9fp-1, - 0x1.fffffffa5b790p-1, - 0x1.fffffffabd229p-1, - 0x1.fffffffb18582p-1, - 0x1.fffffffb6d844p-1, - 0x1.fffffffbbd0aap-1, - 0x1.fffffffc0748fp-1, - 0x1.fffffffc4c96cp-1, - 0x1.fffffffc8d462p-1, - 0x1.fffffffcc9a41p-1, - 0x1.fffffffd01f89p-1, - 0x1.fffffffd36871p-1, - 0x1.fffffffd678edp-1, - 0x1.fffffffd954aep-1, - 0x1.fffffffdbff2ap-1, - 0x1.fffffffde7ba0p-1, - 0x1.fffffffe0cd16p-1, - 0x1.fffffffe2f664p-1, - 0x1.fffffffe4fa30p-1, - 0x1.fffffffe6daf7p-1, - 0x1.fffffffe89b0cp-1, - 
0x1.fffffffea3c9ap-1, - 0x1.fffffffebc1a9p-1, - 0x1.fffffffed2c21p-1, - 0x1.fffffffee7dc8p-1, - 0x1.fffffffefb847p-1, - 0x1.ffffffff0dd2bp-1, - 0x1.ffffffff1ede9p-1, - 0x1.ffffffff2ebdap-1, - 0x1.ffffffff3d843p-1, - 0x1.ffffffff4b453p-1, - 0x1.ffffffff58126p-1, - 0x1.ffffffff63fc3p-1, - 0x1.ffffffff6f121p-1, - 0x1.ffffffff79626p-1, - 0x1.ffffffff82fabp-1, - 0x1.ffffffff8be77p-1, - 0x1.ffffffff94346p-1, - 0x1.ffffffff9bec8p-1, - 0x1.ffffffffa319fp-1, - 0x1.ffffffffa9c63p-1, - 0x1.ffffffffaffa4p-1, - 0x1.ffffffffb5be5p-1, - 0x1.ffffffffbb1a2p-1, - 0x1.ffffffffc014ep-1, - 0x1.ffffffffc4b56p-1, - 0x1.ffffffffc901cp-1, - 0x1.ffffffffccfffp-1, - 0x1.ffffffffd0b56p-1, - 0x1.ffffffffd4271p-1, - 0x1.ffffffffd759dp-1, - 0x1.ffffffffda520p-1, - 0x1.ffffffffdd13cp-1, - 0x1.ffffffffdfa2dp-1, - 0x1.ffffffffe202dp-1, - 0x1.ffffffffe4371p-1, - 0x1.ffffffffe642ap-1, - 0x1.ffffffffe8286p-1, - 0x1.ffffffffe9eb0p-1, - 0x1.ffffffffeb8d0p-1, - 0x1.ffffffffed10ap-1, - 0x1.ffffffffee782p-1, - 0x1.ffffffffefc57p-1, - 0x1.fffffffff0fa7p-1, - 0x1.fffffffff218fp-1, - 0x1.fffffffff3227p-1, - 0x1.fffffffff4188p-1, - 0x1.fffffffff4fc9p-1, - 0x1.fffffffff5cfdp-1, - 0x1.fffffffff6939p-1, - 0x1.fffffffff748ep-1, - 0x1.fffffffff7f0dp-1, - 0x1.fffffffff88c5p-1, - 0x1.fffffffff91c6p-1, - 0x1.fffffffff9a1bp-1, - 0x1.fffffffffa1d2p-1, - 0x1.fffffffffa8f6p-1, - 0x1.fffffffffaf92p-1, - 0x1.fffffffffb5b0p-1, - 0x1.fffffffffbb58p-1, - 0x1.fffffffffc095p-1, - 0x1.fffffffffc56dp-1, - 0x1.fffffffffc9e8p-1, - 0x1.fffffffffce0dp-1, - 0x1.fffffffffd1e1p-1, - 0x1.fffffffffd56cp-1, - 0x1.fffffffffd8b3p-1, - 0x1.fffffffffdbbap-1, - 0x1.fffffffffde86p-1, - 0x1.fffffffffe11dp-1, - 0x1.fffffffffe380p-1, - 0x1.fffffffffe5b6p-1, - 0x1.fffffffffe7c0p-1, - 0x1.fffffffffe9a2p-1, - 0x1.fffffffffeb60p-1, - 0x1.fffffffffecfbp-1, - 0x1.fffffffffee77p-1, - 0x1.fffffffffefd6p-1, - 0x1.ffffffffff11ap-1, - 0x1.ffffffffff245p-1, - 0x1.ffffffffff359p-1, - 0x1.ffffffffff457p-1, - 0x1.ffffffffff542p-1, - 0x1.ffffffffff61bp-1, - 
0x1.ffffffffff6e3p-1, - 0x1.ffffffffff79bp-1, - 0x1.ffffffffff845p-1, - 0x1.ffffffffff8e2p-1, - 0x1.ffffffffff973p-1, - 0x1.ffffffffff9f8p-1, - 0x1.ffffffffffa73p-1, - 0x1.ffffffffffae4p-1, - 0x1.ffffffffffb4cp-1, - 0x1.ffffffffffbadp-1, - 0x1.ffffffffffc05p-1, - 0x1.ffffffffffc57p-1, - 0x1.ffffffffffca2p-1, - 0x1.ffffffffffce7p-1, - 0x1.ffffffffffd27p-1, - 0x1.ffffffffffd62p-1, - 0x1.ffffffffffd98p-1, - 0x1.ffffffffffdcap-1, - 0x1.ffffffffffdf8p-1, - 0x1.ffffffffffe22p-1, - 0x1.ffffffffffe49p-1, - 0x1.ffffffffffe6cp-1, - 0x1.ffffffffffe8dp-1, - 0x1.ffffffffffeabp-1, - 0x1.ffffffffffec7p-1, - 0x1.ffffffffffee1p-1, - 0x1.ffffffffffef8p-1, - 0x1.fffffffffff0ep-1, - 0x1.fffffffffff22p-1, - 0x1.fffffffffff34p-1, - 0x1.fffffffffff45p-1, - 0x1.fffffffffff54p-1, - 0x1.fffffffffff62p-1, - 0x1.fffffffffff6fp-1, - 0x1.fffffffffff7bp-1, - 0x1.fffffffffff86p-1, - 0x1.fffffffffff90p-1, - 0x1.fffffffffff9ap-1, - 0x1.fffffffffffa2p-1, - 0x1.fffffffffffaap-1, - 0x1.fffffffffffb1p-1, - 0x1.fffffffffffb8p-1, - 0x1.fffffffffffbep-1, - 0x1.fffffffffffc3p-1, - 0x1.fffffffffffc8p-1, - 0x1.fffffffffffcdp-1, - 0x1.fffffffffffd1p-1, - 0x1.fffffffffffd5p-1, - 0x1.fffffffffffd9p-1, - 0x1.fffffffffffdcp-1, - 0x1.fffffffffffdfp-1, - 0x1.fffffffffffe2p-1, - 0x1.fffffffffffe4p-1, - 0x1.fffffffffffe7p-1, - 0x1.fffffffffffe9p-1, - 0x1.fffffffffffebp-1, - 0x1.fffffffffffedp-1, - 0x1.fffffffffffeep-1, - 0x1.ffffffffffff0p-1, - 0x1.ffffffffffff1p-1, - 0x1.ffffffffffff3p-1, - 0x1.ffffffffffff4p-1, - 0x1.ffffffffffff5p-1, - 0x1.ffffffffffff6p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff8p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffffap-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 
0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - }, - .scale = { 0x1.20dd750429b6dp+0, - 0x1.20d8f1975c85dp+0, - 0x1.20cb67bd452c7p+0, - 0x1.20b4d8bac36c1p+0, - 0x1.209546ad13ccfp+0, - 0x1.206cb4897b148p+0, - 0x1.203b261cd0052p+0, - 0x1.2000a00ae3804p+0, - 0x1.1fbd27cdc72d3p+0, - 0x1.1f70c3b4f2cc7p+0, - 0x1.1f1b7ae44867fp+0, - 0x1.1ebd5552f795bp+0, - 0x1.1e565bca400d4p+0, - 0x1.1de697e413d28p+0, - 0x1.1d6e14099944ap+0, - 0x1.1cecdb718d61cp+0, - 0x1.1c62fa1e869b6p+0, - 0x1.1bd07cdd189acp+0, - 0x1.1b357141d95d5p+0, - 0x1.1a91e5a748165p+0, - 0x1.19e5e92b964abp+0, - 0x1.19318bae53a04p+0, - 0x1.1874ddcdfce24p+0, - 0x1.17aff0e56ec10p+0, - 0x1.16e2d7093cd8cp+0, - 0x1.160da304ed92fp+0, - 0x1.153068581b781p+0, - 0x1.144b3b337c90cp+0, - 0x1.135e3075d076bp+0, - 0x1.12695da8b5bdep+0, - 0x1.116cd8fd67618p+0, - 0x1.1068b94962e5ep+0, - 0x1.0f5d1602f7e41p+0, - 0x1.0e4a073dc1b91p+0, - 0x1.0d2fa5a70c168p+0, - 0x1.0c0e0a8223359p+0, - 0x1.0ae54fa490722p+0, - 0x1.09b58f724416bp+0, - 0x1.087ee4d9ad247p+0, - 0x1.07416b4fbfe7cp+0, - 0x1.05fd3ecbec297p+0, - 0x1.04b27bc403d30p+0, - 0x1.03613f2812dafp+0, - 0x1.0209a65e29545p+0, - 0x1.00abcf3e187a9p+0, - 0x1.fe8fb01a47307p-1, - 0x1.fbbbbef34b4b2p-1, - 0x1.f8dc092d58ff8p-1, - 0x1.f5f0cdaf15313p-1, - 0x1.f2fa4c16c0019p-1, - 0x1.eff8c4b1375dbp-1, - 0x1.ecec7870ebca7p-1, - 0x1.e9d5a8e4c934ep-1, - 0x1.e6b4982f158b9p-1, - 0x1.e38988fc46e72p-1, - 0x1.e054be79d3042p-1, - 0x1.dd167c4cf9d2ap-1, - 0x1.d9cf06898cdafp-1, - 
0x1.d67ea1a8b5368p-1, - 0x1.d325927fb9d89p-1, - 0x1.cfc41e36c7df9p-1, - 0x1.cc5a8a3fbea40p-1, - 0x1.c8e91c4d01368p-1, - 0x1.c5701a484ef9dp-1, - 0x1.c1efca49a5011p-1, - 0x1.be68728e29d5dp-1, - 0x1.bada596f25436p-1, - 0x1.b745c55905bf8p-1, - 0x1.b3aafcc27502ep-1, - 0x1.b00a46237d5bep-1, - 0x1.ac63e7ecc1411p-1, - 0x1.a8b8287ec6a09p-1, - 0x1.a5074e2157620p-1, - 0x1.a1519efaf889ep-1, - 0x1.9d97610879642p-1, - 0x1.99d8da149c13fp-1, - 0x1.96164fafd8de3p-1, - 0x1.925007283d7aap-1, - 0x1.8e86458169af8p-1, - 0x1.8ab94f6caa71dp-1, - 0x1.86e9694134b9ep-1, - 0x1.8316d6f48133dp-1, - 0x1.7f41dc12c9e89p-1, - 0x1.7b6abbb7aaf19p-1, - 0x1.7791b886e7403p-1, - 0x1.73b714a552763p-1, - 0x1.6fdb11b1e0c34p-1, - 0x1.6bfdf0beddaf5p-1, - 0x1.681ff24b4ab04p-1, - 0x1.6441563c665d4p-1, - 0x1.60625bd75d07bp-1, - 0x1.5c8341bb23767p-1, - 0x1.58a445da7c74cp-1, - 0x1.54c5a57629db0p-1, - 0x1.50e79d1749ac9p-1, - 0x1.4d0a6889dfd9fp-1, - 0x1.492e42d78d2c5p-1, - 0x1.4553664273d24p-1, - 0x1.417a0c4049fd0p-1, - 0x1.3da26d759aef5p-1, - 0x1.39ccc1b136d5ap-1, - 0x1.35f93fe7d1b3dp-1, - 0x1.32281e2fd1a92p-1, - 0x1.2e5991bd4cbfcp-1, - 0x1.2a8dcede3673bp-1, - 0x1.26c508f6bd0ffp-1, - 0x1.22ff727dd6f7bp-1, - 0x1.1f3d3cf9ffe5ap-1, - 0x1.1b7e98fe26217p-1, - 0x1.17c3b626c7a11p-1, - 0x1.140cc3173f007p-1, - 0x1.1059ed7740313p-1, - 0x1.0cab61f084b93p-1, - 0x1.09014c2ca74dap-1, - 0x1.055bd6d32e8d7p-1, - 0x1.01bb2b87c6968p-1, - 0x1.fc3ee5d1524b0p-2, - 0x1.f511a91a67d2ap-2, - 0x1.edeeee0959518p-2, - 0x1.e6d6ffaa65a25p-2, - 0x1.dfca26f5bbf88p-2, - 0x1.d8c8aace11e63p-2, - 0x1.d1d2cfff91594p-2, - 0x1.cae8d93f1d7b6p-2, - 0x1.c40b0729ed547p-2, - 0x1.bd3998457afdap-2, - 0x1.b674c8ffc6283p-2, - 0x1.afbcd3afe8ab6p-2, - 0x1.a911f096fbc26p-2, - 0x1.a27455e14c93cp-2, - 0x1.9be437a7de946p-2, - 0x1.9561c7f23a47bp-2, - 0x1.8eed36b886d93p-2, - 0x1.8886b1e5ecfd1p-2, - 0x1.822e655b417e6p-2, - 0x1.7be47af1f5d89p-2, - 0x1.75a91a7f4d2edp-2, - 0x1.6f7c69d7d3ef8p-2, - 0x1.695e8cd31867ep-2, - 0x1.634fa54fa285fp-2, - 0x1.5d4fd33729015p-2, - 
0x1.575f3483021c3p-2, - 0x1.517de540ce2a3p-2, - 0x1.4babff975a04cp-2, - 0x1.45e99bcbb7915p-2, - 0x1.4036d0468a7a2p-2, - 0x1.3a93b1998736cp-2, - 0x1.35005285227f1p-2, - 0x1.2f7cc3fe6f423p-2, - 0x1.2a09153529381p-2, - 0x1.24a55399ea239p-2, - 0x1.1f518ae487dc8p-2, - 0x1.1a0dc51a9934dp-2, - 0x1.14da0a961fd14p-2, - 0x1.0fb6620c550afp-2, - 0x1.0aa2d09497f2bp-2, - 0x1.059f59af7a906p-2, - 0x1.00abff4dec7a3p-2, - 0x1.f79183b101c5bp-3, - 0x1.edeb406d9c824p-3, - 0x1.e4652fadcb6b2p-3, - 0x1.daff4969c0b04p-3, - 0x1.d1b982c501370p-3, - 0x1.c893ce1dcbef7p-3, - 0x1.bf8e1b1ca2279p-3, - 0x1.b6a856c3ed54fp-3, - 0x1.ade26b7fbed95p-3, - 0x1.a53c4135a6526p-3, - 0x1.9cb5bd549b111p-3, - 0x1.944ec2e4f5630p-3, - 0x1.8c07329874652p-3, - 0x1.83deeada4d25ap-3, - 0x1.7bd5c7df3fe9cp-3, - 0x1.73eba3b5b07b7p-3, - 0x1.6c205655be71fp-3, - 0x1.6473b5b15a7a1p-3, - 0x1.5ce595c455b0ap-3, - 0x1.5575c8a468361p-3, - 0x1.4e241e912c305p-3, - 0x1.46f066040a832p-3, - 0x1.3fda6bc016994p-3, - 0x1.38e1fae1d6a9dp-3, - 0x1.3206dceef5f87p-3, - 0x1.2b48d9e5dea1cp-3, - 0x1.24a7b84d38971p-3, - 0x1.1e233d434b813p-3, - 0x1.17bb2c8d41535p-3, - 0x1.116f48a6476ccp-3, - 0x1.0b3f52ce8c383p-3, - 0x1.052b0b1a174eap-3, - 0x1.fe6460fef4680p-4, - 0x1.f2a901ccafb37p-4, - 0x1.e723726b824a9p-4, - 0x1.dbd32ac4c99b0p-4, - 0x1.d0b7a0f921e7cp-4, - 0x1.c5d0497c09e74p-4, - 0x1.bb1c972f23e50p-4, - 0x1.b09bfb7d11a83p-4, - 0x1.a64de673e8837p-4, - 0x1.9c31c6df3b1b8p-4, - 0x1.92470a61b6965p-4, - 0x1.888d1d8e510a3p-4, - 0x1.7f036c0107294p-4, - 0x1.75a96077274bap-4, - 0x1.6c7e64e7281cbp-4, - 0x1.6381e2980956bp-4, - 0x1.5ab342383d177p-4, - 0x1.5211ebf41880bp-4, - 0x1.499d478bca735p-4, - 0x1.4154bc68d75c3p-4, - 0x1.3937b1b319259p-4, - 0x1.31458e6542847p-4, - 0x1.297db960e4f63p-4, - 0x1.21df9981f8e53p-4, - 0x1.1a6a95b1e786fp-4, - 0x1.131e14fa1625dp-4, - 0x1.0bf97e95f2a64p-4, - 0x1.04fc3a0481321p-4, - 0x1.fc4b5e32d6259p-5, - 0x1.eeea8c1b1db93p-5, - 0x1.e1d4cf1e2450ap-5, - 0x1.d508f9a1ea64ep-5, - 0x1.c885df3451a07p-5, - 0x1.bc4a54a84e834p-5, - 
0x1.b055303221015p-5, - 0x1.a4a549829587ep-5, - 0x1.993979e14fffdp-5, - 0x1.8e109c4622913p-5, - 0x1.83298d717210ep-5, - 0x1.78832c03aa2b1p-5, - 0x1.6e1c5893c380bp-5, - 0x1.63f3f5c4de13bp-5, - 0x1.5a08e85af27e0p-5, - 0x1.505a174e9c929p-5, - 0x1.46e66be002240p-5, - 0x1.3dacd1a8d8ccdp-5, - 0x1.34ac36ad8dafep-5, - 0x1.2be38b6d92415p-5, - 0x1.2351c2f2d1449p-5, - 0x1.1af5d2e04f3f6p-5, - 0x1.12ceb37ff9bc3p-5, - 0x1.0adb5fcfa8c75p-5, - 0x1.031ad58d56279p-5, - 0x1.f7182a851bca2p-6, - 0x1.e85c449e377f2p-6, - 0x1.da0005e5f28dfp-6, - 0x1.cc0180af00a8bp-6, - 0x1.be5ecd2fcb5f9p-6, - 0x1.b1160991ff737p-6, - 0x1.a4255a00b9f03p-6, - 0x1.978ae8b55ce1bp-6, - 0x1.8b44e6031383ep-6, - 0x1.7f5188610ddc8p-6, - 0x1.73af0c737bb45p-6, - 0x1.685bb5134ef13p-6, - 0x1.5d55cb54cd53ap-6, - 0x1.529b9e8cf9a1ep-6, - 0x1.482b8455dc491p-6, - 0x1.3e03d891b37dep-6, - 0x1.3422fd6d12e2bp-6, - 0x1.2a875b5ffab56p-6, - 0x1.212f612dee7fbp-6, - 0x1.181983e5133ddp-6, - 0x1.0f443edc5ce49p-6, - 0x1.06ae13b0d3255p-6, - 0x1.fcab1483ea7fcp-7, - 0x1.ec72615a894c4p-7, - 0x1.dcaf3691fc448p-7, - 0x1.cd5ec93c12431p-7, - 0x1.be7e5ac24963bp-7, - 0x1.b00b38d6b3575p-7, - 0x1.a202bd6372dcep-7, - 0x1.94624e78e0fafp-7, - 0x1.87275e3a6869dp-7, - 0x1.7a4f6aca256cbp-7, - 0x1.6dd7fe3358230p-7, - 0x1.61beae53b72b7p-7, - 0x1.56011cc3b036dp-7, - 0x1.4a9cf6bda3f4cp-7, - 0x1.3f8ff5042a88ep-7, - 0x1.34d7dbc76d7e5p-7, - 0x1.2a727a89a3f14p-7, - 0x1.205dac02bd6b9p-7, - 0x1.1697560347b25p-7, - 0x1.0d1d69569b82dp-7, - 0x1.03ede1a45bfeep-7, - 0x1.f60d8aa2a88f2p-8, - 0x1.e4cc4abf7d065p-8, - 0x1.d4143a9dfe965p-8, - 0x1.c3e1a5f5c077cp-8, - 0x1.b430ecf4a83a8p-8, - 0x1.a4fe83fb9db25p-8, - 0x1.9646f35a76623p-8, - 0x1.8806d70b2fc36p-8, - 0x1.7a3ade6c8b3e4p-8, - 0x1.6cdfcbfc1e263p-8, - 0x1.5ff2750fe7820p-8, - 0x1.536fc18f7ce5cp-8, - 0x1.4754abacdf1dcp-8, - 0x1.3b9e3f9d06e3fp-8, - 0x1.30499b503957fp-8, - 0x1.2553ee2a336bfp-8, - 0x1.1aba78ba3af89p-8, - 0x1.107a8c7323a6ep-8, - 0x1.06918b6355624p-8, - 0x1.f9f9cfd9c3035p-9, - 0x1.e77448fb66bb9p-9, - 
0x1.d58da68fd1170p-9, - 0x1.c4412bf4b8f0bp-9, - 0x1.b38a3af2e55b4p-9, - 0x1.a3645330550ffp-9, - 0x1.93cb11a30d765p-9, - 0x1.84ba3004a50d0p-9, - 0x1.762d84469c18fp-9, - 0x1.6821000795a03p-9, - 0x1.5a90b00981d93p-9, - 0x1.4d78bba8ca5fdp-9, - 0x1.40d564548fad7p-9, - 0x1.34a305080681fp-9, - 0x1.28de11c5031ebp-9, - 0x1.1d83170fbf6fbp-9, - 0x1.128eb96be8798p-9, - 0x1.07fdb4dafea5fp-9, - 0x1.fb99b8b8279e1p-10, - 0x1.e7f232d9e2630p-10, - 0x1.d4fed7195d7e8p-10, - 0x1.c2b9cf7f893bfp-10, - 0x1.b11d702b3deb1p-10, - 0x1.a024365f771bdp-10, - 0x1.8fc8c794b03b5p-10, - 0x1.8005f08d6f1efp-10, - 0x1.70d6a46e07ddap-10, - 0x1.6235fbd7a4345p-10, - 0x1.541f340697987p-10, - 0x1.468dadf4080abp-10, - 0x1.397ced7af2b15p-10, - 0x1.2ce898809244ep-10, - 0x1.20cc76202c5fap-10, - 0x1.15246dda49d47p-10, - 0x1.09ec86c75d497p-10, - 0x1.fe41cd9bb4eeep-11, - 0x1.e97ba3b77f306p-11, - 0x1.d57f524723822p-11, - 0x1.c245d4b998479p-11, - 0x1.afc85e0f82e12p-11, - 0x1.9e005769dbc1dp-11, - 0x1.8ce75e9f6f8a0p-11, - 0x1.7c7744d9378f7p-11, - 0x1.6caa0d3582fe9p-11, - 0x1.5d79eb71e893bp-11, - 0x1.4ee1429bf7cc0p-11, - 0x1.40daa3c89f5b6p-11, - 0x1.3360ccd23db3ap-11, - 0x1.266ea71d4f71ap-11, - 0x1.19ff4663ae9dfp-11, - 0x1.0e0de78654d1ep-11, - 0x1.0295ef6591848p-11, - 0x1.ef25d37f49fe1p-12, - 0x1.da01102b5f851p-12, - 0x1.c5b5412dcafadp-12, - 0x1.b23a5a23e4210p-12, - 0x1.9f8893d8fd1c1p-12, - 0x1.8d986a4187285p-12, - 0x1.7c629a822bc9ep-12, - 0x1.6be02102b3520p-12, - 0x1.5c0a378c90bcap-12, - 0x1.4cda5374ea275p-12, - 0x1.3e4a23d1f4702p-12, - 0x1.30538fbb77ecdp-12, - 0x1.22f0b496539bdp-12, - 0x1.161be46ad3b50p-12, - 0x1.09cfa445b00ffp-12, - 0x1.fc0d55470cf51p-13, - 0x1.e577bbcd49935p-13, - 0x1.cfd4a5adec5bfp-13, - 0x1.bb1a9657ce465p-13, - 0x1.a740684026555p-13, - 0x1.943d4a1d1ed39p-13, - 0x1.8208bc334a6a5p-13, - 0x1.709a8db59f25cp-13, - 0x1.5feada379d8b7p-13, - 0x1.4ff207314a102p-13, - 0x1.40a8c1949f75ep-13, - 0x1.3207fb7420eb9p-13, - 0x1.2408e9ba3327fp-13, - 0x1.16a501f0e42cap-13, - 0x1.09d5f819c9e29p-13, - 
0x1.fb2b792b40a22p-14, - 0x1.e3bcf436a1a95p-14, - 0x1.cd55277c18d05p-14, - 0x1.b7e94604479dcp-14, - 0x1.a36eec00926ddp-14, - 0x1.8fdc1b2dcf7b9p-14, - 0x1.7d2737527c3f9p-14, - 0x1.6b4702d7d5849p-14, - 0x1.5a329b7d30748p-14, - 0x1.49e17724f4d41p-14, - 0x1.3a4b60ba9aa4dp-14, - 0x1.2b6875310f785p-14, - 0x1.1d312098e9dbap-14, - 0x1.0f9e1b4dd36dfp-14, - 0x1.02a8673a94691p-14, - 0x1.ec929a665b449p-15, - 0x1.d4f4b4c8e09edp-15, - 0x1.be6abbb10a5aap-15, - 0x1.a8e8cc1fadef6p-15, - 0x1.94637d5bacfdbp-15, - 0x1.80cfdc72220cfp-15, - 0x1.6e2367dc27f95p-15, - 0x1.5c540b4936fd2p-15, - 0x1.4b581b8d170fcp-15, - 0x1.3b2652b06c2b2p-15, - 0x1.2bb5cc22e5db6p-15, - 0x1.1cfe010e2052dp-15, - 0x1.0ef6c4c84a0fep-15, - 0x1.01984165a5f36p-15, - 0x1.e9b5e8d00ce76p-16, - 0x1.d16f5716c6c1ap-16, - 0x1.ba4f035d60e02p-16, - 0x1.a447b7b03f045p-16, - 0x1.8f4ccca7fc90dp-16, - 0x1.7b5223dac7336p-16, - 0x1.684c227fcacefp-16, - 0x1.562fac4329b48p-16, - 0x1.44f21e49054f2p-16, - 0x1.34894a5e24657p-16, - 0x1.24eb7254ccf83p-16, - 0x1.160f438c70913p-16, - 0x1.07ebd2a2d2844p-16, - 0x1.f4f12e9ab070ap-17, - 0x1.db5ad0b27805cp-17, - 0x1.c304efa2c6f4ep-17, - 0x1.abe09e9144b5ep-17, - 0x1.95df988e76644p-17, - 0x1.80f439b4ee04bp-17, - 0x1.6d11788a69c64p-17, - 0x1.5a2adfa0b4bc4p-17, - 0x1.4834877429b8fp-17, - 0x1.37231085c7d9ap-17, - 0x1.26eb9daed6f7ep-17, - 0x1.1783ceac28910p-17, - 0x1.08e1badf0fcedp-17, - 0x1.f5f7d88472604p-18, - 0x1.db92b5212fb8dp-18, - 0x1.c282cd3957edap-18, - 0x1.aab7abace48dcp-18, - 0x1.94219bfcb4928p-18, - 0x1.7eb1a2075864dp-18, - 0x1.6a597219a93d9p-18, - 0x1.570b69502f313p-18, - 0x1.44ba864670882p-18, - 0x1.335a62115bce2p-18, - 0x1.22df298214423p-18, - 0x1.133d96ae7e0ddp-18, - 0x1.046aeabcfcdecp-18, - 0x1.ecb9cfe1d8642p-19, - 0x1.d21397ead99cbp-19, - 0x1.b8d094c86d374p-19, - 0x1.a0df0f0c626dcp-19, - 0x1.8a2e269750a39p-19, - 0x1.74adc8f4064d3p-19, - 0x1.604ea819f007cp-19, - 0x1.4d0231928c6f9p-19, - 0x1.3aba85fe22e1fp-19, - 0x1.296a70f414053p-19, - 0x1.1905613b3abf2p-19, - 0x1.097f6156f32c5p-19, - 
0x1.f59a20caf6695p-20, - 0x1.d9c73698fb1dcp-20, - 0x1.bf716c6168baep-20, - 0x1.a6852c6b58392p-20, - 0x1.8eefd70594a88p-20, - 0x1.789fb715aae95p-20, - 0x1.6383f726a8e04p-20, - 0x1.4f8c96f26a26ap-20, - 0x1.3caa61607f920p-20, - 0x1.2acee2f5ecdb8p-20, - 0x1.19ec60b1242edp-20, - 0x1.09f5cf4dd2877p-20, - 0x1.f5bd95d8730d8p-21, - 0x1.d9371e2ff7c35p-21, - 0x1.be41de54d155ap-21, - 0x1.a4c89e08ef4f3p-21, - 0x1.8cb738399b12cp-21, - 0x1.75fa8dbc84becp-21, - 0x1.608078a70dcbcp-21, - 0x1.4c37c0394d094p-21, - 0x1.39100d5687bfep-21, - 0x1.26f9df8519bd6p-21, - 0x1.15e6827001f18p-21, - 0x1.05c803e4831c1p-21, - 0x1.ed22548cffd35p-22, - 0x1.d06ad6ecdf971p-22, - 0x1.b551c847fbc96p-22, - 0x1.9bc09f112b494p-22, - 0x1.83a1ff0aa239dp-22, - 0x1.6ce1aa3fd7bddp-22, - 0x1.576c72b514859p-22, - 0x1.43302cc4a0da8p-22, - 0x1.301ba221dc9bbp-22, - 0x1.1e1e857adc568p-22, - 0x1.0d2966b1746f7p-22, - 0x1.fa5b4f49cc6b2p-23, - 0x1.dc3ae30b55c16p-23, - 0x1.bfd7555a3bd68p-23, - 0x1.a517d9e61628ap-23, - 0x1.8be4f8f6c951fp-23, - 0x1.74287ded49339p-23, - 0x1.5dcd669f2cd34p-23, - 0x1.48bfd38302870p-23, - 0x1.34ecf8a3c124ap-23, - 0x1.22430f521cbcfp-23, - 0x1.10b1488aeb235p-23, - 0x1.0027c00a263a6p-23, - 0x1.e12ee004efc37p-24, - 0x1.c3e44ae32b16bp-24, - 0x1.a854ea14102a8p-24, - 0x1.8e6761569f45dp-24, - 0x1.7603bac345f65p-24, - 0x1.5f1353cdad001p-24, - 0x1.4980cb3c80949p-24, - 0x1.3537f00b6ad4dp-24, - 0x1.2225b12bffc68p-24, - 0x1.10380e1adb7e9p-24, - 0x1.febc107d5efaap-25, - 0x1.df0f2a0ee6946p-25, - 0x1.c14b2188bcee4p-25, - 0x1.a553644f7f07dp-25, - 0x1.8b0cfce0579dfp-25, - 0x1.725e7c5dd20f7p-25, - 0x1.5b2fe547a1340p-25, - 0x1.456a974e92e93p-25, - 0x1.30f93c3699078p-25, - 0x1.1dc7b5b978cf8p-25, - 0x1.0bc30c5d52f15p-25, - 0x1.f5b2be65a0c7fp-26, - 0x1.d5f3a8dea7357p-26, - 0x1.b82915b03515bp-26, - 0x1.9c3517e789488p-26, - 0x1.81fb7df06136ep-26, - 0x1.6961b8d641d06p-26, - 0x1.524ec4d916caep-26, - 0x1.3cab1343d18d1p-26, - 0x1.2860757487a01p-26, - 0x1.155a09065d4f7p-26, - 0x1.0384250e4c9fcp-26, - 0x1.e59890b926c78p-27, - 
0x1.c642116a8a9e3p-27, - 0x1.a8e405e651ab6p-27, - 0x1.8d5f98114f872p-27, - 0x1.7397c5a66e307p-27, - 0x1.5b71456c5a4c4p-27, - 0x1.44d26de513197p-27, - 0x1.2fa31d6371537p-27, - 0x1.1bcca373b7b43p-27, - 0x1.0939ab853339fp-27, - 0x1.efac5187b2863p-28, - 0x1.cf1e86235d0e6p-28, - 0x1.b0a68a2128babp-28, - 0x1.9423165bc4444p-28, - 0x1.7974e743dea3cp-28, - 0x1.607e9eacd1050p-28, - 0x1.4924a74dec728p-28, - 0x1.334d19e0c2160p-28, - 0x1.1edfa3c5f5ccap-28, - 0x1.0bc56f1b54701p-28, - 0x1.f3d2185e047d9p-29, - 0x1.d26cb87945e87p-29, - 0x1.b334fac4b9f99p-29, - 0x1.96076f7918d1cp-29, - 0x1.7ac2d72fc2c63p-29, - 0x1.614801550319ep-29, - 0x1.4979ac8b28926p-29, - 0x1.333c68e2d0548p-29, - 0x1.1e767bce37dd7p-29, - 0x1.0b0fc5b6d05a0p-29, - 0x1.f1e3523b41d7dp-30, - 0x1.d00de6608effep-30, - 0x1.b0778b7b3301ap-30, - 0x1.92fb04ec0f6cfp-30, - 0x1.77756ec9f78fap-30, - 0x1.5dc61922d5a06p-30, - 0x1.45ce65699ff6dp-30, - 0x1.2f71a5f159970p-30, - 0x1.1a94ff571654fp-30, - 0x1.071f4bbea09ecp-30, - 0x1.e9f1ff8ddd774p-31, - 0x1.c818223a202c7p-31, - 0x1.a887bd2b4404dp-31, - 0x1.8b1a336c5eb6bp-31, - 0x1.6fab63324088ap-31, - 0x1.56197e30205bap-31, - 0x1.3e44e45301b92p-31, - 0x1.281000bfe4c3fp-31, - 0x1.135f28f2d50b4p-31, - 0x1.00187dded5975p-31, - 0x1.dc479de0ef001p-32, - 0x1.bad4fdad3caa1p-32, - 0x1.9baed3ed27ab8p-32, - 0x1.7ead9ce4285bbp-32, - 0x1.63ac6b4edc88ep-32, - 0x1.4a88be2a6390cp-32, - 0x1.332259185f1a0p-32, - 0x1.1d5b1f3793044p-32, - 0x1.0916f04b6e18bp-32, - 0x1.ec77101de6926p-33, - 0x1.c960bf23153e0p-33, - 0x1.a8bd20fc65ef7p-33, - 0x1.8a61745ec7d1dp-33, - 0x1.6e25d0e756261p-33, - 0x1.53e4f7d1666cbp-33, - 0x1.3b7c27a7ddb0ep-33, - 0x1.24caf2c32af14p-33, - 0x1.0fb3186804d0fp-33, - 0x1.f830c0bb41fd7p-34, - 0x1.d3c0f1a91c846p-34, - 0x1.b1e5acf351d87p-34, - 0x1.92712d259ce66p-34, - 0x1.7538c60a04476p-34, - 0x1.5a14b04b47879p-34, - 0x1.40dfd87456f4cp-34, - 0x1.2977b1172b9d5p-34, - 0x1.13bc07e891491p-34, - 0x1.ff1dbb4300811p-35, - 0x1.d9a880f306bd8p-35, - 0x1.b6e45220b55e0p-35, - 0x1.96a0b33f2c4dap-35, - 
0x1.78b07e9e924acp-35, - 0x1.5ce9ab1670dd2p-35, - 0x1.4325167006bb0p-35, - 0x1.2b3e53538ff3fp-35, - 0x1.15137a7f44864p-35, - 0x1.0084ff125639dp-35, - 0x1.daeb0b7311ec7p-36, - 0x1.b7937d1c40c52p-36, - 0x1.96d082f59ab06p-36, - 0x1.7872d9fa10aadp-36, - 0x1.5c4e8e37bc7d0p-36, - 0x1.423ac0df49a40p-36, - 0x1.2a117230ad284p-36, - 0x1.13af4f04f9998p-36, - 0x1.fde703724e560p-37, - 0x1.d77f0c82e7641p-37, - 0x1.b3ee02611d7ddp-37, - 0x1.92ff33023d5bdp-37, - 0x1.7481a9e69f53fp-37, - 0x1.5847eda620959p-37, - 0x1.3e27c1fcc74bdp-37, - 0x1.25f9ee0b923dcp-37, - 0x1.0f9a0686531ffp-37, - 0x1.f5cc7718082afp-38, - 0x1.cf7e53d6a2ca5p-38, - 0x1.ac0f5f3229372p-38, - 0x1.8b498644847eap-38, - 0x1.6cfa9bcca59dcp-38, - 0x1.50f411d4fd2cdp-38, - 0x1.370ab8327af5ep-38, - 0x1.1f167f88c6b6ep-38, - 0x1.08f24085d4597p-38, - 0x1.e8f70e181d619p-39, - 0x1.c324c20e337dcp-39, - 0x1.a03261574b54ep-39, - 0x1.7fe903cdf5855p-39, - 0x1.6215c58da3450p-39, - 0x1.46897d4b69fc6p-39, - 0x1.2d1877d731b7bp-39, - 0x1.159a386b11517p-39, - 0x1.ffd27ae9393cep-40, - 0x1.d7c593130dd0bp-40, - 0x1.b2cd607c79bcfp-40, - 0x1.90ae4d3405651p-40, - 0x1.71312dd1759e2p-40, - 0x1.5422ef5d8949dp-40, - 0x1.39544b0ecc957p-40, - 0x1.20997f73e73ddp-40, - 0x1.09ca0eaacd277p-40, - 0x1.e9810295890ecp-41, - 0x1.c2b45b5aa4a1dp-41, - 0x1.9eee068fa7596p-41, - 0x1.7df2b399c10a8p-41, - 0x1.5f8b87a31bd85p-41, - 0x1.4385c96e9a2d9p-41, - 0x1.29b2933ef4cbcp-41, - 0x1.11e68a6378f8ap-41, - 0x1.f7f338086a86bp-42, - 0x1.cf8d7d9ce040ap-42, - 0x1.aa577251ae484p-42, - 0x1.8811d739efb5ep-42, - 0x1.68823e52970bep-42, - 0x1.4b72ae68e8b4cp-42, - 0x1.30b14dbe876bcp-42, - 0x1.181012ef86610p-42, - 0x1.01647ba798744p-42, - 0x1.d90e917701675p-43, - 0x1.b2a87e86d0c8ap-43, - 0x1.8f53dcb377293p-43, - 0x1.6ed2f2515e933p-43, - 0x1.50ecc9ed47f19p-43, - 0x1.356cd5ce7799ep-43, - 0x1.1c229a587ab78p-43, - 0x1.04e15ecc7f3f6p-43, - 0x1.deffc7e6a6017p-44, - 0x1.b7b040832f310p-44, - 0x1.938e021f36d76p-44, - 0x1.7258610b3b233p-44, - 0x1.53d3bfc82a909p-44, - 0x1.37c92babdc2fdp-44, - 
0x1.1e06010120f6ap-44, - 0x1.065b9616170d4p-44, - 0x1.e13dd96b3753ap-45, - 0x1.b950d32467392p-45, - 0x1.94a72263259a5p-45, - 0x1.72fd93e036cdcp-45, - 0x1.54164576929abp-45, - 0x1.37b83c521fe96p-45, - 0x1.1daf033182e96p-45, - 0x1.05ca50205d26ap-45, - 0x1.dfbb6235639fap-46, - 0x1.b7807e294781fp-46, - 0x1.9298add70a734p-46, - 0x1.70beaf9c7ffb6p-46, - 0x1.51b2cd6709222p-46, - 0x1.353a6cf7f7fffp-46, - 0x1.1b1fa8cbe84a7p-46, - 0x1.0330f0fd69921p-46, - 0x1.da81670f96f9bp-47, - 0x1.b24a16b4d09aap-47, - 0x1.8d6eeb6efdbd6p-47, - 0x1.6ba91ac734785p-47, - 0x1.4cb7966770ab5p-47, - 0x1.305e9721d0981p-47, - 0x1.1667311fff70ap-47, - 0x1.fd3de10d62855p-48, - 0x1.d1aefbcd48d0cp-48, - 0x1.a9cc93c25aca9p-48, - 0x1.85487ee3ea735p-48, - 0x1.63daf8b4b1e0cp-48, - 0x1.45421e69a6ca1p-48, - 0x1.294175802d99ap-48, - 0x1.0fa17bf41068fp-48, - 0x1.f05e82aae2bb9p-49, - 0x1.c578101b29058p-49, - 0x1.9e39dc5dd2f7cp-49, - 0x1.7a553a728bbf2p-49, - 0x1.5982008db1304p-49, - 0x1.3b7e00422e51bp-49, - 0x1.200c898d9ee3ep-49, - 0x1.06f5f7eb65a56p-49, - 0x1.e00e9148a1d25p-50, - 0x1.b623734024e92p-50, - 0x1.8fd4e01891bf8p-50, - 0x1.6cd44c7470d89p-50, - 0x1.4cd9c04158cd7p-50, - 0x1.2fa34bf5c8344p-50, - 0x1.14f4890ff2461p-50, - 0x1.f92c49dfa4df5p-51, - 0x1.ccaaea71ab0dfp-51, - 0x1.a40829f001197p-51, - 0x1.7eef13b59e96cp-51, - 0x1.5d11e1a252bf5p-51, - 0x1.3e296303b2297p-51, - 0x1.21f47009f43cep-51, - 0x1.083768c5e4541p-51, - 0x1.e1777d831265ep-52, - 0x1.b69f10b0191b5p-52, - 0x1.8f8a3a05b5b52p-52, - 0x1.6be573c40c8e7p-52, - 0x1.4b645ba991fdbp-52, - 0x1.2dc119095729fp-52, - }, -}; diff --git a/sysdeps/aarch64/fpu/sv_erff_data.c b/sysdeps/aarch64/fpu/sv_erff_data.c deleted file mode 100644 index 6dcd72af69..0000000000 --- a/sysdeps/aarch64/fpu/sv_erff_data.c +++ /dev/null @@ -1,1058 +0,0 @@ -/* Table for SVE erff approximation - - Copyright (C) 2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include "vecmath_config.h" - -/* Lookup table used in SVE erff. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 4.0 (513 values): - - __erff_data.erf contains the values of erf(r), - - __erff_data.scale contains the values of 2/sqrt(pi)*exp(-r^2). - Note that indices 0 and 1 are never hit by the algorithm, since lookup is - performed only for x >= 1/64-1/512. 
*/ -const struct sv_erff_data __sv_erff_data = { - .erf = { 0x0.000000p+0, - 0x1.20dbf4p-7, - 0x1.20d770p-6, - 0x1.b137e0p-6, - 0x1.20c564p-5, - 0x1.68e5d4p-5, - 0x1.b0fafep-5, - 0x1.f902a8p-5, - 0x1.207d48p-4, - 0x1.44703ep-4, - 0x1.68591ap-4, - 0x1.8c36bep-4, - 0x1.b00812p-4, - 0x1.d3cbf8p-4, - 0x1.f7815ap-4, - 0x1.0d9390p-3, - 0x1.1f5e1ap-3, - 0x1.311fc2p-3, - 0x1.42d7fcp-3, - 0x1.548642p-3, - 0x1.662a0cp-3, - 0x1.77c2d2p-3, - 0x1.895010p-3, - 0x1.9ad142p-3, - 0x1.ac45e4p-3, - 0x1.bdad72p-3, - 0x1.cf076ep-3, - 0x1.e05354p-3, - 0x1.f190aap-3, - 0x1.015f78p-2, - 0x1.09eed6p-2, - 0x1.127632p-2, - 0x1.1af54ep-2, - 0x1.236bf0p-2, - 0x1.2bd9dcp-2, - 0x1.343ed6p-2, - 0x1.3c9aa8p-2, - 0x1.44ed18p-2, - 0x1.4d35f0p-2, - 0x1.5574f4p-2, - 0x1.5da9f4p-2, - 0x1.65d4b8p-2, - 0x1.6df50ap-2, - 0x1.760abap-2, - 0x1.7e1594p-2, - 0x1.861566p-2, - 0x1.8e0a02p-2, - 0x1.95f336p-2, - 0x1.9dd0d2p-2, - 0x1.a5a2acp-2, - 0x1.ad6896p-2, - 0x1.b52264p-2, - 0x1.bccfecp-2, - 0x1.c47104p-2, - 0x1.cc0584p-2, - 0x1.d38d44p-2, - 0x1.db081cp-2, - 0x1.e275eap-2, - 0x1.e9d68ap-2, - 0x1.f129d4p-2, - 0x1.f86faap-2, - 0x1.ffa7eap-2, - 0x1.03693ap-1, - 0x1.06f794p-1, - 0x1.0a7ef6p-1, - 0x1.0dff50p-1, - 0x1.117894p-1, - 0x1.14eab4p-1, - 0x1.1855a6p-1, - 0x1.1bb95cp-1, - 0x1.1f15ccp-1, - 0x1.226ae8p-1, - 0x1.25b8a8p-1, - 0x1.28ff02p-1, - 0x1.2c3decp-1, - 0x1.2f755cp-1, - 0x1.32a54cp-1, - 0x1.35cdb4p-1, - 0x1.38ee8ap-1, - 0x1.3c07cap-1, - 0x1.3f196ep-1, - 0x1.42236ep-1, - 0x1.4525c8p-1, - 0x1.482074p-1, - 0x1.4b1372p-1, - 0x1.4dfebap-1, - 0x1.50e24cp-1, - 0x1.53be26p-1, - 0x1.569244p-1, - 0x1.595ea6p-1, - 0x1.5c2348p-1, - 0x1.5ee02ep-1, - 0x1.619556p-1, - 0x1.6442c0p-1, - 0x1.66e86ep-1, - 0x1.69865ep-1, - 0x1.6c1c98p-1, - 0x1.6eab18p-1, - 0x1.7131e6p-1, - 0x1.73b102p-1, - 0x1.762870p-1, - 0x1.789836p-1, - 0x1.7b0058p-1, - 0x1.7d60d8p-1, - 0x1.7fb9c0p-1, - 0x1.820b12p-1, - 0x1.8454d6p-1, - 0x1.869712p-1, - 0x1.88d1cep-1, - 0x1.8b050ep-1, - 0x1.8d30dep-1, - 0x1.8f5544p-1, - 0x1.91724ap-1, - 0x1.9387f6p-1, - 
0x1.959652p-1, - 0x1.979d68p-1, - 0x1.999d42p-1, - 0x1.9b95e8p-1, - 0x1.9d8768p-1, - 0x1.9f71cap-1, - 0x1.a1551ap-1, - 0x1.a33162p-1, - 0x1.a506b0p-1, - 0x1.a6d50cp-1, - 0x1.a89c86p-1, - 0x1.aa5d26p-1, - 0x1.ac16fcp-1, - 0x1.adca14p-1, - 0x1.af767ap-1, - 0x1.b11c3cp-1, - 0x1.b2bb68p-1, - 0x1.b4540ap-1, - 0x1.b5e630p-1, - 0x1.b771e8p-1, - 0x1.b8f742p-1, - 0x1.ba764ap-1, - 0x1.bbef10p-1, - 0x1.bd61a2p-1, - 0x1.bece0ep-1, - 0x1.c03464p-1, - 0x1.c194b2p-1, - 0x1.c2ef08p-1, - 0x1.c44376p-1, - 0x1.c5920ap-1, - 0x1.c6dad2p-1, - 0x1.c81de2p-1, - 0x1.c95b46p-1, - 0x1.ca930ep-1, - 0x1.cbc54cp-1, - 0x1.ccf20cp-1, - 0x1.ce1962p-1, - 0x1.cf3b5cp-1, - 0x1.d0580cp-1, - 0x1.d16f7ep-1, - 0x1.d281c4p-1, - 0x1.d38ef0p-1, - 0x1.d49710p-1, - 0x1.d59a34p-1, - 0x1.d6986cp-1, - 0x1.d791cap-1, - 0x1.d8865ep-1, - 0x1.d97636p-1, - 0x1.da6162p-1, - 0x1.db47f4p-1, - 0x1.dc29fcp-1, - 0x1.dd0788p-1, - 0x1.dde0aap-1, - 0x1.deb570p-1, - 0x1.df85eap-1, - 0x1.e0522ap-1, - 0x1.e11a3ep-1, - 0x1.e1de36p-1, - 0x1.e29e22p-1, - 0x1.e35a12p-1, - 0x1.e41214p-1, - 0x1.e4c638p-1, - 0x1.e5768cp-1, - 0x1.e62322p-1, - 0x1.e6cc08p-1, - 0x1.e7714ap-1, - 0x1.e812fcp-1, - 0x1.e8b12ap-1, - 0x1.e94be4p-1, - 0x1.e9e336p-1, - 0x1.ea7730p-1, - 0x1.eb07e2p-1, - 0x1.eb9558p-1, - 0x1.ec1fa2p-1, - 0x1.eca6ccp-1, - 0x1.ed2ae6p-1, - 0x1.edabfcp-1, - 0x1.ee2a1ep-1, - 0x1.eea556p-1, - 0x1.ef1db4p-1, - 0x1.ef9344p-1, - 0x1.f00614p-1, - 0x1.f07630p-1, - 0x1.f0e3a6p-1, - 0x1.f14e82p-1, - 0x1.f1b6d0p-1, - 0x1.f21ca0p-1, - 0x1.f27ff8p-1, - 0x1.f2e0eap-1, - 0x1.f33f7ep-1, - 0x1.f39bc2p-1, - 0x1.f3f5c2p-1, - 0x1.f44d88p-1, - 0x1.f4a31ep-1, - 0x1.f4f694p-1, - 0x1.f547f2p-1, - 0x1.f59742p-1, - 0x1.f5e490p-1, - 0x1.f62fe8p-1, - 0x1.f67952p-1, - 0x1.f6c0dcp-1, - 0x1.f7068cp-1, - 0x1.f74a6ep-1, - 0x1.f78c8cp-1, - 0x1.f7cceep-1, - 0x1.f80ba2p-1, - 0x1.f848acp-1, - 0x1.f8841ap-1, - 0x1.f8bdf2p-1, - 0x1.f8f63ep-1, - 0x1.f92d08p-1, - 0x1.f96256p-1, - 0x1.f99634p-1, - 0x1.f9c8a8p-1, - 0x1.f9f9bap-1, - 0x1.fa2974p-1, - 0x1.fa57dep-1, - 
0x1.fa84fep-1, - 0x1.fab0dep-1, - 0x1.fadb84p-1, - 0x1.fb04f6p-1, - 0x1.fb2d40p-1, - 0x1.fb5464p-1, - 0x1.fb7a6cp-1, - 0x1.fb9f60p-1, - 0x1.fbc344p-1, - 0x1.fbe61ep-1, - 0x1.fc07fap-1, - 0x1.fc28d8p-1, - 0x1.fc48c2p-1, - 0x1.fc67bcp-1, - 0x1.fc85d0p-1, - 0x1.fca2fep-1, - 0x1.fcbf52p-1, - 0x1.fcdaccp-1, - 0x1.fcf576p-1, - 0x1.fd0f54p-1, - 0x1.fd286ap-1, - 0x1.fd40bep-1, - 0x1.fd5856p-1, - 0x1.fd6f34p-1, - 0x1.fd8562p-1, - 0x1.fd9ae2p-1, - 0x1.fdafb8p-1, - 0x1.fdc3e8p-1, - 0x1.fdd77ap-1, - 0x1.fdea6ep-1, - 0x1.fdfcccp-1, - 0x1.fe0e96p-1, - 0x1.fe1fd0p-1, - 0x1.fe3080p-1, - 0x1.fe40a6p-1, - 0x1.fe504cp-1, - 0x1.fe5f70p-1, - 0x1.fe6e18p-1, - 0x1.fe7c46p-1, - 0x1.fe8a00p-1, - 0x1.fe9748p-1, - 0x1.fea422p-1, - 0x1.feb090p-1, - 0x1.febc96p-1, - 0x1.fec836p-1, - 0x1.fed374p-1, - 0x1.fede52p-1, - 0x1.fee8d4p-1, - 0x1.fef2fep-1, - 0x1.fefccep-1, - 0x1.ff064cp-1, - 0x1.ff0f76p-1, - 0x1.ff1852p-1, - 0x1.ff20e0p-1, - 0x1.ff2924p-1, - 0x1.ff3120p-1, - 0x1.ff38d6p-1, - 0x1.ff4048p-1, - 0x1.ff4778p-1, - 0x1.ff4e68p-1, - 0x1.ff551ap-1, - 0x1.ff5b90p-1, - 0x1.ff61ccp-1, - 0x1.ff67d0p-1, - 0x1.ff6d9ep-1, - 0x1.ff7338p-1, - 0x1.ff789ep-1, - 0x1.ff7dd4p-1, - 0x1.ff82dap-1, - 0x1.ff87b2p-1, - 0x1.ff8c5cp-1, - 0x1.ff90dcp-1, - 0x1.ff9532p-1, - 0x1.ff9960p-1, - 0x1.ff9d68p-1, - 0x1.ffa14ap-1, - 0x1.ffa506p-1, - 0x1.ffa8a0p-1, - 0x1.ffac18p-1, - 0x1.ffaf6ep-1, - 0x1.ffb2a6p-1, - 0x1.ffb5bep-1, - 0x1.ffb8b8p-1, - 0x1.ffbb98p-1, - 0x1.ffbe5ap-1, - 0x1.ffc102p-1, - 0x1.ffc390p-1, - 0x1.ffc606p-1, - 0x1.ffc862p-1, - 0x1.ffcaa8p-1, - 0x1.ffccd8p-1, - 0x1.ffcef4p-1, - 0x1.ffd0fap-1, - 0x1.ffd2eap-1, - 0x1.ffd4cap-1, - 0x1.ffd696p-1, - 0x1.ffd84ep-1, - 0x1.ffd9f8p-1, - 0x1.ffdb90p-1, - 0x1.ffdd18p-1, - 0x1.ffde90p-1, - 0x1.ffdffap-1, - 0x1.ffe154p-1, - 0x1.ffe2a2p-1, - 0x1.ffe3e2p-1, - 0x1.ffe514p-1, - 0x1.ffe63cp-1, - 0x1.ffe756p-1, - 0x1.ffe866p-1, - 0x1.ffe96ap-1, - 0x1.ffea64p-1, - 0x1.ffeb54p-1, - 0x1.ffec3ap-1, - 0x1.ffed16p-1, - 0x1.ffedeap-1, - 0x1.ffeeb4p-1, - 0x1.ffef76p-1, - 
0x1.fff032p-1, - 0x1.fff0e4p-1, - 0x1.fff18ep-1, - 0x1.fff232p-1, - 0x1.fff2d0p-1, - 0x1.fff366p-1, - 0x1.fff3f6p-1, - 0x1.fff480p-1, - 0x1.fff504p-1, - 0x1.fff582p-1, - 0x1.fff5fcp-1, - 0x1.fff670p-1, - 0x1.fff6dep-1, - 0x1.fff74ap-1, - 0x1.fff7aep-1, - 0x1.fff810p-1, - 0x1.fff86cp-1, - 0x1.fff8c6p-1, - 0x1.fff91cp-1, - 0x1.fff96cp-1, - 0x1.fff9bap-1, - 0x1.fffa04p-1, - 0x1.fffa4cp-1, - 0x1.fffa90p-1, - 0x1.fffad0p-1, - 0x1.fffb0ep-1, - 0x1.fffb4ap-1, - 0x1.fffb82p-1, - 0x1.fffbb8p-1, - 0x1.fffbecp-1, - 0x1.fffc1ep-1, - 0x1.fffc4ep-1, - 0x1.fffc7ap-1, - 0x1.fffca6p-1, - 0x1.fffccep-1, - 0x1.fffcf6p-1, - 0x1.fffd1ap-1, - 0x1.fffd3ep-1, - 0x1.fffd60p-1, - 0x1.fffd80p-1, - 0x1.fffda0p-1, - 0x1.fffdbep-1, - 0x1.fffddap-1, - 0x1.fffdf4p-1, - 0x1.fffe0ep-1, - 0x1.fffe26p-1, - 0x1.fffe3ep-1, - 0x1.fffe54p-1, - 0x1.fffe68p-1, - 0x1.fffe7ep-1, - 0x1.fffe90p-1, - 0x1.fffea2p-1, - 0x1.fffeb4p-1, - 0x1.fffec4p-1, - 0x1.fffed4p-1, - 0x1.fffee4p-1, - 0x1.fffef2p-1, - 0x1.ffff00p-1, - 0x1.ffff0cp-1, - 0x1.ffff18p-1, - 0x1.ffff24p-1, - 0x1.ffff30p-1, - 0x1.ffff3ap-1, - 0x1.ffff44p-1, - 0x1.ffff4ep-1, - 0x1.ffff56p-1, - 0x1.ffff60p-1, - 0x1.ffff68p-1, - 0x1.ffff70p-1, - 0x1.ffff78p-1, - 0x1.ffff7ep-1, - 0x1.ffff84p-1, - 0x1.ffff8cp-1, - 0x1.ffff92p-1, - 0x1.ffff98p-1, - 0x1.ffff9cp-1, - 0x1.ffffa2p-1, - 0x1.ffffa6p-1, - 0x1.ffffacp-1, - 0x1.ffffb0p-1, - 0x1.ffffb4p-1, - 0x1.ffffb8p-1, - 0x1.ffffbcp-1, - 0x1.ffffc0p-1, - 0x1.ffffc4p-1, - 0x1.ffffc6p-1, - 0x1.ffffcap-1, - 0x1.ffffccp-1, - 0x1.ffffd0p-1, - 0x1.ffffd2p-1, - 0x1.ffffd4p-1, - 0x1.ffffd6p-1, - 0x1.ffffd8p-1, - 0x1.ffffdcp-1, - 0x1.ffffdep-1, - 0x1.ffffdep-1, - 0x1.ffffe0p-1, - 0x1.ffffe2p-1, - 0x1.ffffe4p-1, - 0x1.ffffe6p-1, - 0x1.ffffe8p-1, - 0x1.ffffe8p-1, - 0x1.ffffeap-1, - 0x1.ffffeap-1, - 0x1.ffffecp-1, - 0x1.ffffeep-1, - 0x1.ffffeep-1, - 0x1.fffff0p-1, - 0x1.fffff0p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff6p-1, - 0x1.fffff6p-1, - 
0x1.fffff6p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - }, - .scale = { 0x1.20dd76p+0, - 0x1.20d8f2p+0, - 0x1.20cb68p+0, - 0x1.20b4d8p+0, - 0x1.209546p+0, - 0x1.206cb4p+0, - 0x1.203b26p+0, - 0x1.2000a0p+0, - 0x1.1fbd28p+0, - 0x1.1f70c4p+0, - 0x1.1f1b7ap+0, - 0x1.1ebd56p+0, - 0x1.1e565cp+0, - 0x1.1de698p+0, - 0x1.1d6e14p+0, - 0x1.1cecdcp+0, - 0x1.1c62fap+0, - 0x1.1bd07cp+0, - 0x1.1b3572p+0, - 0x1.1a91e6p+0, - 0x1.19e5eap+0, - 0x1.19318cp+0, - 0x1.1874dep+0, - 0x1.17aff0p+0, - 0x1.16e2d8p+0, - 0x1.160da4p+0, - 0x1.153068p+0, - 0x1.144b3cp+0, - 0x1.135e30p+0, - 0x1.12695ep+0, - 0x1.116cd8p+0, - 0x1.1068bap+0, - 0x1.0f5d16p+0, - 0x1.0e4a08p+0, - 0x1.0d2fa6p+0, - 0x1.0c0e0ap+0, - 0x1.0ae550p+0, - 0x1.09b590p+0, - 0x1.087ee4p+0, - 0x1.07416cp+0, - 0x1.05fd3ep+0, - 0x1.04b27cp+0, - 0x1.036140p+0, - 0x1.0209a6p+0, - 0x1.00abd0p+0, - 0x1.fe8fb0p-1, - 0x1.fbbbbep-1, - 0x1.f8dc0ap-1, - 0x1.f5f0cep-1, - 0x1.f2fa4cp-1, - 0x1.eff8c4p-1, - 0x1.ecec78p-1, - 0x1.e9d5a8p-1, - 0x1.e6b498p-1, - 0x1.e38988p-1, - 0x1.e054bep-1, - 0x1.dd167cp-1, - 0x1.d9cf06p-1, - 0x1.d67ea2p-1, - 0x1.d32592p-1, - 0x1.cfc41ep-1, - 0x1.cc5a8ap-1, - 0x1.c8e91cp-1, - 0x1.c5701ap-1, - 0x1.c1efcap-1, - 0x1.be6872p-1, - 0x1.bada5ap-1, - 0x1.b745c6p-1, - 
0x1.b3aafcp-1, - 0x1.b00a46p-1, - 0x1.ac63e8p-1, - 0x1.a8b828p-1, - 0x1.a5074ep-1, - 0x1.a1519ep-1, - 0x1.9d9762p-1, - 0x1.99d8dap-1, - 0x1.961650p-1, - 0x1.925008p-1, - 0x1.8e8646p-1, - 0x1.8ab950p-1, - 0x1.86e96ap-1, - 0x1.8316d6p-1, - 0x1.7f41dcp-1, - 0x1.7b6abcp-1, - 0x1.7791b8p-1, - 0x1.73b714p-1, - 0x1.6fdb12p-1, - 0x1.6bfdf0p-1, - 0x1.681ff2p-1, - 0x1.644156p-1, - 0x1.60625cp-1, - 0x1.5c8342p-1, - 0x1.58a446p-1, - 0x1.54c5a6p-1, - 0x1.50e79ep-1, - 0x1.4d0a68p-1, - 0x1.492e42p-1, - 0x1.455366p-1, - 0x1.417a0cp-1, - 0x1.3da26ep-1, - 0x1.39ccc2p-1, - 0x1.35f940p-1, - 0x1.32281ep-1, - 0x1.2e5992p-1, - 0x1.2a8dcep-1, - 0x1.26c508p-1, - 0x1.22ff72p-1, - 0x1.1f3d3cp-1, - 0x1.1b7e98p-1, - 0x1.17c3b6p-1, - 0x1.140cc4p-1, - 0x1.1059eep-1, - 0x1.0cab62p-1, - 0x1.09014cp-1, - 0x1.055bd6p-1, - 0x1.01bb2cp-1, - 0x1.fc3ee6p-2, - 0x1.f511aap-2, - 0x1.edeeeep-2, - 0x1.e6d700p-2, - 0x1.dfca26p-2, - 0x1.d8c8aap-2, - 0x1.d1d2d0p-2, - 0x1.cae8dap-2, - 0x1.c40b08p-2, - 0x1.bd3998p-2, - 0x1.b674c8p-2, - 0x1.afbcd4p-2, - 0x1.a911f0p-2, - 0x1.a27456p-2, - 0x1.9be438p-2, - 0x1.9561c8p-2, - 0x1.8eed36p-2, - 0x1.8886b2p-2, - 0x1.822e66p-2, - 0x1.7be47ap-2, - 0x1.75a91ap-2, - 0x1.6f7c6ap-2, - 0x1.695e8cp-2, - 0x1.634fa6p-2, - 0x1.5d4fd4p-2, - 0x1.575f34p-2, - 0x1.517de6p-2, - 0x1.4bac00p-2, - 0x1.45e99cp-2, - 0x1.4036d0p-2, - 0x1.3a93b2p-2, - 0x1.350052p-2, - 0x1.2f7cc4p-2, - 0x1.2a0916p-2, - 0x1.24a554p-2, - 0x1.1f518ap-2, - 0x1.1a0dc6p-2, - 0x1.14da0ap-2, - 0x1.0fb662p-2, - 0x1.0aa2d0p-2, - 0x1.059f5ap-2, - 0x1.00ac00p-2, - 0x1.f79184p-3, - 0x1.edeb40p-3, - 0x1.e46530p-3, - 0x1.daff4ap-3, - 0x1.d1b982p-3, - 0x1.c893cep-3, - 0x1.bf8e1cp-3, - 0x1.b6a856p-3, - 0x1.ade26cp-3, - 0x1.a53c42p-3, - 0x1.9cb5bep-3, - 0x1.944ec2p-3, - 0x1.8c0732p-3, - 0x1.83deeap-3, - 0x1.7bd5c8p-3, - 0x1.73eba4p-3, - 0x1.6c2056p-3, - 0x1.6473b6p-3, - 0x1.5ce596p-3, - 0x1.5575c8p-3, - 0x1.4e241ep-3, - 0x1.46f066p-3, - 0x1.3fda6cp-3, - 0x1.38e1fap-3, - 0x1.3206dcp-3, - 0x1.2b48dap-3, - 0x1.24a7b8p-3, - 
0x1.1e233ep-3, - 0x1.17bb2cp-3, - 0x1.116f48p-3, - 0x1.0b3f52p-3, - 0x1.052b0cp-3, - 0x1.fe6460p-4, - 0x1.f2a902p-4, - 0x1.e72372p-4, - 0x1.dbd32ap-4, - 0x1.d0b7a0p-4, - 0x1.c5d04ap-4, - 0x1.bb1c98p-4, - 0x1.b09bfcp-4, - 0x1.a64de6p-4, - 0x1.9c31c6p-4, - 0x1.92470ap-4, - 0x1.888d1ep-4, - 0x1.7f036cp-4, - 0x1.75a960p-4, - 0x1.6c7e64p-4, - 0x1.6381e2p-4, - 0x1.5ab342p-4, - 0x1.5211ecp-4, - 0x1.499d48p-4, - 0x1.4154bcp-4, - 0x1.3937b2p-4, - 0x1.31458ep-4, - 0x1.297dbap-4, - 0x1.21df9ap-4, - 0x1.1a6a96p-4, - 0x1.131e14p-4, - 0x1.0bf97ep-4, - 0x1.04fc3ap-4, - 0x1.fc4b5ep-5, - 0x1.eeea8cp-5, - 0x1.e1d4d0p-5, - 0x1.d508fap-5, - 0x1.c885e0p-5, - 0x1.bc4a54p-5, - 0x1.b05530p-5, - 0x1.a4a54ap-5, - 0x1.99397ap-5, - 0x1.8e109cp-5, - 0x1.83298ep-5, - 0x1.78832cp-5, - 0x1.6e1c58p-5, - 0x1.63f3f6p-5, - 0x1.5a08e8p-5, - 0x1.505a18p-5, - 0x1.46e66cp-5, - 0x1.3dacd2p-5, - 0x1.34ac36p-5, - 0x1.2be38cp-5, - 0x1.2351c2p-5, - 0x1.1af5d2p-5, - 0x1.12ceb4p-5, - 0x1.0adb60p-5, - 0x1.031ad6p-5, - 0x1.f7182ap-6, - 0x1.e85c44p-6, - 0x1.da0006p-6, - 0x1.cc0180p-6, - 0x1.be5ecep-6, - 0x1.b1160ap-6, - 0x1.a4255ap-6, - 0x1.978ae8p-6, - 0x1.8b44e6p-6, - 0x1.7f5188p-6, - 0x1.73af0cp-6, - 0x1.685bb6p-6, - 0x1.5d55ccp-6, - 0x1.529b9ep-6, - 0x1.482b84p-6, - 0x1.3e03d8p-6, - 0x1.3422fep-6, - 0x1.2a875cp-6, - 0x1.212f62p-6, - 0x1.181984p-6, - 0x1.0f443ep-6, - 0x1.06ae14p-6, - 0x1.fcab14p-7, - 0x1.ec7262p-7, - 0x1.dcaf36p-7, - 0x1.cd5ecap-7, - 0x1.be7e5ap-7, - 0x1.b00b38p-7, - 0x1.a202bep-7, - 0x1.94624ep-7, - 0x1.87275ep-7, - 0x1.7a4f6ap-7, - 0x1.6dd7fep-7, - 0x1.61beaep-7, - 0x1.56011cp-7, - 0x1.4a9cf6p-7, - 0x1.3f8ff6p-7, - 0x1.34d7dcp-7, - 0x1.2a727ap-7, - 0x1.205dacp-7, - 0x1.169756p-7, - 0x1.0d1d6ap-7, - 0x1.03ede2p-7, - 0x1.f60d8ap-8, - 0x1.e4cc4ap-8, - 0x1.d4143ap-8, - 0x1.c3e1a6p-8, - 0x1.b430ecp-8, - 0x1.a4fe84p-8, - 0x1.9646f4p-8, - 0x1.8806d8p-8, - 0x1.7a3adep-8, - 0x1.6cdfccp-8, - 0x1.5ff276p-8, - 0x1.536fc2p-8, - 0x1.4754acp-8, - 0x1.3b9e40p-8, - 0x1.30499cp-8, - 0x1.2553eep-8, - 
0x1.1aba78p-8, - 0x1.107a8cp-8, - 0x1.06918cp-8, - 0x1.f9f9d0p-9, - 0x1.e77448p-9, - 0x1.d58da6p-9, - 0x1.c4412cp-9, - 0x1.b38a3ap-9, - 0x1.a36454p-9, - 0x1.93cb12p-9, - 0x1.84ba30p-9, - 0x1.762d84p-9, - 0x1.682100p-9, - 0x1.5a90b0p-9, - 0x1.4d78bcp-9, - 0x1.40d564p-9, - 0x1.34a306p-9, - 0x1.28de12p-9, - 0x1.1d8318p-9, - 0x1.128ebap-9, - 0x1.07fdb4p-9, - 0x1.fb99b8p-10, - 0x1.e7f232p-10, - 0x1.d4fed8p-10, - 0x1.c2b9d0p-10, - 0x1.b11d70p-10, - 0x1.a02436p-10, - 0x1.8fc8c8p-10, - 0x1.8005f0p-10, - 0x1.70d6a4p-10, - 0x1.6235fcp-10, - 0x1.541f34p-10, - 0x1.468daep-10, - 0x1.397ceep-10, - 0x1.2ce898p-10, - 0x1.20cc76p-10, - 0x1.15246ep-10, - 0x1.09ec86p-10, - 0x1.fe41cep-11, - 0x1.e97ba4p-11, - 0x1.d57f52p-11, - 0x1.c245d4p-11, - 0x1.afc85ep-11, - 0x1.9e0058p-11, - 0x1.8ce75ep-11, - 0x1.7c7744p-11, - 0x1.6caa0ep-11, - 0x1.5d79ecp-11, - 0x1.4ee142p-11, - 0x1.40daa4p-11, - 0x1.3360ccp-11, - 0x1.266ea8p-11, - 0x1.19ff46p-11, - 0x1.0e0de8p-11, - 0x1.0295f0p-11, - 0x1.ef25d4p-12, - 0x1.da0110p-12, - 0x1.c5b542p-12, - 0x1.b23a5ap-12, - 0x1.9f8894p-12, - 0x1.8d986ap-12, - 0x1.7c629ap-12, - 0x1.6be022p-12, - 0x1.5c0a38p-12, - 0x1.4cda54p-12, - 0x1.3e4a24p-12, - 0x1.305390p-12, - 0x1.22f0b4p-12, - 0x1.161be4p-12, - 0x1.09cfa4p-12, - 0x1.fc0d56p-13, - 0x1.e577bcp-13, - 0x1.cfd4a6p-13, - 0x1.bb1a96p-13, - 0x1.a74068p-13, - 0x1.943d4ap-13, - 0x1.8208bcp-13, - 0x1.709a8ep-13, - 0x1.5feadap-13, - 0x1.4ff208p-13, - 0x1.40a8c2p-13, - 0x1.3207fcp-13, - 0x1.2408eap-13, - 0x1.16a502p-13, - 0x1.09d5f8p-13, - 0x1.fb2b7ap-14, - 0x1.e3bcf4p-14, - 0x1.cd5528p-14, - 0x1.b7e946p-14, - 0x1.a36eecp-14, - 0x1.8fdc1cp-14, - 0x1.7d2738p-14, - 0x1.6b4702p-14, - 0x1.5a329cp-14, - 0x1.49e178p-14, - 0x1.3a4b60p-14, - 0x1.2b6876p-14, - 0x1.1d3120p-14, - 0x1.0f9e1cp-14, - 0x1.02a868p-14, - 0x1.ec929ap-15, - 0x1.d4f4b4p-15, - 0x1.be6abcp-15, - 0x1.a8e8ccp-15, - 0x1.94637ep-15, - 0x1.80cfdcp-15, - 0x1.6e2368p-15, - 0x1.5c540cp-15, - 0x1.4b581cp-15, - 0x1.3b2652p-15, - 0x1.2bb5ccp-15, - 0x1.1cfe02p-15, - 
0x1.0ef6c4p-15, - 0x1.019842p-15, - 0x1.e9b5e8p-16, - 0x1.d16f58p-16, - 0x1.ba4f04p-16, - 0x1.a447b8p-16, - 0x1.8f4cccp-16, - 0x1.7b5224p-16, - 0x1.684c22p-16, - 0x1.562facp-16, - 0x1.44f21ep-16, - 0x1.34894ap-16, - 0x1.24eb72p-16, - 0x1.160f44p-16, - 0x1.07ebd2p-16, - 0x1.f4f12ep-17, - 0x1.db5ad0p-17, - 0x1.c304f0p-17, - 0x1.abe09ep-17, - 0x1.95df98p-17, - 0x1.80f43ap-17, - 0x1.6d1178p-17, - 0x1.5a2ae0p-17, - 0x1.483488p-17, - 0x1.372310p-17, - 0x1.26eb9ep-17, - 0x1.1783cep-17, - 0x1.08e1bap-17, - 0x1.f5f7d8p-18, - 0x1.db92b6p-18, - 0x1.c282cep-18, - 0x1.aab7acp-18, - 0x1.94219cp-18, - 0x1.7eb1a2p-18, - 0x1.6a5972p-18, - 0x1.570b6ap-18, - 0x1.44ba86p-18, - 0x1.335a62p-18, - 0x1.22df2ap-18, - 0x1.133d96p-18, - 0x1.046aeap-18, - 0x1.ecb9d0p-19, - 0x1.d21398p-19, - 0x1.b8d094p-19, - 0x1.a0df10p-19, - 0x1.8a2e26p-19, - 0x1.74adc8p-19, - 0x1.604ea8p-19, - 0x1.4d0232p-19, - 0x1.3aba86p-19, - 0x1.296a70p-19, - 0x1.190562p-19, - 0x1.097f62p-19, - 0x1.f59a20p-20, - 0x1.d9c736p-20, - 0x1.bf716cp-20, - 0x1.a6852cp-20, - 0x1.8eefd8p-20, - 0x1.789fb8p-20, - 0x1.6383f8p-20, - 0x1.4f8c96p-20, - 0x1.3caa62p-20, - 0x1.2acee2p-20, - 0x1.19ec60p-20, - 0x1.09f5d0p-20, - 0x1.f5bd96p-21, - 0x1.d9371ep-21, - 0x1.be41dep-21, - 0x1.a4c89ep-21, - 0x1.8cb738p-21, - 0x1.75fa8ep-21, - 0x1.608078p-21, - 0x1.4c37c0p-21, - 0x1.39100ep-21, - 0x1.26f9e0p-21, - 0x1.15e682p-21, - 0x1.05c804p-21, - 0x1.ed2254p-22, - 0x1.d06ad6p-22, - 0x1.b551c8p-22, - 0x1.9bc0a0p-22, - 0x1.83a200p-22, - 0x1.6ce1aap-22, - 0x1.576c72p-22, - 0x1.43302cp-22, - 0x1.301ba2p-22, - 0x1.1e1e86p-22, - 0x1.0d2966p-22, - 0x1.fa5b50p-23, - 0x1.dc3ae4p-23, - 0x1.bfd756p-23, - 0x1.a517dap-23, - 0x1.8be4f8p-23, - 0x1.74287ep-23, - 0x1.5dcd66p-23, - 0x1.48bfd4p-23, - 0x1.34ecf8p-23, - 0x1.224310p-23, - 0x1.10b148p-23, - }, -}; diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h index 7f0a8aa5f2..862eefaf8f 100644 --- a/sysdeps/aarch64/fpu/vecmath_config.h +++ b/sysdeps/aarch64/fpu/vecmath_config.h 
@@ -75,49 +75,37 @@ extern const struct v_log10_data } table[1 << V_LOG10_TABLE_BITS]; } __v_log10_data attribute_hidden; -extern const struct erff_data +extern const struct v_erff_data { struct { float erf, scale; } tab[513]; -} __erff_data attribute_hidden; +} __v_erff_data attribute_hidden; -extern const struct sv_erff_data -{ - float erf[513]; - float scale[513]; -} __sv_erff_data attribute_hidden; - -extern const struct erf_data +extern const struct v_erf_data { struct { double erf, scale; } tab[769]; -} __erf_data attribute_hidden; - -extern const struct sv_erf_data -{ - double erf[769]; - double scale[769]; -} __sv_erf_data attribute_hidden; +} __v_erf_data attribute_hidden; -extern const struct erfc_data +extern const struct v_erfc_data { struct { double erfc, scale; } tab[3488]; -} __erfc_data attribute_hidden; +} __v_erfc_data attribute_hidden; -extern const struct erfcf_data +extern const struct v_erfcf_data { struct { float erfc, scale; } tab[645]; -} __erfcf_data attribute_hidden; +} __v_erfcf_data attribute_hidden; /* Some data for AdvSIMD and SVE pow's internal exp and log. */ #define V_POW_EXP_TABLE_BITS 8 commit 4148940836eee07d1138da6f1805280eeb8217e3 Author: Pierre Blanchard Date: Mon Dec 9 15:53:04 2024 +0000 AArch64: Improve codegen in AdvSIMD pow Remove spurious ADRP. Improve memory access by shuffling constants and using more indexed MLAs. A few more optimisation with no impact on accuracy - force fmas contraction - switch from shift-aided rint to rint instruction Between 1 and 5% throughput improvement on Neoverse V1 depending on benchmark. (cherry picked from commit 569cfaaf4984ae70b23c61ee28a609b5aef93fea) diff --git a/sysdeps/aarch64/fpu/pow_advsimd.c b/sysdeps/aarch64/fpu/pow_advsimd.c index 3c91e3e183..81e134ac2f 100644 --- a/sysdeps/aarch64/fpu/pow_advsimd.c +++ b/sysdeps/aarch64/fpu/pow_advsimd.c @@ -22,9 +22,6 @@ /* Defines parameters of the approximation and scalar fallback. 
*/ #include "finite_pow.h" -#define VecSmallExp v_u64 (SmallExp) -#define VecThresExp v_u64 (ThresExp) - #define VecSmallPowX v_u64 (SmallPowX) #define VecThresPowX v_u64 (ThresPowX) #define VecSmallPowY v_u64 (SmallPowY) @@ -32,36 +29,48 @@ static const struct data { - float64x2_t log_poly[6]; - float64x2_t exp_poly[3]; - float64x2_t ln2_hi, ln2_lo; - float64x2_t shift, inv_ln2_n, ln2_hi_n, ln2_lo_n, small_powx; uint64x2_t inf; + float64x2_t small_powx; + uint64x2_t offset, mask; + uint64x2_t mask_sub_0, mask_sub_1; + float64x2_t log_c0, log_c2, log_c4, log_c5; + double log_c1, log_c3; + double ln2_lo, ln2_hi; + uint64x2_t small_exp, thres_exp; + double ln2_lo_n, ln2_hi_n; + double inv_ln2_n, exp_c2; + float64x2_t exp_c0, exp_c1; } data = { + /* Power threshold. */ + .inf = V2 (0x7ff0000000000000), + .small_powx = V2 (0x1p-126), + .offset = V2 (Off), + .mask = V2 (0xfffULL << 52), + .mask_sub_0 = V2 (1ULL << 52), + .mask_sub_1 = V2 (52ULL << 52), /* Coefficients copied from v_pow_log_data.c relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8] Coefficients are scaled to match the scaling during evaluation. */ - .log_poly - = { V2 (0x1.555555555556p-2 * -2), V2 (-0x1.0000000000006p-2 * -2), - V2 (0x1.999999959554ep-3 * 4), V2 (-0x1.555555529a47ap-3 * 4), - V2 (0x1.2495b9b4845e9p-3 * -8), V2 (-0x1.0002b8b263fc3p-3 * -8) }, - .ln2_hi = V2 (0x1.62e42fefa3800p-1), - .ln2_lo = V2 (0x1.ef35793c76730p-45), + .log_c0 = V2 (0x1.555555555556p-2 * -2), + .log_c1 = -0x1.0000000000006p-2 * -2, + .log_c2 = V2 (0x1.999999959554ep-3 * 4), + .log_c3 = -0x1.555555529a47ap-3 * 4, + .log_c4 = V2 (0x1.2495b9b4845e9p-3 * -8), + .log_c5 = V2 (-0x1.0002b8b263fc3p-3 * -8), + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549 (0.550 without fma) if |x| < ln2/512. 
*/ - .exp_poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6ef9p-3), - V2 (0x1.5555576a5adcep-5) }, - .shift = V2 (0x1.8p52), /* round to nearest int. without intrinsics. */ - .inv_ln2_n = V2 (0x1.71547652b82fep8), /* N/ln2. */ - .ln2_hi_n = V2 (0x1.62e42fefc0000p-9), /* ln2/N. */ - .ln2_lo_n = V2 (-0x1.c610ca86c3899p-45), - .small_powx = V2 (0x1p-126), - .inf = V2 (0x7ff0000000000000) + .exp_c0 = V2 (0x1.fffffffffffd4p-2), + .exp_c1 = V2 (0x1.5555571d6ef9p-3), + .exp_c2 = 0x1.5555576a5adcep-5, + .small_exp = V2 (0x3c90000000000000), + .thres_exp = V2 (0x03f0000000000000), + .inv_ln2_n = 0x1.71547652b82fep8, /* N/ln2. */ + .ln2_hi_n = 0x1.62e42fefc0000p-9, /* ln2/N. */ + .ln2_lo_n = -0x1.c610ca86c3899p-45, }; -#define A(i) data.log_poly[i] -#define C(i) data.exp_poly[i] - /* This version implements an algorithm close to scalar pow but - does not implement the trick in the exp's specialcase subroutine to avoid double-rounding, @@ -91,10 +100,9 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - uint64x2_t tmp = vsubq_u64 (ix, v_u64 (Off)); - int64x2_t k - = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */ - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, v_u64 (0xfffULL << 52))); + uint64x2_t tmp = vsubq_u64 (ix, d->offset); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); + uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); float64x2_t kd = vcvtq_f64_s64 (k); /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ @@ -105,9 +113,10 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc); /* k*Ln2 + log(c) + r. 
*/ - float64x2_t t1 = vfmaq_f64 (logc, kd, d->ln2_hi); + float64x2_t ln2 = vld1q_f64 (&d->ln2_lo); + float64x2_t t1 = vfmaq_laneq_f64 (logc, kd, ln2, 1); float64x2_t t2 = vaddq_f64 (t1, r); - float64x2_t lo1 = vfmaq_f64 (logctail, kd, d->ln2_lo); + float64x2_t lo1 = vfmaq_laneq_f64 (logctail, kd, ln2, 0); float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r); /* Evaluation is optimized assuming superscalar pipelined execution. */ float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r); @@ -118,9 +127,10 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r); float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2); /* p = log1p(r) - r - A[0]*r*r. */ - float64x2_t a56 = vfmaq_f64 (A (4), r, A (5)); - float64x2_t a34 = vfmaq_f64 (A (2), r, A (3)); - float64x2_t a12 = vfmaq_f64 (A (0), r, A (1)); + float64x2_t odd_coeffs = vld1q_f64 (&d->log_c1); + float64x2_t a56 = vfmaq_f64 (d->log_c4, r, d->log_c5); + float64x2_t a34 = vfmaq_laneq_f64 (d->log_c2, r, odd_coeffs, 1); + float64x2_t a12 = vfmaq_laneq_f64 (d->log_c0, r, odd_coeffs, 0); float64x2_t p = vfmaq_f64 (a34, ar2, a56); p = vfmaq_f64 (a12, ar2, p); p = vmulq_f64 (ar3, p); @@ -140,28 +150,28 @@ exp_special_case (float64x2_t x, float64x2_t xtail) /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */ static inline float64x2_t -v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d) +v_exp_inline (float64x2_t x, float64x2_t neg_xtail, const struct data *d) { /* Fallback to scalar exp_inline for all lanes if any lane contains value of x s.t. |x| <= 2^-54 or >= 512. 
*/ - uint64x2_t abstop - = vshrq_n_u64 (vandq_u64 (vreinterpretq_u64_f64 (x), d->inf), 52); - uint64x2_t uoflowx - = vcgeq_u64 (vsubq_u64 (abstop, VecSmallExp), VecThresExp); + uint64x2_t uoflowx = vcgeq_u64 ( + vsubq_u64 (vreinterpretq_u64_f64 (vabsq_f64 (x)), d->small_exp), + d->thres_exp); if (__glibc_unlikely (v_any_u64 (uoflowx))) - return exp_special_case (x, xtail); + return exp_special_case (x, vnegq_f64 (neg_xtail)); /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */ - float64x2_t z = vmulq_f64 (d->inv_ln2_n, x); /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - float64x2_t kd = vaddq_f64 (z, d->shift); - uint64x2_t ki = vreinterpretq_u64_f64 (kd); - kd = vsubq_f64 (kd, d->shift); - float64x2_t r = vfmsq_f64 (x, kd, d->ln2_hi_n); - r = vfmsq_f64 (r, kd, d->ln2_lo_n); + float64x2_t exp_consts = vld1q_f64 (&d->inv_ln2_n); + float64x2_t z = vmulq_laneq_f64 (x, exp_consts, 0); + float64x2_t kd = vrndnq_f64 (z); + uint64x2_t ki = vreinterpretq_u64_s64 (vcvtaq_s64_f64 (z)); + float64x2_t ln2_n = vld1q_f64 (&d->ln2_lo_n); + float64x2_t r = vfmsq_laneq_f64 (x, kd, ln2_n, 1); + r = vfmsq_laneq_f64 (r, kd, ln2_n, 0); /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r = vaddq_f64 (r, xtail); + r = vsubq_f64 (r, neg_xtail); /* 2^(k/N) ~= scale. */ uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1)); uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS); @@ -170,8 +180,8 @@ v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d) sbits = vaddq_u64 (sbits, top); /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). 
*/ float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t tmp = vfmaq_f64 (C (1), r, C (2)); - tmp = vfmaq_f64 (C (0), r, tmp); + float64x2_t tmp = vfmaq_laneq_f64 (d->exp_c1, r, exp_consts, 1); + tmp = vfmaq_f64 (d->exp_c0, r, tmp); tmp = vfmaq_f64 (r, r2, tmp); float64x2_t scale = vreinterpretq_f64_u64 (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there @@ -230,8 +240,8 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) { /* Normalize subnormal x so exponent becomes negative. */ uint64x2_t vix_norm = vreinterpretq_u64_f64 ( - vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (v_u64 (1ULL << 52))))); - vix_norm = vsubq_u64 (vix_norm, v_u64 (52ULL << 52)); + vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (d->mask_sub_0)))); + vix_norm = vsubq_u64 (vix_norm, d->mask_sub_1); vix = vbslq_u64 (sub_x, vix_norm, vix); } } @@ -242,8 +252,7 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) /* Vector Exp(y_loghi, y_loglo). */ float64x2_t vehi = vmulq_f64 (y, vhi); - float64x2_t velo = vmulq_f64 (y, vlo); float64x2_t vemi = vfmsq_f64 (vehi, y, vhi); - velo = vsubq_f64 (velo, vemi); - return v_exp_inline (vehi, velo, d); + float64x2_t neg_velo = vfmsq_f64 (vemi, y, vlo); + return v_exp_inline (vehi, neg_velo, d); } commit ae04f63087415eba9060143608b03db693854bb7 Author: Pierre Blanchard Date: Mon Dec 9 15:54:34 2024 +0000 AArch64: Improve codegen in AdvSIMD logs Remove spurious ADRP and a few MOVs. Reduce memory access by using more indexed MLAs in polynomial. Align notation so that algorithms are easier to compare. Speedup on Neoverse V1 for log10 (8%), log (8.5%), and log2 (10%). Update error threshold in AdvSIMD log (now matches SVE log). (cherry picked from commit 8eb5ad2ebc94cc5bedbac57c226c02ec254479c7) diff --git a/sysdeps/aarch64/fpu/log10_advsimd.c b/sysdeps/aarch64/fpu/log10_advsimd.c index c065aaebae..f69ed21c39 100644 --- a/sysdeps/aarch64/fpu/log10_advsimd.c +++ b/sysdeps/aarch64/fpu/log10_advsimd.c @@ -18,36 +18,36 @@ . 
*/ #include "v_math.h" -#include "poly_advsimd_f64.h" - -#define N (1 << V_LOG10_TABLE_BITS) static const struct data { - uint64x2_t min_norm; + uint64x2_t off, sign_exp_mask, offset_lower_bound; uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t invln10, log10_2, ln2; - uint64x2_t sign_exp_mask; + double invln10, log10_2; + double c1, c3; + float64x2_t c0, c2, c4; } data = { /* Computed from log coefficients divided by log(10) then rounded to double precision. */ - .poly = { V2 (-0x1.bcb7b1526e506p-3), V2 (0x1.287a7636be1d1p-3), - V2 (-0x1.bcb7b158af938p-4), V2 (0x1.63c78734e6d07p-4), - V2 (-0x1.287461742fee4p-4) }, - .ln2 = V2 (0x1.62e42fefa39efp-1), - .invln10 = V2 (0x1.bcb7b1526e50ep-2), - .log10_2 = V2 (0x1.34413509f79ffp-2), - .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */ - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ + .c0 = V2 (-0x1.bcb7b1526e506p-3), + .c1 = 0x1.287a7636be1d1p-3, + .c2 = V2 (-0x1.bcb7b158af938p-4), + .c3 = 0x1.63c78734e6d07p-4, + .c4 = V2 (-0x1.287461742fee4p-4), + .invln10 = 0x1.bcb7b1526e50ep-2, + .log10_2 = 0x1.34413509f79ffp-2, + .off = V2 (0x3fe6900900000000), .sign_exp_mask = V2 (0xfff0000000000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - 0x0010000000000000. 
*/ }; -#define Off v_u64 (0x3fe6900900000000) +#define N (1 << V_LOG10_TABLE_BITS) #define IndexMask (N - 1) -#define T(s, i) __v_log10_data.s[i] - struct entry { float64x2_t invc; @@ -70,10 +70,11 @@ lookup (uint64x2_t i) } static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, - uint32x2_t special) +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) { - return v_call_f64 (log10, x, vfmaq_f64 (hi, r2, y), vmovl_u32 (special)); + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log10, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); } /* Fast implementation of double-precision vector log10 @@ -85,19 +86,24 @@ special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. 
*/ - uint64x2_t tmp = vsubq_u64 (ix, Off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); - struct entry e = lookup (tmp); + struct entry e = lookup (u_off); + + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2). */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); @@ -105,17 +111,22 @@ float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x) /* hi = r / log(10) + log10(c) + k*log10(2). Constants in v_log10_data.c are computed (in extended precision) as - e.log10c := e.logc * ivln10. */ - float64x2_t w = vfmaq_f64 (e.log10c, r, d->invln10); + e.log10c := e.logc * invln10. */ + float64x2_t cte = vld1q_f64 (&d->invln10); + float64x2_t hi = vfmaq_laneq_f64 (e.log10c, r, cte, 0); /* y = log10(1+r) + n * log10(2). */ - float64x2_t hi = vfmaq_f64 (w, kd, d->log10_2); + hi = vfmaq_laneq_f64 (hi, kd, cte, 1); /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. 
*/ float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_f64 (y, d->c4, r2); + y = vfmaq_f64 (p, y, r2); if (__glibc_unlikely (v_any_u32h (special))) - return special_case (x, y, hi, r2, special); - return vfmaq_f64 (hi, r2, y); + return special_case (hi, u_off, y, r2, special, d); + return vfmaq_f64 (hi, y, r2); } diff --git a/sysdeps/aarch64/fpu/log2_advsimd.c b/sysdeps/aarch64/fpu/log2_advsimd.c index 4057c552d8..1eea1f86eb 100644 --- a/sysdeps/aarch64/fpu/log2_advsimd.c +++ b/sysdeps/aarch64/fpu/log2_advsimd.c @@ -18,31 +18,33 @@ . */ #include "v_math.h" -#include "poly_advsimd_f64.h" - -#define N (1 << V_LOG2_TABLE_BITS) static const struct data { - uint64x2_t min_norm; + uint64x2_t off, sign_exp_mask, offset_lower_bound; uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t invln2; - uint64x2_t sign_exp_mask; + float64x2_t c0, c2; + double c1, c3, invln2, c4; } data = { /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9 and N = 128, then scaled by log2(e) in extended precision and rounded back to double precision. */ - .poly = { V2 (-0x1.71547652b83p-1), V2 (0x1.ec709dc340953p-2), - V2 (-0x1.71547651c8f35p-2), V2 (0x1.2777ebe12dda5p-2), - V2 (-0x1.ec738d616fe26p-3) }, - .invln2 = V2 (0x1.71547652b82fep0), - .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */ - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ + .c0 = V2 (-0x1.71547652b8300p-1), + .c1 = 0x1.ec709dc340953p-2, + .c2 = V2 (-0x1.71547651c8f35p-2), + .c3 = 0x1.2777ebe12dda5p-2, + .c4 = -0x1.ec738d616fe26p-3, + .invln2 = 0x1.71547652b82fep0, + .off = V2 (0x3fe6900900000000), .sign_exp_mask = V2 (0xfff0000000000000), + /* Lower bound is 0x0010000000000000. 
For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-1022). */ }; -#define Off v_u64 (0x3fe6900900000000) +#define N (1 << V_LOG2_TABLE_BITS) #define IndexMask (N - 1) struct entry @@ -67,10 +69,11 @@ lookup (uint64x2_t i) } static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t w, float64x2_t r2, - uint32x2_t special) +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) { - return v_call_f64 (log2, x, vfmaq_f64 (w, r2, y), vmovl_u32 (special)); + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log2, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); } /* Double-precision vector log2 routine. Implements the same algorithm as @@ -81,31 +84,41 @@ special_case (float64x2_t x, float64x2_t y, float64x2_t w, float64x2_t r2, float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. 
*/ - uint64x2_t tmp = vsubq_u64 (ix, Off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); - struct entry e = lookup (tmp); + struct entry e = lookup (u_off); - /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */ + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); + /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); float64x2_t kd = vcvtq_f64_s64 (k); - float64x2_t w = vfmaq_f64 (e.log2c, r, d->invln2); + + float64x2_t invln2_and_c4 = vld1q_f64 (&d->invln2); + float64x2_t hi + = vfmaq_laneq_f64 (vaddq_f64 (e.log2c, kd), r, invln2_and_c4, 0); float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly); - w = vaddq_f64 (kd, w); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, invln2_and_c4, 1); + y = vfmaq_f64 (p, r2, y); if (__glibc_unlikely (v_any_u32h (special))) - return special_case (x, y, w, r2, special); - return vfmaq_f64 (w, r2, y); + return special_case (hi, u_off, y, r2, special, d); + return vfmaq_f64 (hi, y, r2); } diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c index 015a6da7d7..b1a27fbc29 100644 --- a/sysdeps/aarch64/fpu/log_advsimd.c +++ b/sysdeps/aarch64/fpu/log_advsimd.c @@ -21,27 +21,29 @@ static const struct data { - uint64x2_t min_norm; + uint64x2_t off, sign_exp_mask, offset_lower_bound; uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t ln2; - uint64x2_t sign_exp_mask; + float64x2_t c0, c2; + double c1, c3, ln2, c4; } data = { - /* Worst-case error: 1.17 + 
0.5 ulp. - Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ - .poly = { V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2), - V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3), - V2 (-0x1.554e550bd501ep-3) }, - .ln2 = V2 (0x1.62e42fefa39efp-1), - .min_norm = V2 (0x0010000000000000), - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ - .sign_exp_mask = V2 (0xfff0000000000000) + /* Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ + .c0 = V2 (-0x1.ffffffffffff7p-2), + .c1 = 0x1.55555555170d4p-2, + .c2 = V2 (-0x1.0000000399c27p-2), + .c3 = 0x1.999b2e90e94cap-3, + .c4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + .sign_exp_mask = V2 (0xfff0000000000000), + .off = V2 (0x3fe6900900000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-1022). */ }; -#define A(i) d->poly[i] #define N (1 << V_LOG_TABLE_BITS) #define IndexMask (N - 1) -#define Off v_u64 (0x3fe6900900000000) struct entry { @@ -64,48 +66,56 @@ lookup (uint64x2_t i) } static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, - uint32x2_t cmp) +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) { - return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (cmp)); + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); } +/* Double-precision vector log routine. + The maximum observed error is 2.17 ULP: + _ZGVnN2v_log(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2 + want 0x1.ffffff1cca045p-2.
*/ float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - float64x2_t z, r, r2, p, y, kd, hi; - uint64x2_t ix, iz, tmp; - uint32x2_t cmp; - int64x2_t k; - struct entry e; - ix = vreinterpretq_u64_f64 (x); - cmp = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - tmp = vsubq_u64 (ix, Off); - k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */ - iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); - z = vreinterpretq_f64_u64 (iz); - e = lookup (tmp); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); + float64x2_t z = vreinterpretq_f64_u64 (iz); + + struct entry e = lookup (u_off); + + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); - kd = vcvtq_f64_s64 (k); + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); + float64x2_t kd = vcvtq_f64_s64 (k); /* hi = r + log(c) + k*Ln2. */ - hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); + float64x2_t ln2_and_c4 = vld1q_f64 (&d->ln2); + float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_c4, 0); + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. 
*/ - r2 = vmulq_f64 (r, r); - y = vfmaq_f64 (A (2), A (3), r); - p = vfmaq_f64 (A (0), A (1), r); - y = vfmaq_f64 (y, A (4), r2); - y = vfmaq_f64 (p, y, r2); - - if (__glibc_unlikely (v_any_u32h (cmp))) - return special_case (x, y, hi, r2, cmp); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, ln2_and_c4, 1); + y = vfmaq_f64 (p, r2, y); + + if (__glibc_unlikely (v_any_u32h (special))) + return special_case (hi, u_off, y, r2, special, d); return vfmaq_f64 (hi, y, r2); } commit 2aed9796bfb17b257e63b12cefdb7ff60be09626 Author: Pierre Blanchard Date: Mon Dec 9 15:55:39 2024 +0000 AArch64: Improve codegen in users of ADVSIMD log1p helper Add inline helper for log1p and rearrange operations so MOV is not necessary in reduction or around the special-case handler. Reduce memory access by using more indexed MLAs in polynomial. Speedup on Neoverse V1 for log1p (3.5%), acosh (7.5%) and atanh (10%). 
(cherry picked from commit ca0c0d0f26fbf75b9cacc65122b457e8fdec40b8) diff --git a/sysdeps/aarch64/fpu/acosh_advsimd.c b/sysdeps/aarch64/fpu/acosh_advsimd.c index c88283cf11..a98f4a2e4d 100644 --- a/sysdeps/aarch64/fpu/acosh_advsimd.c +++ b/sysdeps/aarch64/fpu/acosh_advsimd.c @@ -54,9 +54,8 @@ VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x) x = vbslq_f64 (special, vreinterpretq_f64_u64 (d->one), x); #endif - float64x2_t xm1 = vsubq_f64 (x, v_f64 (1)); - float64x2_t y; - y = vaddq_f64 (x, v_f64 (1)); + float64x2_t xm1 = vsubq_f64 (x, v_f64 (1.0)); + float64x2_t y = vaddq_f64 (x, v_f64 (1.0)); y = vmulq_f64 (y, xm1); y = vsqrtq_f64 (y); y = vaddq_f64 (xm1, y); diff --git a/sysdeps/aarch64/fpu/atanh_advsimd.c b/sysdeps/aarch64/fpu/atanh_advsimd.c index 3c3d0bd6ad..eb9769aeac 100644 --- a/sysdeps/aarch64/fpu/atanh_advsimd.c +++ b/sysdeps/aarch64/fpu/atanh_advsimd.c @@ -23,15 +23,19 @@ const static struct data { struct v_log1p_data log1p_consts; - uint64x2_t one, half; + uint64x2_t one; + uint64x2_t sign_mask; } data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE, .one = V2 (0x3ff0000000000000), - .half = V2 (0x3fe0000000000000) }; + .sign_mask = V2 (0x8000000000000000) }; static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) +special_case (float64x2_t x, float64x2_t halfsign, float64x2_t y, + uint64x2_t special, const struct data *d) { - return v_call_f64 (atanh, x, y, special); + y = log1p_inline (y, &d->log1p_consts); + return v_call_f64 (atanh, vbslq_f64 (d->sign_mask, halfsign, x), + vmulq_f64 (halfsign, y), special); } /* Approximation for vector double-precision atanh(x) using modified log1p. 
@@ -43,11 +47,10 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x) { const struct data *d = ptr_barrier (&data); + float64x2_t halfsign = vbslq_f64 (d->sign_mask, x, v_f64 (0.5)); float64x2_t ax = vabsq_f64 (x); uint64x2_t ia = vreinterpretq_u64_f64 (ax); - uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia); uint64x2_t special = vcgeq_u64 (ia, d->one); - float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half)); #if WANT_SIMD_EXCEPT ax = v_zerofy_f64 (ax, special); @@ -55,10 +58,15 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x) float64x2_t y; y = vaddq_f64 (ax, ax); - y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax)); - y = log1p_inline (y, &d->log1p_consts); + y = vdivq_f64 (y, vsubq_f64 (vreinterpretq_f64_u64 (d->one), ax)); if (__glibc_unlikely (v_any_u64 (special))) - return special_case (x, vmulq_f64 (y, halfsign), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special, d); +#else + return special_case (ax, halfsign, y, special, d); +#endif + + y = log1p_inline (y, &d->log1p_consts); return vmulq_f64 (y, halfsign); } diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c index 114064c696..1263587201 100644 --- a/sysdeps/aarch64/fpu/log1p_advsimd.c +++ b/sysdeps/aarch64/fpu/log1p_advsimd.c @@ -17,43 +17,26 @@ License along with the GNU C Library; if not, see . */ -#include "v_math.h" -#include "poly_advsimd_f64.h" +#define WANT_V_LOG1P_K0_SHORTCUT 0 +#include "v_log1p_inline.h" const static struct data { - float64x2_t poly[19], ln2[2]; - uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask, inf, minus_one; - int64x2_t one_top; -} data = { - /* Generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. 
*/ - .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), - V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), - V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), - V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), - V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), - V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), - V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), - V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), - V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), - V2 (-0x1.cfa7385bdb37ep-6) }, - .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, - /* top32(asuint64(sqrt(2)/2)) << 32. */ - .hf_rt2_top = V2 (0x3fe6a09e00000000), - /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */ - .one_m_hf_rt2_top = V2 (0x00095f6200000000), - .umask = V2 (0x000fffff00000000), - .one_top = V2 (0x3ff), - .inf = V2 (0x7ff0000000000000), - .minus_one = V2 (0xbff0000000000000) -}; + struct v_log1p_data d; + uint64x2_t inf, minus_one; +} data = { .d = V_LOG1P_CONSTANTS_TABLE, + .inf = V2 (0x7ff0000000000000), + .minus_one = V2 (0xbff0000000000000) }; #define BottomMask v_u64 (0xffffffff) -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) +static float64x2_t NOINLINE VPCS_ATTR +special_case (float64x2_t x, uint64x2_t cmp, const struct data *d) { - return v_call_f64 (log1p, x, y, special); + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float64x2_t x_nospecial = v_zerofy_f64 (x, cmp); + return v_call_f64 (log1p, x, log1p_inline (x_nospecial, &d->d), cmp); } /* Vector log1p approximation using polynomial on reduced interval. 
Routine is @@ -66,66 +49,14 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) const struct data *d = ptr_barrier (&data); uint64x2_t ix = vreinterpretq_u64_f64 (x); uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); - uint64x2_t special = vcgeq_u64 (ia, d->inf); -#if WANT_SIMD_EXCEPT - special = vorrq_u64 (special, - vcgeq_u64 (ix, vreinterpretq_u64_f64 (v_f64 (-1)))); - if (__glibc_unlikely (v_any_u64 (special))) - x = v_zerofy_f64 (x, special); -#else - special = vorrq_u64 (special, vcleq_f64 (x, v_f64 (-1))); -#endif + uint64x2_t special_cases + = vorrq_u64 (vcgeq_u64 (ia, d->inf), vcgeq_u64 (ix, d->minus_one)); - /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f - is in [sqrt(2)/2, sqrt(2)]): - log1p(x) = k*log(2) + log1p(f). + if (__glibc_unlikely (v_any_u64 (special_cases))) + return special_case (x, special_cases, d); - f may not be representable exactly, so we need a correction term: - let m = round(1 + x), c = (1 + x) - m. - c << m: at very small x, log1p(x) ~ x, hence: - log(1+x) - log(m) ~ c/m. - - We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */ - - /* Obtain correctly scaled k by manipulation in the exponent. - The scalar algorithm casts down to 32-bit at this point to calculate k and - u_red. We stay in double-width to obtain f and k, using the same constants - as the scalar algorithm but shifted left by 32. */ - float64x2_t m = vaddq_f64 (x, v_f64 (1)); - uint64x2_t mi = vreinterpretq_u64_f64 (m); - uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top); - - int64x2_t ki - = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top); - float64x2_t k = vcvtq_f64_s64 (ki); - - /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ - uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top); - uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask)); - float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1)); - - /* Correction term c/m. 
*/ - float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m); - - /* Approximate log1p(x) on the reduced input using a polynomial. Because - log1p(0)=0 we choose an approximation of the form: - x + C0*x^2 + C1*x^3 + C2x^4 + ... - Hence approximation has the form f + f^2 * P(f) - where P(x) = C0 + C1*x + C2x^2 + ... - Assembling this all correctly is dealt with at the final step. */ - float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly); - - float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]); - float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]); - float64x2_t y = vaddq_f64 (ylo, yhi); - - if (__glibc_unlikely (v_any_u64 (special))) - return special_case (vreinterpretq_f64_u64 (ix), vfmaq_f64 (y, f2, p), - special); - - return vfmaq_f64 (y, f2, p); + return log1p_inline (x, &d->d); } strong_alias (V_NAME_D1 (log1p), V_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/v_log1p_inline.h b/sysdeps/aarch64/fpu/v_log1p_inline.h index 242e43b6ee..834ff65adf 100644 --- a/sysdeps/aarch64/fpu/v_log1p_inline.h +++ b/sysdeps/aarch64/fpu/v_log1p_inline.h @@ -21,29 +21,30 @@ #define AARCH64_FPU_V_LOG1P_INLINE_H #include "v_math.h" -#include "poly_advsimd_f64.h" struct v_log1p_data { - float64x2_t poly[19], ln2[2]; + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16; uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask; int64x2_t one_top; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; + double ln2[2]; }; /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. 
*/ #define V_LOG1P_CONSTANTS_TABLE \ { \ - .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), \ - V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), \ - V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), \ - V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), \ - V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), \ - V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), \ - V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), \ - V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), \ - V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), \ - V2 (-0x1.cfa7385bdb37ep-6) }, \ - .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, \ + .c0 = V2 (-0x1.ffffffffffffbp-2), .c1 = 0x1.55555555551a9p-2, \ + .c2 = V2 (-0x1.00000000008e3p-2), .c3 = 0x1.9999999a32797p-3, \ + .c4 = V2 (-0x1.555555552fecfp-3), .c5 = 0x1.249248e071e5ap-3, \ + .c6 = V2 (-0x1.ffffff8bf8482p-4), .c7 = 0x1.c71c8f07da57ap-4, \ + .c8 = V2 (-0x1.9999ca4ccb617p-4), .c9 = 0x1.7459ad2e1dfa3p-4, \ + .c10 = V2 (-0x1.554d2680a3ff2p-4), .c11 = 0x1.3b4c54d487455p-4, \ + .c12 = V2 (-0x1.2548a9ffe80e6p-4), .c13 = 0x1.0f389a24b2e07p-4, \ + .c14 = V2 (-0x1.eee4db15db335p-5), .c15 = 0x1.e95b494d4a5ddp-5, \ + .c16 = V2 (-0x1.15fdf07cb7c73p-4), .c17 = 0x1.0310b70800fcfp-4, \ + .c18 = -0x1.cfa7385bdb37ep-6, \ + .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, \ .hf_rt2_top = V2 (0x3fe6a09e00000000), \ .one_m_hf_rt2_top = V2 (0x00095f6200000000), \ .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff) \ @@ -51,19 +52,45 @@ struct v_log1p_data #define BottomMask v_u64 (0xffffffff) +static inline float64x2_t +eval_poly (float64x2_t m, float64x2_t m2, const struct v_log1p_data *d) +{ + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. 
*/ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1718 = vld1q_f64 (&d->c17); + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, m, c1718, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, m, c1315, 1); + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, m, c1315, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, m, c911, 1); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, m, c911, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, m, c57, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, m, c57, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, m, c13, 1); + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, m, c13, 0); + float64x2_t p = vfmaq_laneq_f64 (p1617, m2, c1718, 1); + p = vfmaq_f64 (p1415, m2, p); + p = vfmaq_f64 (p1213, m2, p); + p = vfmaq_f64 (p1011, m2, p); + p = vfmaq_f64 (p89, m2, p); + p = vfmaq_f64 (p67, m2, p); + p = vfmaq_f64 (p45, m2, p); + p = vfmaq_f64 (p23, m2, p); + return vfmaq_f64 (p01, m2, p); +} + static inline float64x2_t log1p_inline (float64x2_t x, const struct v_log1p_data *d) { - /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several - modifications: + /* Helper for calculating log(x + 1): - No special-case handling - this should be dealt with by the caller. - - Pairwise Horner polynomial evaluation for improved accuracy. - Optionally simulate the shortcut for k=0, used in the scalar routine, - using v_sel, for improved accuracy when the argument to log1p is close to - 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in - the source of the caller before including this file. - See v_log1pf_2u1.c for details of the algorithm. */ - float64x2_t m = vaddq_f64 (x, v_f64 (1)); + using v_sel, for improved accuracy when the argument to log1p is close + to 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 + in the source of the caller before including this file. 
*/ + float64x2_t m = vaddq_f64 (x, v_f64 (1.0)); uint64x2_t mi = vreinterpretq_u64_f64 (m); uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top); @@ -74,14 +101,14 @@ log1p_inline (float64x2_t x, const struct v_log1p_data *d) /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top); uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask)); - float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1)); + float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1.0)); /* Correction term c/m. */ - float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m); + float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1.0))), m); #ifndef WANT_V_LOG1P_K0_SHORTCUT -#error \ - "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" +# error \ + "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" #elif WANT_V_LOG1P_K0_SHORTCUT /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is that the approximation is solely the polynomial. */ @@ -92,11 +119,12 @@ log1p_inline (float64x2_t x, const struct v_log1p_data *d) /* Approximate log1p(f) on the reduced input using a polynomial. */ float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly); + float64x2_t p = eval_poly (f, f2, d); /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. 
*/ - float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]); - float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]); + float64x2_t ln2 = vld1q_f64 (&d->ln2[0]); + float64x2_t ylo = vfmaq_laneq_f64 (cm, k, ln2, 1); + float64x2_t yhi = vfmaq_laneq_f64 (f, k, ln2, 0); return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p); } commit 9170b921fa49d2ef37141506837baaae92c7d3f8 Author: Joana Cruz Date: Tue Dec 17 14:47:31 2024 +0000 AArch64: Improve codegen of AdvSIMD logf function family Load the polynomial evaluation coefficients into 2 vectors and use lanewise MLAs. 8% improvement in throughput microbenchmark on Neoverse V1 for log2 and log, and 2% for log10. Reviewed-by: Wilco Dijkstra (cherry picked from commit d6e034f5b222a9ed1aeb5de0c0c7d0dda8b63da3) diff --git a/sysdeps/aarch64/fpu/log10f_advsimd.c b/sysdeps/aarch64/fpu/log10f_advsimd.c index 82228b599a..0d792c3df9 100644 --- a/sysdeps/aarch64/fpu/log10f_advsimd.c +++ b/sysdeps/aarch64/fpu/log10f_advsimd.c @@ -18,21 +18,25 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { + float32x4_t c0, c2, c4, c6, inv_ln10, ln2; uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; uint32x4_t mantissa_mask; - float32x4_t poly[8]; - float32x4_t inv_ln10, ln2; + float c1, c3, c5, c7; } data = { /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. */ - .poly = { V4 (-0x1.bcb79cp-3f), V4 (0x1.2879c8p-3f), V4 (-0x1.bcd472p-4f), - V4 (0x1.6408f8p-4f), V4 (-0x1.246f8p-4f), V4 (0x1.f0e514p-5f), - V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) }, + .c0 = V4 (-0x1.bcb79cp-3f), + .c1 = 0x1.2879c8p-3f, + .c2 = V4 (-0x1.bcd472p-4f), + .c3 = 0x1.6408f8p-4f, + .c4 = V4 (-0x1.246f8p-4f), + .c5 = 0x1.f0e514p-5f, + .c6 = V4 (-0x1.0fc92cp-4f), + .c7 = 0x1.f5f76ap-5f, .ln2 = V4 (0x1.62e43p-1f), .inv_ln10 = V4 (0x1.bcb7b2p-2f), /* Lower bound is the smallest positive normal float 0x00800000. 
For @@ -62,7 +66,7 @@ special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - + float32x4_t c1357 = vld1q_f32 (&d->c1); /* To avoid having to mov x out of the way, keep u after offset has been applied, and recover x by adding the offset back in the special-case handler. */ @@ -81,7 +85,16 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) /* y = log10(1+r) + n * log10(2). */ float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t poly = v_pw_horner_7_f32 (r, r2, d->poly); + + float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); + float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); + float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2); + float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3); + + float32x4_t p47 = vfmaq_f32 (c45, r2, c67); + float32x4_t p27 = vfmaq_f32 (c23, r2, p47); + float32x4_t poly = vfmaq_f32 (c01, r2, p27); + /* y = Log10(2) * n + poly * InvLn(10). */ float32x4_t y = vfmaq_f32 (r, d->ln2, n); y = vmulq_f32 (y, d->inv_ln10); diff --git a/sysdeps/aarch64/fpu/log2f_advsimd.c b/sysdeps/aarch64/fpu/log2f_advsimd.c index 84effe4fe9..116c36c8e2 100644 --- a/sysdeps/aarch64/fpu/log2f_advsimd.c +++ b/sysdeps/aarch64/fpu/log2f_advsimd.c @@ -18,22 +18,27 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { + float32x4_t c0, c2, c4, c6, c8; uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; uint32x4_t mantissa_mask; - float32x4_t poly[9]; + float c1, c3, c5, c7; } data = { /* Coefficients generated using Remez algorithm approximate log2(1+r)/r for r in [ -1/3, 1/3 ]. rel error: 0x1.c4c4b0cp-26. */ - .poly = { V4 (0x1.715476p0f), /* (float)(1 / ln(2)). 
*/ - V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f), - V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f), - V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) }, + .c0 = V4 (0x1.715476p0f), /* (float)(1 / ln(2)). */ + .c1 = -0x1.715458p-1f, + .c2 = V4 (0x1.ec701cp-2f), + .c3 = -0x1.7171a4p-2f, + .c4 = V4 (0x1.27a0b8p-2f), + .c5 = -0x1.e5143ep-3f, + .c6 = V4 (0x1.9d8ecap-3f), + .c7 = -0x1.c675bp-3f, + .c8 = V4 (0x1.9e495p-3f), /* Lower bound is the smallest positive normal float 0x00800000. For optimised register use subnormals are detected after offset has been subtracted, so lower bound is 0x0080000 - offset (which wraps around). */ @@ -79,11 +84,21 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) /* y = log2(1+r) + n. */ float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly); + + float32x4_t c1357 = vld1q_f32 (&d->c1); + float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); + float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); + float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2); + float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3); + float32x4_t p68 = vfmaq_f32 (c67, r2, d->c8); + float32x4_t p48 = vfmaq_f32 (c45, r2, p68); + float32x4_t p28 = vfmaq_f32 (c23, r2, p48); + float32x4_t p = vfmaq_f32 (c01, r2, p28); if (__glibc_unlikely (v_any_u16h (special))) return special_case (n, u_off, p, r, special, d); return vfmaq_f32 (n, p, r); } + libmvec_hidden_def (V_NAME_F1 (log2)) HALF_WIDTH_ALIAS_F1 (log2) diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c index c20dbfd6c0..d9e64c732d 100644 --- a/sysdeps/aarch64/fpu/logf_advsimd.c +++ b/sysdeps/aarch64/fpu/logf_advsimd.c @@ -21,16 +21,19 @@ static const struct data { - uint32x4_t off, offset_lower_bound; + float32x4_t c2, c4, c6, ln2; + uint32x4_t off, offset_lower_bound, mantissa_mask; uint16x8_t special_bound; - uint32x4_t mantissa_mask; - float32x4_t poly[7]; - float32x4_t ln2; + float c1, c3, c5, 
c0; } data = { /* 3.34 ulp error. */ - .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), - V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), - V4 (-0x1.ffffc8p-2f) }, + .c0 = -0x1.3e737cp-3f, + .c1 = 0x1.5a9aa2p-3f, + .c2 = V4 (-0x1.4f9934p-3f), + .c3 = 0x1.961348p-3f, + .c4 = V4 (-0x1.00187cp-2f), + .c5 = 0x1.555d7cp-2f, + .c6 = V4 (-0x1.ffffc8p-2f), .ln2 = V4 (0x1.62e43p-1f), /* Lower bound is the smallest positive normal float 0x00800000. For optimised register use subnormals are detected after offset has been @@ -41,8 +44,6 @@ static const struct data .mantissa_mask = V4 (0x007fffff) }; -#define P(i) d->poly[7 - i] - static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, uint16x4_t cmp, const struct data *d) @@ -55,33 +56,30 @@ special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, p, q, r, r2, y; - uint32x4_t u, u_off; - uint16x4_t cmp; + float32x4_t c1350 = vld1q_f32 (&d->c1); /* To avoid having to mov x out of the way, keep u after offset has been applied, and recover x by adding the offset back in the special-case handler. */ - u_off = vreinterpretq_u32_f32 (x); + uint32x4_t u_off = vsubq_u32 (vreinterpretq_u32_f32 (x), d->off); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u_off = vsubq_u32 (u_off, d->off); - n = vcvtq_f32_s32 ( + float32x4_t n = vcvtq_f32_s32 ( vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. 
*/ - u = vandq_u32 (u_off, d->mantissa_mask); - u = vaddq_u32 (u, d->off); - r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + uint16x4_t cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); - cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), - vget_low_u16 (d->special_bound)); + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); + float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log(1+r) + n*ln2. */ - r2 = vmulq_f32 (r, r); + float32x4_t r2 = vmulq_f32 (r, r); /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ - p = vfmaq_f32 (P (5), P (6), r); - q = vfmaq_f32 (P (3), P (4), r); - y = vfmaq_f32 (P (1), P (2), r); - p = vfmaq_f32 (p, P (7), r2); + float32x4_t p = vfmaq_laneq_f32 (d->c2, r, c1350, 0); + float32x4_t q = vfmaq_laneq_f32 (d->c4, r, c1350, 1); + float32x4_t y = vfmaq_laneq_f32 (d->c6, r, c1350, 2); + p = vfmaq_laneq_f32 (p, r2, c1350, 3); + q = vfmaq_f32 (q, p, r2); y = vfmaq_f32 (y, q, r2); p = vfmaq_f32 (r, d->ln2, n); commit 41dc9e7c2d80bc5e886950b8a7bd21f77c9793b3 Author: Joana Cruz Date: Tue Dec 17 14:49:30 2024 +0000 AArch64: Improve codegen of AdvSIMD atan(2)(f) Load the polynomial evaluation coefficients into 2 vectors and use lanewise MLAs. 8% improvement in throughput microbenchmark on Neoverse V1. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit 6914774b9d3460876d9ad4482782213ec01a752e) diff --git a/sysdeps/aarch64/fpu/atan2_advsimd.c b/sysdeps/aarch64/fpu/atan2_advsimd.c index b1e7a9b8fc..1a8f02109f 100644 --- a/sysdeps/aarch64/fpu/atan2_advsimd.c +++ b/sysdeps/aarch64/fpu/atan2_advsimd.c @@ -23,40 +23,57 @@ static const struct data { + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; float64x2_t pi_over_2; - float64x2_t poly[20]; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + uint64x2_t zeroinfnan, minustwo; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on - the interval [2**-1022, 1.0]. */ - .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3), - V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4), - V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4), - V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5), - V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5), - V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5), - V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6), - V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7), - V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10), - V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), }, + [2**-1022, 1.0]. 
*/ + .c0 = V2 (-0x1.5555555555555p-2), + .c1 = 0x1.99999999996c1p-3, + .c2 = V2 (-0x1.2492492478f88p-3), + .c3 = 0x1.c71c71bc3951cp-4, + .c4 = V2 (-0x1.745d160a7e368p-4), + .c5 = 0x1.3b139b6a88ba1p-4, + .c6 = V2 (-0x1.11100ee084227p-4), + .c7 = 0x1.e1d0f9696f63bp-5, + .c8 = V2 (-0x1.aebfe7b418581p-5), + .c9 = 0x1.842dbe9b0d916p-5, + .c10 = V2 (-0x1.5d30140ae5e99p-5), + .c11 = 0x1.338e31eb2fbbcp-5, + .c12 = V2 (-0x1.00e6eece7de8p-5), + .c13 = 0x1.860897b29e5efp-6, + .c14 = V2 (-0x1.0051381722a59p-6), + .c15 = 0x1.14e9dc19a4a4ep-7, + .c16 = V2 (-0x1.d0062b42fe3bfp-9), + .c17 = 0x1.17739e210171ap-10, + .c18 = V2 (-0x1.ab24da7be7402p-13), + .c19 = 0x1.358851160a528p-16, .pi_over_2 = V2 (0x1.921fb54442d18p+0), + .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1), + .minustwo = V2 (0xc000000000000000), }; #define SignMask v_u64 (0x8000000000000000) /* Special cases i.e. 0, infinity, NaN (fall back to scalar calls). */ static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t y, float64x2_t x, float64x2_t ret, uint64x2_t cmp) +special_case (float64x2_t y, float64x2_t x, float64x2_t ret, + uint64x2_t sign_xy, uint64x2_t cmp) { + /* Account for the sign of x and y. */ + ret = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); return v_call2_f64 (atan2, y, x, ret, cmp); } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline uint64x2_t -zeroinfnan (uint64x2_t i) +zeroinfnan (uint64x2_t i, const struct data *d) { /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1). */ - return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), - v_u64 (2 * asuint64 (INFINITY) - 1)); + return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), d->zeroinfnan); } /* Fast implementation of vector atan2. @@ -66,12 +83,13 @@ zeroinfnan (uint64x2_t i) want 0x1.92d628ab678cfp-1. 
*/ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); uint64x2_t ix = vreinterpretq_u64_f64 (x); uint64x2_t iy = vreinterpretq_u64_f64 (y); - uint64x2_t special_cases = vorrq_u64 (zeroinfnan (ix), zeroinfnan (iy)); + uint64x2_t special_cases + = vorrq_u64 (zeroinfnan (ix, d), zeroinfnan (iy, d)); uint64x2_t sign_x = vandq_u64 (ix, SignMask); uint64x2_t sign_y = vandq_u64 (iy, SignMask); @@ -81,18 +99,18 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) float64x2_t ay = vabsq_f64 (y); uint64x2_t pred_xlt0 = vcltzq_f64 (x); - uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax); + uint64x2_t pred_aygtax = vcagtq_f64 (y, x); /* Set up z for call to atan. */ float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); - float64x2_t d = vbslq_f64 (pred_aygtax, ay, ax); - float64x2_t z = vdivq_f64 (n, d); + float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax); + float64x2_t z = vdivq_f64 (n, q); /* Work out the correct shift. */ - float64x2_t shift = vreinterpretq_f64_u64 ( - vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0)))); + float64x2_t shift + = vreinterpretq_f64_u64 (vandq_u64 (pred_xlt0, d->minustwo)); shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift); - shift = vmulq_f64 (shift, data_ptr->pi_over_2); + shift = vmulq_f64 (shift, d->pi_over_2); /* Calculate the polynomial approximation. Use split Estrin scheme for P(z^2) with deg(P)=19. 
Use split instead of @@ -103,20 +121,52 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) float64x2_t x2 = vmulq_f64 (z2, z2); float64x2_t x4 = vmulq_f64 (x2, x2); float64x2_t x8 = vmulq_f64 (x4, x4); - float64x2_t ret - = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, data_ptr->poly), - v_estrin_11_f64 (z2, x2, x4, x8, data_ptr->poly + 8), x8); + + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1719 = vld1q_f64 (&d->c17); + + /* estrin_7. */ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, x2, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, x2, p67); + + float64x2_t p07 = vfmaq_f64 (p03, x4, p47); + + /* estrin_11. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); + float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); + float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); + + float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); + float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); + + float64x2_t ret = vfmaq_f64 (p07, p819, x8); /* Finalize. y = shift + z + z^3 * P(z^2). */ ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z)); ret = vaddq_f64 (ret, shift); + if (__glibc_unlikely (v_any_u64 (special_cases))) + return special_case (y, x, ret, sign_xy, special_cases); + /* Account for the sign of x and y. 
*/ ret = vreinterpretq_f64_u64 ( veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); - if (__glibc_unlikely (v_any_u64 (special_cases))) - return special_case (y, x, ret, special_cases); - return ret; } diff --git a/sysdeps/aarch64/fpu/atan2f_advsimd.c b/sysdeps/aarch64/fpu/atan2f_advsimd.c index 56e610caf1..88daacd76c 100644 --- a/sysdeps/aarch64/fpu/atan2f_advsimd.c +++ b/sysdeps/aarch64/fpu/atan2f_advsimd.c @@ -22,34 +22,39 @@ static const struct data { - float32x4_t poly[8]; - float32x4_t pi_over_2; + float32x4_t c0, pi_over_2, c4, c6, c2; + float c1, c3, c5, c7; + uint32x4_t comp_const; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. Generated using fpminimax between FLT_MIN and 1. */ - .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f), - V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f), - V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) }, - .pi_over_2 = V4 (0x1.921fb6p+0f), + .c0 = V4 (-0x1.55555p-2f), .c1 = 0x1.99935ep-3f, + .c2 = V4 (-0x1.24051ep-3f), .c3 = 0x1.bd7368p-4f, + .c4 = V4 (-0x1.491f0ep-4f), .c5 = 0x1.93a2c0p-5f, + .c6 = V4 (-0x1.4c3c60p-6f), .c7 = 0x1.01fd88p-8f, + .pi_over_2 = V4 (0x1.921fb6p+0f), .comp_const = V4 (2 * 0x7f800000lu - 1), }; #define SignMask v_u32 (0x80000000) /* Special cases i.e. 0, infinity and nan (fall back to scalar calls). */ static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t y, float32x4_t x, float32x4_t ret, uint32x4_t cmp) +special_case (float32x4_t y, float32x4_t x, float32x4_t ret, + uint32x4_t sign_xy, uint32x4_t cmp) { + /* Account for the sign of y. */ + ret = vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); return v_call2_f32 (atan2f, y, x, ret, cmp); } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline uint32x4_t -zeroinfnan (uint32x4_t i) +zeroinfnan (uint32x4_t i, const struct data *d) { /* 2 * i - 1 >= 2 * 0x7f800000lu - 1. 
*/ - return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), - v_u32 (2 * 0x7f800000lu - 1)); + return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), d->comp_const); } /* Fast implementation of vector atan2f. Maximum observed error is @@ -58,12 +63,13 @@ zeroinfnan (uint32x4_t i) want 0x1.967f00p-1. */ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t iy = vreinterpretq_u32_f32 (y); - uint32x4_t special_cases = vorrq_u32 (zeroinfnan (ix), zeroinfnan (iy)); + uint32x4_t special_cases + = vorrq_u32 (zeroinfnan (ix, d), zeroinfnan (iy, d)); uint32x4_t sign_x = vandq_u32 (ix, SignMask); uint32x4_t sign_y = vandq_u32 (iy, SignMask); @@ -77,14 +83,14 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) /* Set up z for call to atanf. */ float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); - float32x4_t d = vbslq_f32 (pred_aygtax, ay, ax); - float32x4_t z = vdivq_f32 (n, d); + float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax); + float32x4_t z = vdivq_f32 (n, q); /* Work out the correct shift. */ float32x4_t shift = vreinterpretq_f32_u32 ( vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f)))); shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift); - shift = vmulq_f32 (shift, data_ptr->pi_over_2); + shift = vmulq_f32 (shift, d->pi_over_2); /* Calculate the polynomial approximation. Use 2-level Estrin scheme for P(z^2) with deg(P)=7. 
However, @@ -96,23 +102,27 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) float32x4_t z2 = vmulq_f32 (z, z); float32x4_t z4 = vmulq_f32 (z2, z2); - float32x4_t ret = vfmaq_f32 ( - v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly), z4, - vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly + 4))); + float32x4_t c1357 = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, c1357, 3); + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p47 = vfmaq_f32 (p45, z4, p67); + + float32x4_t ret = vfmaq_f32 (p03, z4, vmulq_f32 (z4, p47)); /* y = shift + z * P(z^2). */ ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift); - /* Account for the sign of y. */ - ret = vreinterpretq_f32_u32 ( - veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); - if (__glibc_unlikely (v_any_u32 (special_cases))) { - return special_case (y, x, ret, special_cases); + return special_case (y, x, ret, sign_xy, special_cases); } - return ret; + /* Account for the sign of y. */ + return vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); } libmvec_hidden_def (V_NAME_F2 (atan2)) HALF_WIDTH_ALIAS_F2(atan2) diff --git a/sysdeps/aarch64/fpu/atan_advsimd.c b/sysdeps/aarch64/fpu/atan_advsimd.c index a962be0f78..14f1809796 100644 --- a/sysdeps/aarch64/fpu/atan_advsimd.c +++ b/sysdeps/aarch64/fpu/atan_advsimd.c @@ -22,21 +22,22 @@ static const struct data { + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; float64x2_t pi_over_2; - float64x2_t poly[20]; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. 
*/ - .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3), - V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4), - V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4), - V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5), - V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5), - V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5), - V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6), - V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7), - V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10), - V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), }, + .c0 = V2 (-0x1.5555555555555p-2), .c1 = 0x1.99999999996c1p-3, + .c2 = V2 (-0x1.2492492478f88p-3), .c3 = 0x1.c71c71bc3951cp-4, + .c4 = V2 (-0x1.745d160a7e368p-4), .c5 = 0x1.3b139b6a88ba1p-4, + .c6 = V2 (-0x1.11100ee084227p-4), .c7 = 0x1.e1d0f9696f63bp-5, + .c8 = V2 (-0x1.aebfe7b418581p-5), .c9 = 0x1.842dbe9b0d916p-5, + .c10 = V2 (-0x1.5d30140ae5e99p-5), .c11 = 0x1.338e31eb2fbbcp-5, + .c12 = V2 (-0x1.00e6eece7de8p-5), .c13 = 0x1.860897b29e5efp-6, + .c14 = V2 (-0x1.0051381722a59p-6), .c15 = 0x1.14e9dc19a4a4ep-7, + .c16 = V2 (-0x1.d0062b42fe3bfp-9), .c17 = 0x1.17739e210171ap-10, + .c18 = V2 (-0x1.ab24da7be7402p-13), .c19 = 0x1.358851160a528p-16, .pi_over_2 = V2 (0x1.921fb54442d18p+0), }; @@ -52,6 +53,11 @@ static const struct data float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) { const struct data *d = ptr_barrier (&data); + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1719 = vld1q_f64 (&d->c17); /* Small cases, infs and nans are supported by our approximation technique, but do not set fenv flags correctly. 
Only trigger special case if we need @@ -90,9 +96,35 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) float64x2_t x2 = vmulq_f64 (z2, z2); float64x2_t x4 = vmulq_f64 (x2, x2); float64x2_t x8 = vmulq_f64 (x4, x4); - float64x2_t y - = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, d->poly), - v_estrin_11_f64 (z2, x2, x4, x8, d->poly + 8), x8); + + /* estrin_7. */ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, x2, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, x2, p67); + + float64x2_t p07 = vfmaq_f64 (p03, x4, p47); + + /* estrin_11. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); + float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); + float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); + + float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); + float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); + + float64x2_t y = vfmaq_f64 (p07, p819, x8); /* Finalize. y = shift + z + z^3 * P(z^2). */ y = vfmaq_f64 (az, y, vmulq_f64 (z2, az)); commit bf2b60a56036c951a798845223a2e04cc48507e4 Author: Joana Cruz Date: Tue Dec 17 14:50:33 2024 +0000 AArch64: Improve codegen of AdvSIMD expf family Load the polynomial evaluation coefficients into 2 vectors and use lanewise MLAs. Also use intrinsics instead of native operations. expf: 3% improvement in throughput microbenchmark on Neoverse V1, exp2f: 5%, exp10f: 13%, coshf: 14%. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit cff9648d0b50d19cdaf685f6767add040d4e1a8e) diff --git a/sysdeps/aarch64/fpu/coshf_advsimd.c b/sysdeps/aarch64/fpu/coshf_advsimd.c index c1ab4923b8..cd5c866521 100644 --- a/sysdeps/aarch64/fpu/coshf_advsimd.c +++ b/sysdeps/aarch64/fpu/coshf_advsimd.c @@ -23,19 +23,27 @@ static const struct data { struct v_expf_data expf_consts; - uint32x4_t tiny_bound, special_bound; + uint32x4_t tiny_bound; + float32x4_t bound; +#if WANT_SIMD_EXCEPT + uint32x4_t special_bound; +#endif } data = { .expf_consts = V_EXPF_DATA, .tiny_bound = V4 (0x20000000), /* 0x1p-63: Round to 1 below this. */ /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ + .bound = V4 (0x1.5a92d8p+6), +#if WANT_SIMD_EXCEPT .special_bound = V4 (0x42ad496c), +#endif }; #if !WANT_SIMD_EXCEPT static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t half_t, float32x4_t half_over_t, + uint32x4_t special) { - return v_call_f32 (coshf, x, y, special); + return v_call_f32 (coshf, x, vaddq_f32 (half_t, half_over_t), special); } #endif @@ -47,14 +55,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t special = vcgeq_u32 (iax, d->special_bound); - #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered correctly, fall back to the scalar variant for all inputs if any input is a special value or above the bound at which expf overflows. */ + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); + uint32x4_t special = vcgeq_u32 (iax, d->special_bound); if (__glibc_unlikely (v_any_u32 (special))) return v_call_f32 (coshf, x, x, v_u32 (-1)); @@ -63,10 +70,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x) input to 0, which will generate no exceptions. 
*/ if (__glibc_unlikely (v_any_u32 (tiny))) ax = v_zerofy_f32 (ax, tiny); + float32x4_t t = v_expf_inline (ax, &d->expf_consts); +#else + uint32x4_t special = vcageq_f32 (x, d->bound); + float32x4_t t = v_expf_inline (x, &d->expf_consts); #endif /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */ - float32x4_t t = v_expf_inline (ax, &d->expf_consts); float32x4_t half_t = vmulq_n_f32 (t, 0.5); float32x4_t half_over_t = vdivq_f32 (v_f32 (0.5), t); @@ -75,7 +85,7 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x) return vbslq_f32 (tiny, v_f32 (1), vaddq_f32 (half_t, half_over_t)); #else if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vaddq_f32 (half_t, half_over_t), special); + return special_case (x, half_t, half_over_t, special); #endif return vaddq_f32 (half_t, half_over_t); diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c index cf53e73290..55d9cd83f2 100644 --- a/sysdeps/aarch64/fpu/exp10f_advsimd.c +++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c @@ -18,16 +18,15 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" #define ScaleBound 192.0f static const struct data { - float32x4_t poly[5]; - float log10_2_and_inv[4]; - float32x4_t shift; - + float32x4_t c0, c1, c3; + float log10_2_high, log10_2_low, c2, c4; + float32x4_t inv_log10_2, special_bound; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT float32x4_t scale_thresh; #endif @@ -37,19 +36,24 @@ static const struct data rel error: 0x1.89dafa3p-24 abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] maxerr: 1.85943 +0.5 ulp. */ - .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f), - V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) }, - .shift = V4 (0x1.8p23f), - - /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. 
*/ - .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 }, + .c0 = V4 (0x1.26bb16p+1f), + .c1 = V4 (0x1.5350d2p+1f), + .c2 = 0x1.04744ap+1f, + .c3 = V4 (0x1.2d8176p+0f), + .c4 = 0x1.12b41ap-1f, + .inv_log10_2 = V4 (0x1.a934fp+1), + .log10_2_high = 0x1.344136p-2, + .log10_2_low = 0x1.ec10cp-27, + /* rint (log2 (2^127 / (1 + sqrt (2)))). */ + .special_bound = V4 (126.0f), + .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .scale_thresh = V4 (ScaleBound) #endif }; -#define ExponentBias v_u32 (0x3f800000) - #if WANT_SIMD_EXCEPT # define SpecialBound 38.0f /* rint(log10(2^127)). */ @@ -67,17 +71,15 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) #else -# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))). */ -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) +# define SpecialBound 126.0f static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); @@ -112,23 +114,23 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x) /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. 
*/ - float32x4_t log10_2_and_inv = vld1q_f32 (d->log10_2_and_inv); - float32x4_t z = vfmaq_laneq_f32 (d->shift, x, log10_2_and_inv, 0); - float32x4_t n = vsubq_f32 (z, d->shift); - float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_and_inv, 1); - r = vfmsq_laneq_f32 (r, n, log10_2_and_inv, 2); - uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + float32x4_t log10_2_c24 = vld1q_f32 (&d->log10_2_high); + float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_log10_2)); + float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_c24, 0); + r = vfmaq_laneq_f32 (r, n, log10_2_c24, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (n)), 23); - float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - uint32x4_t cmp = vcagtq_f32 (n, v_f32 (SpecialBound)); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t poly - = vfmaq_f32 (vmulq_f32 (r, d->poly[0]), - v_pairwise_poly_3_f32 (r, r2, d->poly + 1), r2); + float32x4_t p12 = vfmaq_laneq_f32 (d->c1, r, log10_2_c24, 2); + float32x4_t p34 = vfmaq_laneq_f32 (d->c3, r, log10_2_c24, 3); + float32x4_t p14 = vfmaq_f32 (p12, r2, p34); + float32x4_t poly = vfmaq_f32 (vmulq_f32 (r, d->c0), p14, r2); if (__glibc_unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/exp2f_advsimd.c b/sysdeps/aarch64/fpu/exp2f_advsimd.c index 69e0b193a1..a4220da63c 100644 --- a/sysdeps/aarch64/fpu/exp2f_advsimd.c +++ b/sysdeps/aarch64/fpu/exp2f_advsimd.c @@ -21,24 +21,28 @@ static const struct data { - float32x4_t poly[5]; - uint32x4_t exponent_bias; + float32x4_t c1, c3; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT - float32x4_t special_bound, scale_thresh; + float32x4_t scale_thresh, special_bound; #endif + float c0, c2, c4, zero; } data = { /* maxerr: 1.962 ulp. 
*/ - .poly = { V4 (0x1.59977ap-10f), V4 (0x1.3ce9e4p-7f), V4 (0x1.c6bd32p-5f), - V4 (0x1.ebf9bcp-3f), V4 (0x1.62e422p-1f) }, + .c0 = 0x1.59977ap-10f, + .c1 = V4 (0x1.3ce9e4p-7f), + .c2 = 0x1.c6bd32p-5f, + .c3 = V4 (0x1.ebf9bcp-3f), + .c4 = 0x1.62e422p-1f, .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .special_bound = V4 (126.0f), .scale_thresh = V4 (192.0f), #endif }; -#define C(i) d->poly[i] - #if WANT_SIMD_EXCEPT # define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */ @@ -55,16 +59,13 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) #else -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) - static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); @@ -80,13 +81,11 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly; - uint32x4_t cmp, e; #if WANT_SIMD_EXCEPT /* asuint(|x|) - TinyBound >= BigBound - TinyBound. 
*/ uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); - cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound); + uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound); float32x4_t xm = x; /* If any lanes are special, mask them with 1 and retain a copy of x to allow special_case to fix special lanes later. This is only necessary if fenv @@ -95,23 +94,24 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x) x = vbslq_f32 (cmp, v_f32 (1), x); #endif - /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ - n = vrndaq_f32 (x); - r = vsubq_f32 (x, n); - e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23); - scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. */ + float32x4_t n = vrndaq_f32 (x); + float32x4_t r = vsubq_f32 (x, n); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - cmp = vcagtq_f32 (n, d->special_bound); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif - r2 = vmulq_f32 (r, r); - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t c024 = vld1q_f32 (&d->c0); + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, c024, 0); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, c024, 1); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_laneq_f32 (r, c024, 2); + float32x4_t poly = vfmaq_f32 (p, q, r2); if (__glibc_unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c index 5c9cb72620..70f137e2e5 100644 --- a/sysdeps/aarch64/fpu/expf_advsimd.c +++ b/sysdeps/aarch64/fpu/expf_advsimd.c @@ -21,20 +21,25 @@ static const struct 
data { - float32x4_t poly[5]; - float32x4_t inv_ln2, ln2_hi, ln2_lo; - uint32x4_t exponent_bias; + float32x4_t c1, c3, c4, inv_ln2; + float ln2_hi, ln2_lo, c0, c2; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT float32x4_t special_bound, scale_thresh; #endif } data = { /* maxerr: 1.45358 +0.5 ulp. */ - .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), - V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, + .c0 = 0x1.0e4020p-7f, + .c1 = V4 (0x1.573e2ep-5f), + .c2 = 0x1.555e66p-3f, + .c3 = V4 (0x1.fffdb6p-2f), + .c4 = V4 (0x1.ffffecp-1f), .inv_ln2 = V4 (0x1.715476p+0f), - .ln2_hi = V4 (0x1.62e4p-1f), - .ln2_lo = V4 (0x1.7f7d1cp-20f), + .ln2_hi = 0x1.62e4p-1f, + .ln2_lo = 0x1.7f7d1cp-20f, .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .special_bound = V4 (126.0f), .scale_thresh = V4 (192.0f), @@ -59,19 +64,17 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) #else -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) - static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); + // (s2 + p*s2)*s1 = s2(p+1)s1 float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1); /* Similar to r1 but avoids double rounding in the subnormal range. 
*/ float32x4_t r0 = vfmaq_f32 (scale, poly, scale); @@ -84,12 +87,11 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly; - uint32x4_t cmp, e; + float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); #if WANT_SIMD_EXCEPT /* asuint(x) - TinyBound >= BigBound - TinyBound. */ - cmp = vcgeq_u32 ( + uint32x4_t cmp = vcgeq_u32 ( vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)), TinyBound), SpecialBound); @@ -103,22 +105,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); - r = vfmsq_f32 (x, n, d->ln2_hi); - r = vfmsq_f32 (r, n, d->ln2_lo); - e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); - scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); + float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c02, 0); + r = vfmsq_laneq_f32 (r, n, ln2_c02, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - cmp = vcagtq_f32 (n, d->special_bound); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif - r2 = vmulq_f32 (r, r); - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_f32 (d->c4, r); + float32x4_t poly = vfmaq_f32 (p, q, r2); if (__glibc_unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/v_expf_inline.h 
b/sysdeps/aarch64/fpu/v_expf_inline.h index 08b06e0a6b..eacd2af241 100644 --- a/sysdeps/aarch64/fpu/v_expf_inline.h +++ b/sysdeps/aarch64/fpu/v_expf_inline.h @@ -24,50 +24,45 @@ struct v_expf_data { - float32x4_t poly[5]; - float32x4_t shift; - float invln2_and_ln2[4]; + float ln2_hi, ln2_lo, c0, c2; + float32x4_t inv_ln2, c1, c3, c4; + /* asuint(1.0f). */ + uint32x4_t exponent_bias; }; /* maxerr: 1.45358 +0.5 ulp. */ #define V_EXPF_DATA \ { \ - .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), \ - V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, \ - .shift = V4 (0x1.8p23f), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ + .c0 = 0x1.0e4020p-7f, .c1 = V4 (0x1.573e2ep-5f), .c2 = 0x1.555e66p-3f, \ + .c3 = V4 (0x1.fffdb6p-2f), .c4 = V4 (0x1.ffffecp-1f), \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + .inv_ln2 = V4 (0x1.715476p+0f), .exponent_bias = V4 (0x3f800000), \ } -#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f). */ -#define C(i) d->poly[i] - static inline float32x4_t v_expf_inline (float32x4_t x, const struct v_expf_data *d) { - /* Helper routine for calculating exp(x). + /* Helper routine for calculating exp(ax). Copied from v_expf.c, with all special-case handling removed - the calling routine should handle special values if required. */ - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - float32x4_t n, r, z; - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - z = vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0); - n = vsubq_f32 (z, d->shift); - r = vfmsq_laneq_f32 (x, n, invln2_and_ln2, 1); - r = vfmsq_laneq_f32 (r, n, invln2_and_ln2, 2); - uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); - float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); + /* exp(ax) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + ax = ln2*n + r, with r in [-ln2/2, ln2/2]. 
*/ + float32x4_t ax = vabsq_f32 (x); + float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); + float32x4_t n = vrndaq_f32 (vmulq_f32 (ax, d->inv_ln2)); + float32x4_t r = vfmsq_laneq_f32 (ax, n, ln2_c02, 0); + r = vfmsq_laneq_f32 (r, n, ln2_c02, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); /* Custom order-4 Estrin avoids building high order monomial. */ float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t p, q, poly; - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_f32 (d->c4, r); + float32x4_t poly = vfmaq_f32 (p, q, r2); return vfmaq_f32 (scale, poly, scale); } - #endif commit abfd20ebbd2883f2c6e5f16709f7b9781c3c8068 Author: Luna Lamb Date: Fri Jan 3 19:00:12 2025 +0000 AArch64: Improve codegen in AdvSIMD asinh Improves memory access and removes spills. Load the polynomial evaluation coefficients into 2 vectors and use lanewise MLAs. Reduces MOVs 6->3 , LDR 11->5, STR/STP 2->0, ADRP 3->2. 
(cherry picked from commit 140b985e5a2071000122b3cb63ebfe88cf21dd29) diff --git a/sysdeps/aarch64/fpu/asinh_advsimd.c b/sysdeps/aarch64/fpu/asinh_advsimd.c index 6207e7da95..2739f98b39 100644 --- a/sysdeps/aarch64/fpu/asinh_advsimd.c +++ b/sysdeps/aarch64/fpu/asinh_advsimd.c @@ -20,41 +20,71 @@ #include "v_math.h" #include "poly_advsimd_f64.h" -#define A(i) v_f64 (__v_log_data.poly[i]) -#define N (1 << V_LOG_TABLE_BITS) -#define IndexMask (N - 1) - const static struct data { - float64x2_t poly[18]; - uint64x2_t off, huge_bound, abs_mask; - float64x2_t ln2, tiny_bound; + uint64x2_t huge_bound, abs_mask, off, mask; +#if WANT_SIMD_EXCEPT + float64x2_t tiny_bound; +#endif + float64x2_t lc0, lc2; + double lc1, lc3, ln2, lc4; + + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c17; + double c1, c3, c5, c7, c9, c11, c13, c15; + } data = { - .off = V2 (0x3fe6900900000000), - .ln2 = V2 (0x1.62e42fefa39efp-1), - .huge_bound = V2 (0x5fe0000000000000), + +#if WANT_SIMD_EXCEPT .tiny_bound = V2 (0x1p-26), - .abs_mask = V2 (0x7fffffffffffffff), +#endif /* Even terms of polynomial s.t. asinh(x) is approximated by asinh(x) ~= x + x^3 * (C0 + C1 * x + C2 * x^2 + C3 * x^3 + ...). Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2). 
*/ - .poly = { V2 (-0x1.55555555554a7p-3), V2 (0x1.3333333326c7p-4), - V2 (-0x1.6db6db68332e6p-5), V2 (0x1.f1c71b26fb40dp-6), - V2 (-0x1.6e8b8b654a621p-6), V2 (0x1.1c4daa9e67871p-6), - V2 (-0x1.c9871d10885afp-7), V2 (0x1.7a16e8d9d2ecfp-7), - V2 (-0x1.3ddca533e9f54p-7), V2 (0x1.0becef748dafcp-7), - V2 (-0x1.b90c7099dd397p-8), V2 (0x1.541f2bb1ffe51p-8), - V2 (-0x1.d217026a669ecp-9), V2 (0x1.0b5c7977aaf7p-9), - V2 (-0x1.e0f37daef9127p-11), V2 (0x1.388b5fe542a6p-12), - V2 (-0x1.021a48685e287p-14), V2 (0x1.93d4ba83d34dap-18) }, + + .c0 = V2 (-0x1.55555555554a7p-3), + .c1 = 0x1.3333333326c7p-4, + .c2 = V2 (-0x1.6db6db68332e6p-5), + .c3 = 0x1.f1c71b26fb40dp-6, + .c4 = V2 (-0x1.6e8b8b654a621p-6), + .c5 = 0x1.1c4daa9e67871p-6, + .c6 = V2 (-0x1.c9871d10885afp-7), + .c7 = 0x1.7a16e8d9d2ecfp-7, + .c8 = V2 (-0x1.3ddca533e9f54p-7), + .c9 = 0x1.0becef748dafcp-7, + .c10 = V2 (-0x1.b90c7099dd397p-8), + .c11 = 0x1.541f2bb1ffe51p-8, + .c12 = V2 (-0x1.d217026a669ecp-9), + .c13 = 0x1.0b5c7977aaf7p-9, + .c14 = V2 (-0x1.e0f37daef9127p-11), + .c15 = 0x1.388b5fe542a6p-12, + .c16 = V2 (-0x1.021a48685e287p-14), + .c17 = V2 (0x1.93d4ba83d34dap-18), + + .lc0 = V2 (-0x1.ffffffffffff7p-2), + .lc1 = 0x1.55555555170d4p-2, + .lc2 = V2 (-0x1.0000000399c27p-2), + .lc3 = 0x1.999b2e90e94cap-3, + .lc4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + + .off = V2 (0x3fe6900900000000), + .huge_bound = V2 (0x5fe0000000000000), + .abs_mask = V2 (0x7fffffffffffffff), + .mask = V2 (0xfffULL << 52), }; static float64x2_t NOINLINE VPCS_ATTR -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) +special_case (float64x2_t x, float64x2_t y, uint64x2_t abs_mask, + uint64x2_t special) { + /* Copy sign. 
*/ + y = vbslq_f64 (abs_mask, y, x); return v_call_f64 (asinh, x, y, special); } +#define N (1 << V_LOG_TABLE_BITS) +#define IndexMask (N - 1) + struct entry { float64x2_t invc; @@ -76,27 +106,34 @@ lookup (uint64x2_t i) } static inline float64x2_t -log_inline (float64x2_t x, const struct data *d) +log_inline (float64x2_t xm, const struct data *d) { - /* Double-precision vector log, copied from ordinary vector log with some - cosmetic modification and special-cases removed. */ - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint64x2_t tmp = vsubq_u64 (ix, d->off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz - = vsubq_u64 (ix, vandq_u64 (tmp, vdupq_n_u64 (0xfffULL << 52))); + + uint64x2_t u = vreinterpretq_u64_f64 (xm); + uint64x2_t u_off = vsubq_u64 (u, d->off); + + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); - struct entry e = lookup (tmp); + + struct entry e = lookup (u_off); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); float64x2_t kd = vcvtq_f64_s64 (k); - float64x2_t hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); + + /* hi = r + log(c) + k*Ln2. */ + float64x2_t ln2_and_lc4 = vld1q_f64 (&d->ln2); + float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_lc4, 0); + + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. 
*/ + float64x2_t odd_coeffs = vld1q_f64 (&d->lc1); float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = vfmaq_f64 (A (2), A (3), r); - float64x2_t p = vfmaq_f64 (A (0), A (1), r); - y = vfmaq_f64 (y, A (4), r2); - y = vfmaq_f64 (p, y, r2); - y = vfmaq_f64 (hi, y, r2); - return y; + float64x2_t y = vfmaq_laneq_f64 (d->lc2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->lc0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, ln2_and_lc4, 1); + y = vfmaq_f64 (p, r2, y); + return vfmaq_f64 (hi, y, r2); } /* Double-precision implementation of vector asinh(x). @@ -106,23 +143,24 @@ log_inline (float64x2_t x, const struct data *d) asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1) if |x| >= 1 = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise where log(x) is an optimized log approximation, and P(x) is a polynomial - shared with the scalar routine. The greatest observed error 3.29 ULP, in + shared with the scalar routine. The greatest observed error 2.79 ULP, in |x| >= 1: - __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1 - want 0x1.ffffcfd0e2352p-1. */ + _ZGVnN2v_asinh(0x1.2cd9d73ea76a6p+0) got 0x1.ffffd003219dap-1 + want 0x1.ffffd003219ddp-1. */ VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - float64x2_t ax = vabsq_f64 (x); - uint64x2_t iax = vreinterpretq_u64_f64 (ax); uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1)); - uint64x2_t special = vcgeq_u64 (iax, d->huge_bound); #if WANT_SIMD_EXCEPT + uint64x2_t iax = vreinterpretq_u64_f64 (ax); + uint64x2_t special = vcgeq_u64 (iax, (d->huge_bound)); uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound); special = vorrq_u64 (special, tiny); +#else + uint64x2_t special = vcgeq_f64 (ax, vreinterpretq_f64_u64 (d->huge_bound)); #endif /* Option 1: |x| >= 1. @@ -147,19 +185,45 @@ VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x) overflow, and tiny lanes, which will underflow, by setting them to 0. 
They will be fixed later, either by selecting x or falling back to the scalar special-case. The largest observed error in this region is 1.47 ULPs: - __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1 - want 0x1.c1d6bf874019cp-1. */ + _ZGVnN2v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1 + want 0x1.c1d6bf874019cp-1. */ float64x2_t option_2 = v_f64 (0); + if (__glibc_likely (v_any_u64 (vceqzq_u64 (gt1)))) { + #if WANT_SIMD_EXCEPT ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1)); #endif - float64x2_t x2 = vmulq_f64 (ax, ax), x3 = vmulq_f64 (ax, x2), - z2 = vmulq_f64 (x2, x2), z4 = vmulq_f64 (z2, z2), - z8 = vmulq_f64 (z4, z4), z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_17_f64 (x2, z2, z4, z8, z16, d->poly); - option_2 = vfmaq_f64 (ax, p, x3); + float64x2_t x2 = vmulq_f64 (ax, ax), z2 = vmulq_f64 (x2, x2); + /* Order-17 Pairwise Horner scheme. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, x2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, x2, c13, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, x2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, x2, c57, 1); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, x2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, x2, c911, 1); + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, x2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, x2, c1315, 1); + float64x2_t p1617 = vfmaq_f64 (d->c16, x2, d->c17); + + float64x2_t p = vfmaq_f64 (p1415, z2, p1617); + p = vfmaq_f64 (p1213, z2, p); + p = vfmaq_f64 (p1011, z2, p); + p = vfmaq_f64 (p89, z2, p); + + p = vfmaq_f64 (p67, z2, p); + p = vfmaq_f64 (p45, z2, p); + + p = vfmaq_f64 (p23, z2, p); + + p = vfmaq_f64 (p01, z2, p); + option_2 = vfmaq_f64 (ax, p, vmulq_f64 (ax, x2)); #if WANT_SIMD_EXCEPT option_2 = vbslq_f64 (tiny, x, option_2); #endif @@ -167,10 +231,10 @@ VPCS_ATTR 
float64x2_t V_NAME_D1 (asinh) (float64x2_t x) /* Choose the right option for each lane. */ float64x2_t y = vbslq_f64 (gt1, option_1, option_2); - /* Copy sign. */ - y = vbslq_f64 (d->abs_mask, y, x); - if (__glibc_unlikely (v_any_u64 (special))) - return special_case (x, y, special); - return y; + { + return special_case (x, y, d->abs_mask, special); + } + /* Copy sign. */ + return vbslq_f64 (d->abs_mask, y, x); } commit 5f45c0f91eae99b7d49f5c63b900441eb3491213 Author: Luna Lamb Date: Fri Jan 3 19:02:52 2025 +0000 AArch64: Improve codegen in SVE tans Improves memory access. Tan: MOVPRFX 7 -> 2, LD1RD 12 -> 5, move MOV away from return. Tanf: MOV 2 -> 1, MOVPRFX 6 -> 3, LD1RW 5 -> 4, move mov away from return. (cherry picked from commit aa6609feb20ebf8653db639dabe2a6afc77b02cc) diff --git a/sysdeps/aarch64/fpu/tan_sve.c b/sysdeps/aarch64/fpu/tan_sve.c index b2e4447316..a7318fd417 100644 --- a/sysdeps/aarch64/fpu/tan_sve.c +++ b/sysdeps/aarch64/fpu/tan_sve.c @@ -22,24 +22,38 @@ static const struct data { - double poly[9]; - double half_pi_hi, half_pi_lo, inv_half_pi, range_val, shift; + double c2, c4, c6, c8; + double poly_1357[4]; + double c0, inv_half_pi; + double half_pi_hi, half_pi_lo, range_val; } data = { /* Polynomial generated with FPMinimax. 
*/ - .poly = { 0x1.5555555555556p-2, 0x1.1111111110a63p-3, 0x1.ba1ba1bb46414p-5, - 0x1.664f47e5b5445p-6, 0x1.226e5e5ecdfa3p-7, 0x1.d6c7ddbf87047p-9, - 0x1.7ea75d05b583ep-10, 0x1.289f22964a03cp-11, - 0x1.4e4fd14147622p-12, }, + .c2 = 0x1.ba1ba1bb46414p-5, + .c4 = 0x1.226e5e5ecdfa3p-7, + .c6 = 0x1.7ea75d05b583ep-10, + .c8 = 0x1.4e4fd14147622p-12, + .poly_1357 = { 0x1.1111111110a63p-3, 0x1.664f47e5b5445p-6, + 0x1.d6c7ddbf87047p-9, 0x1.289f22964a03cp-11 }, + .c0 = 0x1.5555555555556p-2, + .inv_half_pi = 0x1.45f306dc9c883p-1, .half_pi_hi = 0x1.921fb54442d18p0, .half_pi_lo = 0x1.1a62633145c07p-54, - .inv_half_pi = 0x1.45f306dc9c883p-1, .range_val = 0x1p23, - .shift = 0x1.8p52, }; static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg, + svbool_t special) { + svbool_t use_recip = svcmpeq ( + pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0); + + svfloat64_t n = svmad_x (pg, p, p, -1); + svfloat64_t d = svmul_x (svptrue_b64 (), p, 2); + svfloat64_t swap = n; + n = svneg_m (n, use_recip, d); + d = svsel (use_recip, swap, d); + svfloat64_t y = svdiv_x (svnot_z (pg, special), n, d); return sv_call_f64 (tan, x, y, special); } @@ -50,15 +64,10 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special) svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) { const struct data *dat = ptr_barrier (&data); - - /* Invert condition to catch NaNs and Infs as well as large values. */ - svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val)); - + svfloat64_t half_pi_c0 = svld1rq (svptrue_b64 (), &dat->c0); /* q = nearest integer to 2 * x / pi. 
*/ - svfloat64_t shift = sv_f64 (dat->shift); - svfloat64_t q = svmla_x (pg, shift, x, dat->inv_half_pi); - q = svsub_x (pg, q, shift); - svint64_t qi = svcvt_s64_x (pg, q); + svfloat64_t q = svmul_lane (x, half_pi_c0, 1); + q = svrinta_x (pg, q); /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. */ @@ -68,7 +77,7 @@ svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) r = svmls_lane (r, q, half_pi, 1); /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle formula. */ - r = svmul_x (pg, r, 0.5); + r = svmul_x (svptrue_b64 (), r, 0.5); /* Approximate tan(r) using order 8 polynomial. tan(x) is odd, so polynomial has the form: @@ -76,29 +85,51 @@ svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ... Then compute the approximation by: tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t r4 = svmul_x (pg, r2, r2); - svfloat64_t r8 = svmul_x (pg, r4, r4); + + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t r4 = svmul_x (svptrue_b64 (), r2, r2); + svfloat64_t r8 = svmul_x (svptrue_b64 (), r4, r4); /* Use offset version coeff array by 1 to evaluate from C1 onwards. */ - svfloat64_t p = sv_estrin_7_f64_x (pg, r2, r4, r8, dat->poly + 1); - p = svmad_x (pg, p, r2, dat->poly[0]); - p = svmla_x (pg, r, r2, svmul_x (pg, p, r)); + svfloat64_t C_24 = svld1rq (svptrue_b64 (), &dat->c2); + svfloat64_t C_68 = svld1rq (svptrue_b64 (), &dat->c6); + + /* Use offset version coeff array by 1 to evaluate from C1 onwards. 
*/ + svfloat64_t p01 = svmla_lane (sv_f64 (dat->poly_1357[0]), r2, C_24, 0); + svfloat64_t p23 = svmla_lane_f64 (sv_f64 (dat->poly_1357[1]), r2, C_24, 1); + svfloat64_t p03 = svmla_x (pg, p01, p23, r4); + + svfloat64_t p45 = svmla_lane (sv_f64 (dat->poly_1357[2]), r2, C_68, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (dat->poly_1357[3]), r2, C_68, 1); + svfloat64_t p47 = svmla_x (pg, p45, p67, r4); + + svfloat64_t p = svmla_x (pg, p03, p47, r8); + + svfloat64_t z = svmul_x (svptrue_b64 (), p, r); + z = svmul_x (svptrue_b64 (), r2, z); + z = svmla_lane (z, r, half_pi_c0, 0); + p = svmla_x (pg, r, r2, z); /* Recombination uses double-angle formula: tan(2x) = 2 * tan(x) / (1 - (tan(x))^2) and reciprocity around pi/2: tan(x) = 1 / (tan(pi/2 - x)) to assemble result using change-of-sign and conditional selection of - numerator/denominator dependent on odd/even-ness of q (hence quadrant). */ - svbool_t use_recip - = svcmpeq (pg, svand_x (pg, svreinterpret_u64 (qi), 1), 0); + numerator/denominator dependent on odd/even-ness of q (quadrant). */ + + /* Invert condition to catch NaNs and Infs as well as large values. 
*/ + svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val)); + + if (__glibc_unlikely (svptest_any (pg, special))) + { + return special_case (x, p, q, pg, special); + } + svbool_t use_recip = svcmpeq ( + pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0); svfloat64_t n = svmad_x (pg, p, p, -1); - svfloat64_t d = svmul_x (pg, p, 2); + svfloat64_t d = svmul_x (svptrue_b64 (), p, 2); svfloat64_t swap = n; n = svneg_m (n, use_recip, d); d = svsel (use_recip, swap, d); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svdiv_x (svnot_z (pg, special), n, d), special); return svdiv_x (pg, n, d); } diff --git a/sysdeps/aarch64/fpu/tanf_sve.c b/sysdeps/aarch64/fpu/tanf_sve.c index f342583241..e850fb4882 100644 --- a/sysdeps/aarch64/fpu/tanf_sve.c +++ b/sysdeps/aarch64/fpu/tanf_sve.c @@ -60,21 +60,16 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - /* Determine whether input is too large to perform fast regression. */ - svbool_t cmp = svacge (pg, x, d->range_val); - svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1); svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->pio2_1); /* n = rint(x/(pi/2)). */ - svfloat32_t q = svmla_lane (sv_f32 (d->shift), x, pi_vals, 3); - svfloat32_t n = svsub_x (pg, q, d->shift); + svfloat32_t n = svrintn_x (pg, svmul_lane (x, pi_vals, 3)); /* n is already a signed integer, simply convert it. */ svint32_t in = svcvt_s32_x (pg, n); /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ svint32_t alt = svand_x (pg, in, 1); svbool_t pred_alt = svcmpne (pg, alt, 0); - /* r = x - n * (pi/2) (range reduction into 0 .. pi/4). */ svfloat32_t r; r = svmls_lane (x, n, pi_vals, 0); @@ -93,7 +88,7 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg) /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4], using Estrin on z^2. 
*/ - svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); @@ -106,13 +101,14 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg) svfloat32_t y = svmla_x (pg, z, p, svmul_x (pg, z, z2)); - /* Transform result back, if necessary. */ - svfloat32_t inv_y = svdivr_x (pg, y, 1.0f); - /* No need to pass pg to specialcase here since cmp is a strict subset, guaranteed by the cmpge above. */ + + /* Determine whether input is too large to perform fast regression. */ + svbool_t cmp = svacge (pg, x, d->range_val); if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svsel (pred_alt, inv_y, y), cmp); + return special_case (x, svdivr_x (pg, y, 1.0f), cmp); + svfloat32_t inv_y = svdivr_x (pg, y, 1.0f); return svsel (pred_alt, inv_y, y); } commit ab5ba6c188159bb5e12be95cd90458924c2fe592 Author: Yat Long Poon Date: Fri Jan 3 19:07:30 2025 +0000 AArch64: Improve codegen for SVE logs Reduce memory access by using lanewise MLA and moving constants to struct and reduce number of MOVPRFXs. Update maximum ULP error for double log_sve from 1 to 2. Speedup on Neoverse V1 for log (3%), log2 (5%), and log10 (4%). (cherry picked from commit 32d193a372feb28f9da247bb7283d404b84429c6) diff --git a/sysdeps/aarch64/fpu/log10_sve.c b/sysdeps/aarch64/fpu/log10_sve.c index ab7362128d..f1cad2759a 100644 --- a/sysdeps/aarch64/fpu/log10_sve.c +++ b/sysdeps/aarch64/fpu/log10_sve.c @@ -23,28 +23,49 @@ #define Min 0x0010000000000000 #define Max 0x7ff0000000000000 #define Thres 0x7fe0000000000000 /* Max - Min. 
*/ -#define Off 0x3fe6900900000000 #define N (1 << V_LOG10_TABLE_BITS) +static const struct data +{ + double c0, c2; + double c1, c3; + double invln10, log10_2; + double c4; + uint64_t off; +} data = { + .c0 = -0x1.bcb7b1526e506p-3, + .c1 = 0x1.287a7636be1d1p-3, + .c2 = -0x1.bcb7b158af938p-4, + .c3 = 0x1.63c78734e6d07p-4, + .c4 = -0x1.287461742fee4p-4, + .invln10 = 0x1.bcb7b1526e50ep-2, + .log10_2 = 0x1.34413509f79ffp-2, + .off = 0x3fe6900900000000, +}; + static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) { - return sv_call_f64 (log10, x, y, special); + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log10, x, svmla_x (svptrue_b64 (), hi, r2, y), special); } -/* SVE log10 algorithm. +/* Double-precision SVE log10 routine. Maximum measured error is 2.46 ulps. SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6 want 0x1.fffbdf6eaa667p-6. */ svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + svuint64_t ix = svreinterpret_u64 (x); svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG10_TABLE_BITS); i = svand_x (pg, i, (N - 1) << 1); svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); @@ -62,15 +83,19 @@ svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg) svfloat64_t r = svmad_x (pg, invc, z, -1.0); /* hi = log(c) + k*log(2). 
*/ - svfloat64_t w = svmla_x (pg, logc, r, __v_log10_data.invln10); - svfloat64_t hi = svmla_x (pg, w, k, __v_log10_data.log10_2); + svfloat64_t invln10_log10_2 = svld1rq_f64 (svptrue_b64 (), &d->invln10); + svfloat64_t w = svmla_lane_f64 (logc, r, invln10_log10_2, 0); + svfloat64_t hi = svmla_lane_f64 (w, k, invln10_log10_2, 1); /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log10_data.poly); + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_x (pg, y, r2, d->c4); + y = svmla_x (pg, p, r2, y); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); + return special_case (hi, tmp, y, r2, special, d); return svmla_x (pg, hi, r2, y); } diff --git a/sysdeps/aarch64/fpu/log2_sve.c b/sysdeps/aarch64/fpu/log2_sve.c index 743fa2a913..908e638246 100644 --- a/sysdeps/aarch64/fpu/log2_sve.c +++ b/sysdeps/aarch64/fpu/log2_sve.c @@ -21,15 +21,32 @@ #include "poly_sve_f64.h" #define N (1 << V_LOG2_TABLE_BITS) -#define Off 0x3fe6900900000000 #define Max (0x7ff0000000000000) #define Min (0x0010000000000000) #define Thresh (0x7fe0000000000000) /* Max - Min. 
*/ +static const struct data +{ + double c0, c2; + double c1, c3; + double invln2, c4; + uint64_t off; +} data = { + .c0 = -0x1.71547652b83p-1, + .c1 = 0x1.ec709dc340953p-2, + .c2 = -0x1.71547651c8f35p-2, + .c3 = 0x1.2777ebe12dda5p-2, + .c4 = -0x1.ec738d616fe26p-3, + .invln2 = 0x1.71547652b82fep0, + .off = 0x3fe6900900000000, +}; + static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) +special_case (svfloat64_t w, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) { - return sv_call_f64 (log2, x, y, cmp); + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log2, x, svmla_x (svptrue_b64 (), w, r2, y), special); } /* Double-precision SVE log2 routine. @@ -40,13 +57,15 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) want 0x1.fffb34198d9ddp-5. */ svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + svuint64_t ix = svreinterpret_u64 (x); svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG2_TABLE_BITS); i = svand_x (pg, i, (N - 1) << 1); svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); @@ -59,15 +78,19 @@ svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg) /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. 
*/ + svfloat64_t invln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->invln2); svfloat64_t r = svmad_x (pg, invc, z, -1.0); - svfloat64_t w = svmla_x (pg, log2c, r, __v_log2_data.invln2); - - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log2_data.poly); + svfloat64_t w = svmla_lane_f64 (log2c, r, invln2_and_c4, 0); w = svadd_x (pg, k, w); + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_lane_f64 (y, r2, invln2_and_c4, 1); + y = svmla_x (pg, p, r2, y); + if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), w, r2, y), - special); + return special_case (w, tmp, y, r2, special, d); return svmla_x (pg, w, r2, y); } diff --git a/sysdeps/aarch64/fpu/log_sve.c b/sysdeps/aarch64/fpu/log_sve.c index 9b689f2ec7..044223400b 100644 --- a/sysdeps/aarch64/fpu/log_sve.c +++ b/sysdeps/aarch64/fpu/log_sve.c @@ -19,39 +19,54 @@ #include "sv_math.h" -#define P(i) sv_f64 (__v_log_data.poly[i]) #define N (1 << V_LOG_TABLE_BITS) -#define Off (0x3fe6900900000000) -#define MaxTop (0x7ff) -#define MinTop (0x001) -#define ThreshTop (0x7fe) /* MaxTop - MinTop. */ +#define Max (0x7ff0000000000000) +#define Min (0x0010000000000000) +#define Thresh (0x7fe0000000000000) /* Max - Min. 
*/ + +static const struct data +{ + double c0, c2; + double c1, c3; + double ln2, c4; + uint64_t off; +} data = { + .c0 = -0x1.ffffffffffff7p-2, + .c1 = 0x1.55555555170d4p-2, + .c2 = -0x1.0000000399c27p-2, + .c3 = 0x1.999b2e90e94cap-3, + .c4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + .off = 0x3fe6900900000000, +}; static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) +special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) { - return sv_call_f64 (log, x, y, cmp); + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log, x, svmla_x (svptrue_b64 (), hi, r2, y), special); } -/* SVE port of AdvSIMD log algorithm. - Maximum measured error is 2.17 ulp: - SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2 - want 0x1.ffffff1cca045p-2. */ +/* Double-precision SVE log routine. + Maximum measured error is 2.64 ulp: + SV_NAME_D1 (log)(0x1.95e54bc91a5e2p+184) got 0x1.fffffffe88cacp+6 + want 0x1.fffffffe88cafp+6. */ svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + svuint64_t ix = svreinterpret_u64 (x); - svuint64_t top = svlsr_x (pg, ix, 52); - svbool_t cmp = svcmpge (pg, svsub_x (pg, top, MinTop), sv_u64 (ThreshTop)); + svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N. The actual value of i is double this due to table layout. */ svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1); - svint64_t k - = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift. 
*/ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); svfloat64_t z = svreinterpret_f64 (iz); /* Lookup in 2 global lists (length N). */ @@ -59,18 +74,22 @@ svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg) svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - svfloat64_t r = svmad_x (pg, invc, z, -1); - svfloat64_t kd = svcvt_f64_x (pg, k); + svfloat64_t kd = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); /* hi = r + log(c) + k*Ln2. */ - svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2); + svfloat64_t ln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->ln2); + svfloat64_t r = svmad_x (pg, invc, z, -1); + svfloat64_t hi = svmla_lane_f64 (logc, kd, ln2_and_c4, 0); + hi = svadd_x (pg, r, hi); + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, P (2), r, P (3)); - svfloat64_t p = svmla_x (pg, P (0), r, P (1)); - y = svmla_x (pg, y, r2, P (4)); + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_lane_f64 (y, r2, ln2_and_c4, 1); y = svmla_x (pg, p, r2, y); - if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), hi, r2, y), cmp); + if (__glibc_unlikely (svptest_any (pg, special))) + return special_case (hi, tmp, y, r2, special, d); return svmla_x (pg, hi, r2, y); } diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 6c96304611..b76c38dac2 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -1460,7 +1460,7 @@ float: 2 ldouble: 1 Function: "log_sve": -double: 1 +double: 2 float: 3 Function: "log_towardzero": commit 
aa7c61ea15e27ae14717e065a5d4c50baa472851 Author: Yat Long Poon Date: Fri Jan 3 19:09:05 2025 +0000 AArch64: Improve codegen for SVE log1pf users Reduce memory access by using lanewise MLA and reduce number of MOVPRFXs. Move log1pf implementation to inline helper function. Speedup on Neoverse V1 for log1pf (10%), acoshf (-1%), atanhf (2%), asinhf (2%). (cherry picked from commit 91c1fadba338752bf514cd4cca057b27b1b10eed) diff --git a/sysdeps/aarch64/fpu/acoshf_sve.c b/sysdeps/aarch64/fpu/acoshf_sve.c index 2110894e62..491365e24d 100644 --- a/sysdeps/aarch64/fpu/acoshf_sve.c +++ b/sysdeps/aarch64/fpu/acoshf_sve.c @@ -17,23 +17,26 @@ License along with the GNU C Library; if not, see . */ +#include "sv_math.h" +#include "sv_log1pf_inline.h" + #define One 0x3f800000 #define Thres 0x20000000 /* asuint(0x1p64) - One. */ -#include "sv_log1pf_inline.h" - static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special) { + svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f); + svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ()); return sv_call_f32 (acoshf, x, y, special); } /* Single-precision SVE acosh(x) routine. Implements the same algorithm as vector acoshf and log1p. - Maximum error is 2.78 ULPs: - SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4 - want 0x1.f45b3cp-4. */ + Maximum error is 2.47 ULPs: + SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4 + want 0x1.e435a2p-4. 
*/ svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg) { svuint32_t ix = svreinterpret_u32 (x); @@ -41,9 +44,9 @@ svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg) svfloat32_t xm1 = svsub_x (pg, x, 1.0f); svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f)); - svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg); + svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u)); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, y, special); - return y; + return special_case (xm1, tmp, special); + return sv_log1pf_inline (tmp, pg); } diff --git a/sysdeps/aarch64/fpu/asinhf_sve.c b/sysdeps/aarch64/fpu/asinhf_sve.c index d85c3a685c..b7f253bf32 100644 --- a/sysdeps/aarch64/fpu/asinhf_sve.c +++ b/sysdeps/aarch64/fpu/asinhf_sve.c @@ -20,20 +20,23 @@ #include "sv_math.h" #include "sv_log1pf_inline.h" -#define BigBound (0x5f800000) /* asuint(0x1p64). */ +#define BigBound 0x5f800000 /* asuint(0x1p64). */ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special) { + svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign)); + y = svreinterpret_f32 ( + svorr_x (svptrue_b32 (), sign, svreinterpret_u32 (y))); return sv_call_f32 (asinhf, x, y, special); } /* Single-precision SVE asinh(x) routine. Implements the same algorithm as vector asinhf and log1p. - Maximum error is 2.48 ULPs: - SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4 - want 0x1.ffbbb8p-4. */ + Maximum error is 1.92 ULPs: + SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2 + want -0x1.fd0bc8p-2. 
*/ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg) { svfloat32_t ax = svabs_x (pg, x); @@ -49,8 +52,6 @@ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg) = sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case ( - x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))), - special); + return special_case (iax, sign, y, special); return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))); } diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c index dae83041ef..2d3005bbc8 100644 --- a/sysdeps/aarch64/fpu/atanhf_sve.c +++ b/sysdeps/aarch64/fpu/atanhf_sve.c @@ -17,21 +17,25 @@ License along with the GNU C Library; if not, see . */ +#include "sv_math.h" #include "sv_log1pf_inline.h" #define One (0x3f800000) #define Half (0x3f000000) static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign, + svfloat32_t y, svbool_t special) { + svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign)); + y = svmul_x (svptrue_b32 (), halfsign, y); return sv_call_f32 (atanhf, x, y, special); } /* Approximation for vector single-precision atanh(x) using modified log1p. - The maximum error is 2.28 ULP: - _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5 - want 0x1.ffbbb6p-5. */ + The maximum error is 1.99 ULP: + _ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5 + want 0x1.f1f4f6p-5. 
*/ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg) { svfloat32_t ax = svabs_x (pg, x); @@ -48,7 +52,7 @@ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg) y = sv_log1pf_inline (y, pg); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmul_x (pg, halfsign, y), special); + return special_case (iax, sign, halfsign, y, special); return svmul_x (pg, halfsign, y); } diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c index 5256d5e94c..18a185c838 100644 --- a/sysdeps/aarch64/fpu/log1pf_sve.c +++ b/sysdeps/aarch64/fpu/log1pf_sve.c @@ -18,30 +18,13 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" - -static const struct data -{ - float poly[8]; - float ln2, exp_bias; - uint32_t four, three_quarters; -} data = {.poly = {/* Do not store first term of polynomial, which is -0.5, as - this can be fmov-ed directly instead of including it in - the main load-and-mla polynomial schedule. */ - 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, - 0x1.abcb6p-4f, -0x1.6f0d5ep-5f}, - .ln2 = 0x1.62e43p-1f, - .exp_bias = 0x1p-23f, - .four = 0x40800000, - .three_quarters = 0x3f400000}; - -#define SignExponentMask 0xff800000 +#include "sv_log1pf_inline.h" static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t special) { - return sv_call_f32 (log1pf, x, y, special); + return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()), + special); } /* Vector log1pf approximation using polynomial on reduced interval. Worst-case @@ -50,53 +33,14 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) want 0x1.9f323ep-2. */ svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) { - const struct data *d = ptr_barrier (&data); /* x < -1, Inf/Nan. 
*/ svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000); special = svorn_z (pg, special, svcmpge (pg, x, -1)); - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. */ - svfloat32_t m = svadd_x (pg, x, 1); - - /* Choose k to scale x to the range [-1/4, 1/2]. */ - svint32_t k - = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters), - sv_s32 (SignExponentMask)); - - /* Scale x by exponent manipulation. */ - svfloat32_t m_scale = svreinterpret_f32 ( - svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k))); - - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. */ - svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four)); - m_scale = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1), s, 0.25)); - - /* Evaluate polynomial on reduced interval. */ - svfloat32_t ms2 = svmul_x (pg, m_scale, m_scale), - ms4 = svmul_x (pg, ms2, ms2); - svfloat32_t p = sv_estrin_7_f32_x (pg, m_scale, ms2, ms4, d->poly); - p = svmad_x (pg, m_scale, p, -0.5); - p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p)); - - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->exp_bias); - - /* Apply the scaling back. 
*/ - svfloat32_t y = svmla_x (pg, p, scale_back, d->ln2); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, y, special); + return special_case (x, special); - return y; + return sv_log1pf_inline (x, pg); } strong_alias (SV_NAME_F1 (log1p), SV_NAME_F1 (logp1)) diff --git a/sysdeps/aarch64/fpu/sv_log1pf_inline.h b/sysdeps/aarch64/fpu/sv_log1pf_inline.h index b94b2da055..850297d615 100644 --- a/sysdeps/aarch64/fpu/sv_log1pf_inline.h +++ b/sysdeps/aarch64/fpu/sv_log1pf_inline.h @@ -22,55 +22,76 @@ #include "sv_math.h" #include "vecmath_config.h" -#include "poly_sve_f32.h" + +#define SignExponentMask 0xff800000 static const struct sv_log1pf_data { - float32_t poly[9]; - float32_t ln2; - float32_t scale_back; + float c0, c2, c4, c6; + float c1, c3, c5, c7; + float ln2, exp_bias, quarter; + uint32_t four, three_quarters; } sv_log1pf_data = { - /* Polynomial generated using FPMinimax in [-0.25, 0.5]. */ - .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f, - -0x1.6f0d5ep-5f }, - .scale_back = 0x1.0p-23f, - .ln2 = 0x1.62e43p-1f, + /* Do not store first term of polynomial, which is -0.5, as + this can be fmov-ed directly instead of including it in + the main load-and-mla polynomial schedule. 
*/ + .c0 = 0x1.5555aap-2f, .c1 = -0x1.000038p-2f, .c2 = 0x1.99675cp-3f, + .c3 = -0x1.54ef78p-3f, .c4 = 0x1.28a1f4p-3f, .c5 = -0x1.0da91p-3f, + .c6 = 0x1.abcb6p-4f, .c7 = -0x1.6f0d5ep-5f, .ln2 = 0x1.62e43p-1f, + .exp_bias = 0x1p-23f, .quarter = 0x1p-2f, .four = 0x40800000, + .three_quarters = 0x3f400000, }; -static inline svfloat32_t -eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg) -{ - svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1])); - svfloat32_t m2 = svmul_x (pg, m, m); - svfloat32_t q = svmla_x (pg, m, m2, p_12); - svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2); - p = svmul_x (pg, m2, p); - - return svmla_x (pg, q, m2, p); -} - static inline svfloat32_t sv_log1pf_inline (svfloat32_t x, svbool_t pg) { const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data); - svfloat32_t m = svadd_x (pg, x, 1.0f); - - svint32_t ks = svsub_x (pg, svreinterpret_s32 (m), - svreinterpret_s32 (svdup_f32 (0.75f))); - ks = svand_x (pg, ks, 0xff800000); - svuint32_t k = svreinterpret_u32 (ks); - svfloat32_t s = svreinterpret_f32 ( - svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k)); - - svfloat32_t m_scale - = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k)); - m_scale - = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s)); - svfloat32_t p = eval_poly (m_scale, d->poly, pg); - svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->scale_back); - return svmla_x (pg, p, scale_back, d->ln2); + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ + svfloat32_t m = svadd_x (pg, x, 1); + + /* Choose k to scale x to the range [-1/4, 1/2]. 
*/ + svint32_t k + = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters), + sv_s32 (SignExponentMask)); + + /* Scale x by exponent manipulation. */ + svfloat32_t m_scale = svreinterpret_f32 ( + svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k))); + + /* Scale up to ensure that the scale factor is representable as normalised + fp32 number, and scale m down accordingly. */ + svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four)); + svfloat32_t fconst = svld1rq_f32 (svptrue_b32 (), &d->ln2); + m_scale = svadd_x (pg, m_scale, svmla_lane_f32 (sv_f32 (-1), s, fconst, 2)); + + /* Evaluate polynomial on reduced interval. */ + svfloat32_t ms2 = svmul_x (svptrue_b32 (), m_scale, m_scale); + + svfloat32_t c1357 = svld1rq_f32 (svptrue_b32 (), &d->c1); + svfloat32_t p01 = svmla_lane_f32 (sv_f32 (d->c0), m_scale, c1357, 0); + svfloat32_t p23 = svmla_lane_f32 (sv_f32 (d->c2), m_scale, c1357, 1); + svfloat32_t p45 = svmla_lane_f32 (sv_f32 (d->c4), m_scale, c1357, 2); + svfloat32_t p67 = svmla_lane_f32 (sv_f32 (d->c6), m_scale, c1357, 3); + + svfloat32_t p = svmla_x (pg, p45, p67, ms2); + p = svmla_x (pg, p23, p, ms2); + p = svmla_x (pg, p01, p, ms2); + + p = svmad_x (pg, m_scale, p, -0.5); + p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p)); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ + svfloat32_t scale_back = svmul_lane_f32 (svcvt_f32_x (pg, k), fconst, 1); + return svmla_lane_f32 (p, scale_back, fconst, 0); } #endif commit d983f14c304df2d880c7b01e904e4a889064b9b3 Author: Luna Lamb Date: Fri Jan 3 20:15:17 2025 +0000 AArch64: Improve codegen in SVE expm1f and users Use unpredicated muls, use absolute compare and improve memory access. Expm1f, sinhf and tanhf show 7%, 5% and 1% improvement in throughput microbenchmark on Neoverse V1. 
(cherry picked from commit f86b4cf87581cf1e45702b07880679ffa0b1f47a) diff --git a/sysdeps/aarch64/fpu/expm1f_sve.c b/sysdeps/aarch64/fpu/expm1f_sve.c index 7c852125cd..05a66400d4 100644 --- a/sysdeps/aarch64/fpu/expm1f_sve.c +++ b/sysdeps/aarch64/fpu/expm1f_sve.c @@ -18,7 +18,6 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" /* Largest value of x for which expm1(x) should round to -1. */ #define SpecialBound 0x1.5ebc4p+6f @@ -28,20 +27,17 @@ static const struct data /* These 4 are grouped together so they can be loaded as one quadword, then used with _lane forms of svmla/svmls. */ float c2, c4, ln2_hi, ln2_lo; - float c0, c1, c3, inv_ln2, special_bound, shift; + float c0, inv_ln2, c1, c3, special_bound; } data = { /* Generated using fpminimax. */ .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7, - .c4 = 0x1.6b55a2p-10, + .c4 = 0x1.6b55a2p-10, .inv_ln2 = 0x1.715476p+0f, + .special_bound = SpecialBound, .ln2_lo = 0x1.7f7d1cp-20f, + .ln2_hi = 0x1.62e4p-1f, - .special_bound = SpecialBound, .shift = 0x1.8p23f, - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, }; -#define C(i) sv_f32 (d->c##i) - static svfloat32_t NOINLINE special_case (svfloat32_t x, svbool_t pg) { @@ -71,9 +67,8 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ - svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2); - j = svsub_x (pg, j, d->shift); - svint32_t i = svcvt_s32_x (pg, j); + svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2); + j = svrinta_x (pg, j); svfloat32_t f = svmls_lane (x, j, lane_constants, 2); f = svmls_lane (f, j, lane_constants, 3); @@ -83,17 +78,17 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg) x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... 
and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0); - svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1); - svfloat32_t f2 = svmul_x (pg, f, f); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1); + svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f); svfloat32_t p = svmla_x (pg, p12, f2, p34); - p = svmla_x (pg, C (0), f, p); + + p = svmla_x (pg, sv_f32 (d->c0), f, p); p = svmla_x (pg, f, f2, p); /* Assemble the result. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^i. */ - svfloat32_t t = svreinterpret_f32 ( - svadd_x (pg, svreinterpret_u32 (svlsl_x (pg, i, 23)), 0x3f800000)); - return svmla_x (pg, svsub_x (pg, t, 1), p, t); + svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j)); + return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t); } diff --git a/sysdeps/aarch64/fpu/sinhf_sve.c b/sysdeps/aarch64/fpu/sinhf_sve.c index 6c204b57a2..50dd386774 100644 --- a/sysdeps/aarch64/fpu/sinhf_sve.c +++ b/sysdeps/aarch64/fpu/sinhf_sve.c @@ -63,5 +63,5 @@ svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg) if (__glibc_unlikely (svptest_any (pg, special))) return special_case (x, svmul_x (pg, t, halfsign), special); - return svmul_x (pg, t, halfsign); + return svmul_x (svptrue_b32 (), t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sv_expm1f_inline.h b/sysdeps/aarch64/fpu/sv_expm1f_inline.h index 5b72451222..e46ddda543 100644 --- a/sysdeps/aarch64/fpu/sv_expm1f_inline.h +++ b/sysdeps/aarch64/fpu/sv_expm1f_inline.h @@ -27,21 +27,18 @@ struct sv_expm1f_data /* These 4 are grouped together so they can be loaded as one quadword, then used with _lane forms of svmla/svmls. */ float32_t c2, c4, ln2_hi, ln2_lo; - float32_t c0, c1, c3, inv_ln2, shift; + float c0, inv_ln2, c1, c3, special_bound; }; /* Coefficients generated using fpminimax. 
*/ #define SV_EXPM1F_DATA \ { \ - .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, \ - .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .inv_ln2 = 0x1.715476p+0f, \ + .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7, \ \ - .shift = 0x1.8p23f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ - .ln2_lo = 0x1.7f7d1cp-20f, \ + .c4 = 0x1.6b55a2p-10, .ln2_lo = 0x1.7f7d1cp-20f, .ln2_hi = 0x1.62e4p-1f, \ } -#define C(i) sv_f32 (d->c##i) - static inline svfloat32_t expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d) { @@ -55,9 +52,8 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ - svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2); - j = svsub_x (pg, j, d->shift); - svint32_t i = svcvt_s32_x (pg, j); + svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2); + j = svrinta_x (pg, j); svfloat32_t f = svmls_lane (x, j, lane_constants, 2); f = svmls_lane (f, j, lane_constants, 3); @@ -67,18 +63,18 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d) x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0); - svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1); - svfloat32_t f2 = svmul_x (pg, f, f); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1); + svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f); svfloat32_t p = svmla_x (pg, p12, f2, p34); - p = svmla_x (pg, C (0), f, p); + p = svmla_x (pg, sv_f32 (d->c0), f, p); p = svmla_x (pg, f, f2, p); /* Assemble the result. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^i. 
*/ - svfloat32_t t = svscale_x (pg, sv_f32 (1), i); - return svmla_x (pg, svsub_x (pg, t, 1), p, t); + svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j)); + return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t); } #endif diff --git a/sysdeps/aarch64/fpu/tanhf_sve.c b/sysdeps/aarch64/fpu/tanhf_sve.c index 0b94523cf5..80dd679346 100644 --- a/sysdeps/aarch64/fpu/tanhf_sve.c +++ b/sysdeps/aarch64/fpu/tanhf_sve.c @@ -19,20 +19,27 @@ #include "sv_expm1f_inline.h" +/* Largest value of x for which tanhf(x) rounds to 1 (or -1 for negative). */ +#define BoringBound 0x1.205966p+3f + static const struct data { struct sv_expm1f_data expm1f_consts; - uint32_t boring_bound, onef; + uint32_t onef, special_bound; + float boring_bound; } data = { .expm1f_consts = SV_EXPM1F_DATA, - /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */ - .boring_bound = 0x41102cb3, .onef = 0x3f800000, + .special_bound = 0x7f800000, + .boring_bound = BoringBound, }; static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t pg, svbool_t is_boring, + svfloat32_t boring, svfloat32_t q, svbool_t special) { + svfloat32_t y + = svsel_f32 (is_boring, boring, svdiv_x (pg, q, svadd_x (pg, q, 2.0))); return sv_call_f32 (tanhf, x, y, special); } @@ -47,15 +54,16 @@ svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg) svfloat32_t ax = svabs_x (pg, x); svuint32_t iax = svreinterpret_u32 (ax); svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); - svbool_t is_boring = svcmpgt (pg, iax, d->boring_bound); svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef)); - - svbool_t special = svcmpgt (pg, iax, 0x7f800000); + svbool_t special = svcmpgt (pg, iax, d->special_bound); + svbool_t is_boring = svacgt (pg, x, d->boring_bound); /* tanh(x) = (e^2x - 1) / (e^2x + 1). 
*/ - svfloat32_t q = expm1f_inline (svmul_x (pg, x, 2.0), pg, &d->expm1f_consts); - svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0)); + svfloat32_t q = expm1f_inline (svmul_x (svptrue_b32 (), x, 2.0), pg, + &d->expm1f_consts); + if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svsel_f32 (is_boring, boring, y), special); + return special_case (x, pg, is_boring, boring, q, special); + svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0)); return svsel_f32 (is_boring, boring, y); } commit 0ff6a9ff79bca9384ce4ba20e8942d39cc377a14 Author: Luna Lamb Date: Thu Feb 13 17:52:09 2025 +0000 Aarch64: Improve codegen in SVE asinh Use unpredicated muls, use lanewise mla's and improve memory access. 1% regression in throughput microbenchmark on Neoverse V1. Reviewed-by: Wilco Dijkstra (cherry picked from commit 8f0e7fe61e0a2ad5ed777933703ce09053810ec4) diff --git a/sysdeps/aarch64/fpu/asinh_sve.c b/sysdeps/aarch64/fpu/asinh_sve.c index 28dc5c4587..fe8715e06c 100644 --- a/sysdeps/aarch64/fpu/asinh_sve.c +++ b/sysdeps/aarch64/fpu/asinh_sve.c @@ -18,36 +18,49 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" #define SignMask (0x8000000000000000) #define One (0x3ff0000000000000) #define Thres (0x5fe0000000000000) /* asuint64 (0x1p511). */ +#define IndexMask (((1 << V_LOG_TABLE_BITS) - 1) << 1) static const struct data { - double poly[18]; - double ln2, p3, p1, p4, p0, p2; - uint64_t n; - uint64_t off; + double even_coeffs[9]; + double ln2, p3, p1, p4, p0, p2, c1, c3, c5, c7, c9, c11, c13, c15, c17; + uint64_t off, mask; } data = { - /* Polynomial generated using Remez on [2^-26, 1]. 
*/ - .poly - = { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5, - 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6, - -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7, - 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8, - -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11, - 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18 }, + /* Polynomial generated using Remez on [2^-26, 1]. */ + .even_coeffs ={ + -0x1.55555555554a7p-3, + -0x1.6db6db68332e6p-5, + -0x1.6e8b8b654a621p-6, + -0x1.c9871d10885afp-7, + -0x1.3ddca533e9f54p-7, + -0x1.b90c7099dd397p-8, + -0x1.d217026a669ecp-9, + -0x1.e0f37daef9127p-11, + -0x1.021a48685e287p-14, }, + + .c1 = 0x1.3333333326c7p-4, + .c3 = 0x1.f1c71b26fb40dp-6, + .c5 = 0x1.1c4daa9e67871p-6, + .c7 = 0x1.7a16e8d9d2ecfp-7, + .c9 = 0x1.0becef748dafcp-7, + .c11 = 0x1.541f2bb1ffe51p-8, + .c13 = 0x1.0b5c7977aaf7p-9, + .c15 = 0x1.388b5fe542a6p-12, + .c17 = 0x1.93d4ba83d34dap-18, + .ln2 = 0x1.62e42fefa39efp-1, .p0 = -0x1.ffffffffffff7p-2, .p1 = 0x1.55555555170d4p-2, .p2 = -0x1.0000000399c27p-2, .p3 = 0x1.999b2e90e94cap-3, .p4 = -0x1.554e550bd501ep-3, - .n = 1 << V_LOG_TABLE_BITS, - .off = 0x3fe6900900000000 + .off = 0x3fe6900900000000, + .mask = 0xfffULL << 52, }; static svfloat64_t NOINLINE @@ -64,11 +77,10 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg) of the algorithm used. 
*/ svuint64_t ix = svreinterpret_u64 (x); - svuint64_t tmp = svsub_x (pg, ix, d->off); - svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), - (d->n - 1) << 1); - svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); + svuint64_t i_off = svsub_x (pg, ix, d->off); + svuint64_t i + = svand_x (pg, svlsr_x (pg, i_off, (51 - V_LOG_TABLE_BITS)), IndexMask); + svuint64_t iz = svsub_x (pg, ix, svand_x (pg, i_off, d->mask)); svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); @@ -78,14 +90,14 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg) svfloat64_t p1_p4 = svld1rq (svptrue_b64 (), &d->p1); svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z); - svfloat64_t kd = svcvt_f64_x (pg, k); + svfloat64_t kd + = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (i_off), 52)); svfloat64_t hi = svmla_lane (svadd_x (pg, logc, r), kd, ln2_p3, 0); - svfloat64_t r2 = svmul_x (pg, r, r); - + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); svfloat64_t y = svmla_lane (sv_f64 (d->p2), r, ln2_p3, 1); - svfloat64_t p = svmla_lane (sv_f64 (d->p0), r, p1_p4, 0); + y = svmla_lane (y, r2, p1_p4, 1); y = svmla_x (pg, p, r2, y); y = svmla_x (pg, hi, r2, y); @@ -111,7 +123,6 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) svuint64_t iax = svbic_x (pg, ix, SignMask); svuint64_t sign = svand_x (pg, ix, SignMask); svfloat64_t ax = svreinterpret_f64 (iax); - svbool_t ge1 = svcmpge (pg, iax, One); svbool_t special = svcmpge (pg, iax, Thres); @@ -120,7 +131,7 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) svfloat64_t option_1 = sv_f64 (0); if (__glibc_likely (svptest_any (pg, ge1))) { - svfloat64_t x2 = svmul_x (pg, ax, ax); + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); option_1 = __sv_log_inline ( svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, x2, 1))), d, pg); } @@ -130,21 
+141,53 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) The largest observed error in this region is 1.51 ULPs: _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1 want 0x1.c1e649ee2681dp-1. */ + svfloat64_t option_2 = sv_f64 (0); if (__glibc_likely (svptest_any (pg, svnot_z (pg, ge1)))) { - svfloat64_t x2 = svmul_x (pg, ax, ax); - svfloat64_t x4 = svmul_x (pg, x2, x2); - svfloat64_t p = sv_pw_horner_17_f64_x (pg, x2, x4, d->poly); - option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax)); + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); + svfloat64_t x4 = svmul_x (svptrue_b64 (), x2, x2); + /* Order-17 Pairwise Horner scheme. */ + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + + svfloat64_t p01 = svmla_lane (sv_f64 (d->even_coeffs[0]), x2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->even_coeffs[1]), x2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->even_coeffs[2]), x2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->even_coeffs[3]), x2, c57, 1); + svfloat64_t p89 = svmla_lane (sv_f64 (d->even_coeffs[4]), x2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->even_coeffs[5]), x2, c911, 1); + svfloat64_t p1213 + = svmla_lane (sv_f64 (d->even_coeffs[6]), x2, c1315, 0); + svfloat64_t p1415 + = svmla_lane (sv_f64 (d->even_coeffs[7]), x2, c1315, 1); + svfloat64_t p1617 = svmla_x (pg, sv_f64 (d->even_coeffs[8]), x2, d->c17); + + svfloat64_t p = svmla_x (pg, p1415, x4, p1617); + p = svmla_x (pg, p1213, x4, p); + p = svmla_x (pg, p1011, x4, p); + p = svmla_x (pg, p89, x4, p); + + p = svmla_x (pg, p67, x4, p); + p = svmla_x (pg, p45, x4, p); + + p = svmla_x (pg, p23, x4, p); + + p = svmla_x (pg, p01, x4, p); + + option_2 = svmla_x (pg, ax, p, svmul_x (svptrue_b64 (), x2, ax)); } - /* Choose the right option for each lane. 
*/ - svfloat64_t y = svsel (ge1, option_1, option_2); - if (__glibc_unlikely (svptest_any (pg, special))) return special_case ( - x, svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)), + x, + svreinterpret_f64 (sveor_x ( + pg, svreinterpret_u64 (svsel (ge1, option_1, option_2)), sign)), special); + + /* Choose the right option for each lane. */ + svfloat64_t y = svsel (ge1, option_1, option_2); return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); } commit 4b0bb84eb7e52a135c873fd9d0fc6c30599aedf4 Author: Luna Lamb Date: Thu Feb 13 17:54:46 2025 +0000 Aarch64: Improve codegen in SVE exp and users, and update expf_inline Use unpredicated muls, and improve memory access. 7%, 3% and 1% improvement in throughput microbenchmark on Neoverse V1, for exp, exp2 and cosh respectively. Reviewed-by: Wilco Dijkstra (cherry picked from commit c0ff447edf19bd4630fe79adf5e8b896405b059f) diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c index 919f34604a..e375dd8a34 100644 --- a/sysdeps/aarch64/fpu/cosh_sve.c +++ b/sysdeps/aarch64/fpu/cosh_sve.c @@ -23,7 +23,7 @@ static const struct data { float64_t poly[3]; float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres; - uint64_t index_mask, special_bound; + uint64_t special_bound; } data = { .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3, 0x1.5555576a59599p-5, }, @@ -35,14 +35,16 @@ static const struct data .shift = 0x1.8p+52, .thres = 704.0, - .index_mask = 0xff, /* 0x1.6p9, above which exp overflows. 
*/ .special_bound = 0x4086000000000000, }; static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special) { + svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); + svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); + svfloat64_t y = svadd_x (pg, half_t, half_over_t); return sv_call_f64 (cosh, x, y, special); } @@ -60,12 +62,12 @@ exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d) svuint64_t u = svreinterpret_u64 (z); svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS); - svuint64_t i = svand_x (pg, u, d->index_mask); + svuint64_t i = svand_x (svptrue_b64 (), u, 0xff); svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]); y = svmla_x (pg, sv_f64 (d->poly[0]), r, y); y = svmla_x (pg, sv_f64 (1.0), r, y); - y = svmul_x (pg, r, y); + y = svmul_x (svptrue_b64 (), r, y); /* s = 2^(n/N). */ u = svld1_gather_index (pg, __v_exp_tail_data, i); @@ -94,12 +96,12 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg) /* Up to the point that exp overflows, we can use it to calculate cosh by exp(|x|) / 2 + 1 / (2 * exp(|x|)). */ svfloat64_t t = exp_inline (ax, pg, d); - svfloat64_t half_t = svmul_x (pg, t, 0.5); - svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); /* Fall back to scalar for any special cases. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (x, pg, t, special); + svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); + svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); return svadd_x (pg, half_t, half_over_t); } diff --git a/sysdeps/aarch64/fpu/exp10_sve.c b/sysdeps/aarch64/fpu/exp10_sve.c index ddf64708cb..bfd3fb9e19 100644 --- a/sysdeps/aarch64/fpu/exp10_sve.c +++ b/sysdeps/aarch64/fpu/exp10_sve.c @@ -18,21 +18,23 @@ . 
*/ #include "sv_math.h" -#include "poly_sve_f64.h" #define SpecialBound 307.0 /* floor (log10 (2^1023)). */ static const struct data { - double poly[5]; + double c1, c3, c2, c4, c0; double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound; } data = { /* Coefficients generated using Remez algorithm. rel error: 0x1.9fcb9b3p-60 abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ] max ulp err 0.52 +0.5. */ - .poly = { 0x1.26bb1bbb55516p1, 0x1.53524c73cd32ap1, 0x1.0470591daeafbp1, - 0x1.2bd77b1361ef6p0, 0x1.142b5d54e9621p-1 }, + .c0 = 0x1.26bb1bbb55516p1, + .c1 = 0x1.53524c73cd32ap1, + .c2 = 0x1.0470591daeafbp1, + .c3 = 0x1.2bd77b1361ef6p0, + .c4 = 0x1.142b5d54e9621p-1, /* 1.5*2^46+1023. This value is further explained below. */ .shift = 0x1.800000000ffc0p+46, .log10_2 = 0x1.a934f0979a371p1, /* 1/log2(10). */ @@ -70,9 +72,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, d->scale_thres); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -103,11 +105,14 @@ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg) comes at significant performance cost. */ svuint64_t u = svreinterpret_u64 (z); svfloat64_t scale = svexpa (u); - + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); /* Approximate exp10(r) using polynomial. 
*/ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, svmul_x (pg, r, d->poly[0]), r2, - sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly + 1)); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + svfloat64_t p14 = svmla_x (pg, p12, p34, r2); + + svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c0), r2, p14); /* Assemble result as exp10(x) = 2^n * exp10(r). If |x| > SpecialBound multiplication may overflow, so use special case routine. */ diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c index 22848ebfa5..5dfb77cdbc 100644 --- a/sysdeps/aarch64/fpu/exp2_sve.c +++ b/sysdeps/aarch64/fpu/exp2_sve.c @@ -18,7 +18,6 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" #define N (1 << V_EXP_TABLE_BITS) @@ -27,15 +26,15 @@ static const struct data { - double poly[4]; + double c0, c2; + double c1, c3; double shift, big_bound, uoflow_bound; } data = { /* Coefficients are computed using Remez algorithm with minimisation of the absolute error. */ - .poly = { 0x1.62e42fefa3686p-1, 0x1.ebfbdff82c241p-3, 0x1.c6b09b16de99ap-5, - 0x1.3b2abf5571ad8p-7 }, - .shift = 0x1.8p52 / N, - .uoflow_bound = UOFlowBound, + .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3, + .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7, + .shift = 0x1.8p52 / N, .uoflow_bound = UOFlowBound, .big_bound = BigBound, }; @@ -67,9 +66,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, /* |n| > 1280 => 2^(n) overflows. 
*/ svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -99,11 +98,14 @@ svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg) svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS); svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top)); + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); /* Approximate exp2(r) using polynomial. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly); - svfloat64_t y = svmul_x (pg, r, p); - + /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); + svfloat64_t p = svmla_x (pg, p01, p23, r2); + svfloat64_t y = svmul_x (svptrue_b64 (), r, p); /* Assemble exp2(x) = exp2(r) * scale. */ if (__glibc_unlikely (svptest_any (pg, special))) return special_case (pg, scale, y, kd, d); diff --git a/sysdeps/aarch64/fpu/exp_sve.c b/sysdeps/aarch64/fpu/exp_sve.c index aabaaa1d61..b2421d493f 100644 --- a/sysdeps/aarch64/fpu/exp_sve.c +++ b/sysdeps/aarch64/fpu/exp_sve.c @@ -21,12 +21,15 @@ static const struct data { - double poly[4]; + double c0, c2; + double c1, c3; double ln2_hi, ln2_lo, inv_ln2, shift, thres; + } data = { - .poly = { /* ulp error: 0.53. */ - 0x1.fffffffffdbcdp-2, 0x1.555555555444cp-3, 0x1.555573c6a9f7dp-5, - 0x1.1111266d28935p-7 }, + .c0 = 0x1.fffffffffdbcdp-2, + .c1 = 0x1.555555555444cp-3, + .c2 = 0x1.555573c6a9f7dp-5, + .c3 = 0x1.1111266d28935p-7, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, /* 1/ln2. 
*/ @@ -36,7 +39,6 @@ static const struct data .thres = 704.0, }; -#define C(i) sv_f64 (d->poly[i]) #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ /* SpecialBias1 + SpecialBias1 = asuint(1.0). */ #define SpecialBias1 0x7000000000000000 /* 0x1p769. */ @@ -56,20 +58,20 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n) svuint64_t b = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */ - /* Set s1 to generate overflow depending on sign of exponent n. */ - svfloat64_t s1 = svreinterpret_f64 ( - svsubr_x (pg, b, SpecialBias1)); /* 0x70...0 - b. */ - /* Offset s to avoid overflow in final result if n is below threshold. */ + /* Set s1 to generate overflow depending on sign of exponent n, + ie. s1 = 0x70...0 - b. */ + svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); + /* Offset s to avoid overflow in final result if n is below threshold. + ie. s2 = as_u64 (s) - 0x3010...0 + b. */ svfloat64_t s2 = svreinterpret_f64 ( - svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), - b)); /* as_u64 (s) - 0x3010...0 + b. */ + svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, 1280.0); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -103,16 +105,16 @@ svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg) svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); svuint64_t u = svreinterpret_u64 (z); svfloat64_t n = svsub_x (pg, z, d->shift); - + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); /* r = x - n * ln2, r is in [-ln2/(2N), ln2/(2N)]. 
*/ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); svfloat64_t r = svmls_lane (x, n, ln2, 0); r = svmls_lane (r, n, ln2, 1); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p01 = svmla_x (pg, C (0), C (1), r); - svfloat64_t p23 = svmla_x (pg, C (2), C (3), r); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); svfloat64_t p04 = svmla_x (pg, p01, p23, r2); svfloat64_t y = svmla_x (pg, r, p04, r2); diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h index 6166df6553..75781fb4dd 100644 --- a/sysdeps/aarch64/fpu/sv_expf_inline.h +++ b/sysdeps/aarch64/fpu/sv_expf_inline.h @@ -61,7 +61,7 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) /* scale = 2^(n/N). */ svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */ + /* poly(r) = exp(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4 + C4 r^5. */ svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); @@ -71,5 +71,4 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) return svmla_x (pg, scale, scale, poly); } - #endif commit 194185c28954dfa11a6ded8b32f34fee680d3218 Author: Yat Long Poon Date: Thu Feb 13 18:00:50 2025 +0000 AArch64: Improve codegen for SVE erfcf Reduce number of MOV/MOVPRFXs and use unpredicated FMUL. Replace MUL with LSL. Speedup on Neoverse V1: 6%. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit f5ff34cb3c75ec1061c75bb9188b3c1176426947) diff --git a/sysdeps/aarch64/fpu/erfcf_sve.c b/sysdeps/aarch64/fpu/erfcf_sve.c index ecacb933ac..e4869263e3 100644 --- a/sysdeps/aarch64/fpu/erfcf_sve.c +++ b/sysdeps/aarch64/fpu/erfcf_sve.c @@ -76,7 +76,7 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) svuint32_t i = svqadd (svreinterpret_u32 (z), dat->off_idx); /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */ - i = svmul_x (pg, i, 2); + i = svlsl_x (svptrue_b32 (), i, 1); const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr; svfloat32_t erfcr = svld1_gather_index (pg, p, i); svfloat32_t scale = svld1_gather_index (pg, p + 1, i); @@ -84,15 +84,15 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */ svfloat32_t r = svsub_x (pg, z, shift); svfloat32_t d = svsub_x (pg, a, r); - svfloat32_t d2 = svmul_x (pg, d, d); - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t d2 = svmul_x (svptrue_b32 (), d, d); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t coeffs = svld1rq (svptrue_b32 (), &dat->third); - svfloat32_t third = svdup_lane (coeffs, 0); svfloat32_t p1 = r; - svfloat32_t p2 = svmls_lane (third, r2, coeffs, 1); - svfloat32_t p3 = svmul_x (pg, r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); + svfloat32_t p2 = svmls_lane (sv_f32 (dat->third), r2, coeffs, 1); + svfloat32_t p3 + = svmul_x (svptrue_b32 (), r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); svfloat32_t p4 = svmla_lane (sv_f32 (dat->two_over_five), r2, coeffs, 2); p4 = svmls_x (pg, sv_f32 (dat->tenth), r2, p4); commit 7dc549c5a4af3c32689147550144397116404d22 Author: Yat Long Poon Date: Thu Feb 13 18:02:01 2025 +0000 AArch64: Improve codegen for SVE pow Move constants to struct. Improve memory access with indexed/unpredicated instructions. Eliminate register spills. Speedup on Neoverse V1: 24%. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit 0b195651db3ae793187c7dd6d78b5a7a8da9d5e6) diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c index 4c0bf8956c..4242d22a49 100644 --- a/sysdeps/aarch64/fpu/pow_sve.c +++ b/sysdeps/aarch64/fpu/pow_sve.c @@ -44,19 +44,18 @@ /* Data is defined in v_pow_log_data.c. */ #define N_LOG (1 << V_POW_LOG_TABLE_BITS) -#define A __v_pow_log_data.poly #define Off 0x3fe6955500000000 /* Data is defined in v_pow_exp_data.c. */ #define N_EXP (1 << V_POW_EXP_TABLE_BITS) #define SignBias (0x800 << V_POW_EXP_TABLE_BITS) -#define C __v_pow_exp_data.poly #define SmallExp 0x3c9 /* top12(0x1p-54). */ #define BigExp 0x408 /* top12(512.). */ #define ThresExp 0x03f /* BigExp - SmallExp. */ #define HugeExp 0x409 /* top12(1024.). */ /* Constants associated with pow. */ +#define SmallBoundX 0x1p-126 #define SmallPowX 0x001 /* top12(0x1p-126). */ #define BigPowX 0x7ff /* top12(INFINITY). */ #define ThresPowX 0x7fe /* BigPowX - SmallPowX. */ @@ -64,6 +63,31 @@ #define BigPowY 0x43e /* top12(0x1.749p62). */ #define ThresPowY 0x080 /* BigPowY - SmallPowY. */ +static const struct data +{ + double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo; + double log_c1, log_c3, log_c5, off; + double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo; + double exp_c0, exp_c1; +} data = { + .log_c0 = -0x1p-1, + .log_c1 = -0x1.555555555556p-1, + .log_c2 = 0x1.0000000000006p-1, + .log_c3 = 0x1.999999959554ep-1, + .log_c4 = -0x1.555555529a47ap-1, + .log_c5 = -0x1.2495b9b4845e9p0, + .log_c6 = 0x1.0002b8b263fc3p0, + .off = Off, + .exp_c0 = 0x1.fffffffffffd4p-2, + .exp_c1 = 0x1.5555571d6ef9p-3, + .exp_c2 = 0x1.5555576a5adcep-5, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP, + .ln2_over_n_hi = 0x1.62e42fefc0000p-9, + .ln2_over_n_lo = -0x1.c610ca86c3899p-45, +}; + /* Check if x is an integer. 
*/ static inline svbool_t sv_isint (svbool_t pg, svfloat64_t x) @@ -82,7 +106,7 @@ sv_isnotint (svbool_t pg, svfloat64_t x) static inline svbool_t sv_isodd (svbool_t pg, svfloat64_t x) { - svfloat64_t y = svmul_x (pg, x, 0.5); + svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5); return sv_isnotint (pg, y); } @@ -121,7 +145,7 @@ zeroinfnan (uint64_t i) static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint64_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2 * asuint64 (INFINITY) - 1); } @@ -174,16 +198,17 @@ sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2, additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ static inline svfloat64_t -sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) +sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail, + const struct data *d) { /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS), sv_u64 (N_LOG - 1)); svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52))); + svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52)); svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t kd = svcvt_f64_x (pg, k); @@ -199,40 +224,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ svfloat64_t r = svmad_x (pg, z, invc, -1.0); /* k*Ln2 + log(c) + r. 
*/ - svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi); + + svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi); + svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0); svfloat64_t t2 = svadd_x (pg, t1, r); - svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo); + svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1); svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r); /* Evaluation is optimized assuming superscalar pipelined execution. */ - svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5. */ - svfloat64_t ar2 = svmul_x (pg, r, ar); - svfloat64_t ar3 = svmul_x (pg, r, ar2); + + svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0); + svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0); + svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar); + svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2); /* k*Ln2 + log(c) + r + A[0]*r*r. */ svfloat64_t hi = svadd_x (pg, t2, ar2); - svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r); + svfloat64_t lo3 = svmls_x (pg, ar2, ar, r); svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2); /* p = log1p(r) - r - A[0]*r*r. */ /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))). 
*/ - svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]); - svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]); - svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]); + + svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4); + svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1); + svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0); + svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1); svfloat64_t p = svmla_x (pg, a34, ar2, a56); p = svmla_x (pg, a12, ar2, p); - p = svmul_x (pg, ar3, p); + p = svmul_x (svptrue_b64 (), ar3, p); svfloat64_t lo = svadd_x ( - pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); + pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); svfloat64_t y = svadd_x (pg, hi, lo); *tail = svadd_x (pg, svsub_x (pg, hi, y), lo); return y; } +static inline svfloat64_t +sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail, + svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits, + svuint64_t *ki, const struct data *d) +{ + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2); + svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0); + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + svfloat64_t kd = svrinta_x (pg, z); + *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd)); + + svfloat64_t ln2_over_n_hilo + = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi); + svfloat64_t r = x; + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0); + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1); + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r = svadd_x (pg, r, xtail); + /* 2^(k/N) ~= scale. 
*/ + svuint64_t idx = svand_x (pg, *ki, N_EXP - 1); + svuint64_t top + = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); + *sbits = svadd_x (pg, *sbits, top); + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1); + *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp); + *tmp = svmla_x (pg, r, r2, *tmp); + svfloat64_t scale = svreinterpret_f64 (*sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + z = svmla_x (pg, scale, scale, *tmp); + return z; +} + /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */ static inline svfloat64_t sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, - svuint64_t sign_bias) + svuint64_t sign_bias, const struct data *d) { /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow) and other cases of large values of x (scale * (1 + TMP) oflow). */ @@ -240,73 +310,46 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */ svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp); - /* Conditions special, uflow and oflow are all expressed as uoflow && - something, hence do not bother computing anything if no lane in uoflow is - true. */ - svbool_t special = svpfalse_b (); - svbool_t uflow = svpfalse_b (); - svbool_t oflow = svpfalse_b (); + svfloat64_t tmp; + svuint64_t sbits, ki; if (__glibc_unlikely (svptest_any (pg, uoflow))) { + svfloat64_t z + = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); + /* |x| is tiny (|x| <= 0x1p-54). 
*/ - uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); + svbool_t uflow + = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); uflow = svand_z (pg, uoflow, uflow); /* |x| is huge (|x| >= 1024). */ - oflow = svcmpge (pg, abstop, HugeExp); + svbool_t oflow = svcmpge (pg, abstop, HugeExp); oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow)); + /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow - or underflow. */ - special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + or underflow. */ + svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + + /* Update result with special and large cases. */ + z = sv_call_specialcase (tmp, sbits, ki, z, special); + + /* Handle underflow and overflow. */ + svbool_t x_is_neg = svcmplt (pg, x, 0); + svuint64_t sign_mask + = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); + svfloat64_t res_uoflow + = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); + res_uoflow = svreinterpret_f64 ( + svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); + /* Avoid spurious underflow for tiny x. */ + svfloat64_t res_spurious_uflow + = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); + + z = svsel (oflow, res_uoflow, z); + z = svsel (uflow, res_spurious_uflow, z); + return z; } - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2); - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift); - svfloat64_t kd = svadd_x (pg, z, shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, shift); - svfloat64_t r = x; - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi); - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo); - /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r = svadd_x (pg, r, xtail); - /* 2^(k/N) ~= scale. 
*/ - svuint64_t idx = svand_x (pg, ki, N_EXP - 1); - svuint64_t top - = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); - sbits = svadd_x (pg, sbits, top); - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]); - tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp); - tmp = svmla_x (pg, r, r2, tmp); - svfloat64_t scale = svreinterpret_f64 (sbits); - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - z = svmla_x (pg, scale, scale, tmp); - - /* Update result with special and large cases. */ - if (__glibc_unlikely (svptest_any (pg, special))) - z = sv_call_specialcase (tmp, sbits, ki, z, special); - - /* Handle underflow and overflow. */ - svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63); - svbool_t x_is_neg = svcmpne (pg, sign_bit, 0); - svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); - svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); - res_uoflow = svreinterpret_f64 ( - svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); - z = svsel (oflow, res_uoflow, z); - /* Avoid spurious underflow for tiny x. */ - svfloat64_t res_spurious_uflow - = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); - z = svsel (uflow, res_spurious_uflow, z); - - return z; + return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); } static inline double @@ -341,47 +384,39 @@ pow_sc (double x, double y) svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + /* This preamble handles special case conditions used in the final scalar fallbacks. 
It also updates ix and sign_bias, that are used in the core computation too, i.e., exp( y * log (x) ). */ svuint64_t vix0 = svreinterpret_u64 (x); svuint64_t viy0 = svreinterpret_u64 (y); - svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52); /* Negative x cases. */ - svuint64_t sign_bit = svlsr_m (pg, vix0, 63); - svbool_t xisneg = svcmpeq (pg, sign_bit, 1); + svbool_t xisneg = svcmplt (pg, x, 0); /* Set sign_bias and ix depending on sign of x and nature of y. */ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint64_t sign_bias = sv_u64 (0); svuint64_t vix = vix0; - svuint64_t vtopx1 = vtopx0; if (__glibc_unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = sv_isnotint (xisneg, y); - svbool_t yisint_xisneg = sv_isint (xisneg, y); + yint_or_xpos = sv_isint (xisneg, y); svbool_t yisodd_xisneg = sv_isodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff); - vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffffffffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0)); } - /* Special cases of x or y: zero, inf and nan. */ - svbool_t xspecial = sv_zeroinfnan (pg, vix0); - svbool_t yspecial = sv_zeroinfnan (pg, viy0); - svbool_t special = svorr_z (pg, xspecial, yspecial); - /* Small cases of x: |x| < 0x1p-126. */ - svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff); - svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX); - if (__glibc_unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, SmallBoundX); + if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. 
*/ - svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0); + svuint64_t vtopx = svlsr_x (svptrue_b64 (), vix, 52); + svbool_t topx_is_null = svcmpeq (xsmall, vtopx, 0); svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52)); vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff); @@ -391,20 +426,24 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) /* y_hi = log(ix, &y_lo). */ svfloat64_t vlo; - svfloat64_t vhi = sv_log_inline (pg, vix, &vlo); + svfloat64_t vhi = sv_log_inline (yint_or_xpos, vix, &vlo, d); /* z = exp(y_hi, y_lo, sign_bias). */ - svfloat64_t vehi = svmul_x (pg, y, vhi); - svfloat64_t velo = svmul_x (pg, y, vlo); - svfloat64_t vemi = svmls_x (pg, vehi, y, vhi); - velo = svsub_x (pg, velo, vemi); - svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias); + svfloat64_t vehi = svmul_x (svptrue_b64 (), y, vhi); + svfloat64_t vemi = svmls_x (yint_or_xpos, vehi, y, vhi); + svfloat64_t velo = svnmls_x (yint_or_xpos, vemi, y, vlo); + svfloat64_t vz = sv_exp_inline (yint_or_xpos, vehi, velo, sign_bias, d); /* Cases of finite y and finite negative x. */ - vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz); + vz = svsel (yint_or_xpos, vz, sv_f64 (__builtin_nan (""))); + + /* Special cases of x or y: zero, inf and nan. */ + svbool_t xspecial = sv_zeroinfnan (svptrue_b64 (), vix0); + svbool_t yspecial = sv_zeroinfnan (svptrue_b64 (), viy0); + svbool_t special = svorr_z (svptrue_b64 (), xspecial, yspecial); /* Cases of zero/inf/nan x or y. */ - if (__glibc_unlikely (svptest_any (pg, special))) + if (__glibc_unlikely (svptest_any (svptrue_b64 (), special))) vz = sv_call2_f64 (pow_sc, x, y, vz, special); return vz; commit 06fd8ad78f35a6cc65dc7c6c08ce55faf6ad079d Author: Yat Long Poon Date: Thu Feb 13 18:03:04 2025 +0000 AArch64: Improve codegen for SVE powf Improve memory access with indexed/unpredicated instructions. Eliminate register spills. Speedup on Neoverse V1: 3%. 
Reviewed-by: Wilco Dijkstra (cherry picked from commit 95e807209b680257a9afe81a507754f1565dbb4d) diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c index 4f6a142325..08d7019a18 100644 --- a/sysdeps/aarch64/fpu/powf_sve.c +++ b/sysdeps/aarch64/fpu/powf_sve.c @@ -26,7 +26,6 @@ #define Tlogc __v_powf_data.logc #define Texp __v_powf_data.scale #define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11)) -#define Shift 0x1.8p52 #define Norm 0x1p23f /* 0x4b000000. */ /* Overall ULP error bound for pow is 2.6 ulp @@ -36,7 +35,7 @@ static const struct data double log_poly[4]; double exp_poly[3]; float uflow_bound, oflow_bound, small_bound; - uint32_t sign_bias, sign_mask, subnormal_bias, off; + uint32_t sign_bias, subnormal_bias, off; } data = { /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of V_POWF_EXP2_N. */ @@ -53,7 +52,6 @@ static const struct data .small_bound = 0x1p-126f, .off = 0x3f35d000, .sign_bias = SignBias, - .sign_mask = 0x80000000, .subnormal_bias = 0x0b800000, /* 23 << 23. */ }; @@ -86,7 +84,7 @@ svisodd (svbool_t pg, svfloat32_t x) static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint32_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2u * 0x7f800000 - 1); } @@ -150,9 +148,14 @@ powf_specialcase (float x, float y, float z) } /* Scalar fallback for special case routines with custom signature. */ -static inline svfloat32_t -sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) +static svfloat32_t NOINLINE +sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y) { + /* Special cases of x or y: zero, inf and nan. 
*/ + svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1)); + svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2)); + svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial); + svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { @@ -182,30 +185,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k, /* Polynomial to approximate log1p(r)/ln2. */ svfloat64_t logx = A (0); - logx = svmla_x (pg, A (1), r, logx); - logx = svmla_x (pg, A (2), r, logx); - logx = svmla_x (pg, A (3), r, logx); - logx = svmla_x (pg, y0, r, logx); + logx = svmad_x (pg, r, logx, A (1)); + logx = svmad_x (pg, r, logx, A (2)); + logx = svmad_x (pg, r, logx, A (3)); + logx = svmad_x (pg, r, logx, y0); *pylogx = svmul_x (pg, y, logx); /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t kd = svadd_x (pg, *pylogx, Shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, Shift); + svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx); + svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd)); r = svsub_x (pg, *pylogx, kd); /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). 
*/ - svuint64_t t - = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1)); - svuint64_t ski = svadd_x (pg, ki, sign_bias); - t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS)); + svuint64_t t = svld1_gather_index ( + svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1)); + svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias); + t = svadd_x (svptrue_b64 (), t, + svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS)); svfloat64_t s = svreinterpret_f64 (t); svfloat64_t p = C (0); p = svmla_x (pg, C (1), p, r); p = svmla_x (pg, C (2), p, r); - p = svmla_x (pg, s, p, svmul_x (pg, s, r)); + p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r)); return p; } @@ -219,19 +222,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, { const svbool_t ptrue = svptrue_b64 (); - /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in - order to perform core computation in double precision. */ + /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two + * in order to perform core computation in double precision. 
*/ const svbool_t pg_lo = svunpklo (pg); const svbool_t pg_hi = svunpkhi (pg); - svfloat64_t y_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); - svfloat64_t y_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); - svfloat32_t z = svreinterpret_f32 (iz); - svfloat64_t z_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z)))); - svfloat64_t z_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z)))); + svfloat64_t y_lo + = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); + svfloat64_t y_hi + = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); + svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz))); + svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz))); svuint64_t i_lo = svunpklo (i); svuint64_t i_hi = svunpkhi (i); svint64_t k_lo = svunpklo (k); @@ -258,9 +258,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, /* Implementation of SVE powf. Provides the same accuracy as AdvSIMD powf, since it relies on the same algorithm. The theoretical maximum error is under 2.60 ULPs. - Maximum measured error is 2.56 ULPs: - SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127 - want 0x1.fd4b06p+127. */ + Maximum measured error is 2.57 ULPs: + SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127 + want 0x1.fff862p+127. */ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) { const struct data *d = ptr_barrier (&data); @@ -269,21 +269,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svuint32_t viy0 = svreinterpret_u32 (y); /* Negative x cases. 
*/ - svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask); - svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask); + svbool_t xisneg = svcmplt (pg, x, sv_f32 (0)); /* Set sign_bias and ix depending on sign of x and nature of y. */ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint32_t sign_bias = sv_u32 (0); svuint32_t vix = vix0; if (__glibc_unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = svisnotint (xisneg, y); - svbool_t yisint_xisneg = svisint (xisneg, y); + yint_or_xpos = svisint (xisneg, y); svbool_t yisodd_xisneg = svisodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0)); } @@ -294,8 +292,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svbool_t cmp = svorr_z (pg, xspecial, yspecial); /* Small cases of x: |x| < 0x1p-126. */ - svbool_t xsmall = svaclt (pg, x, d->small_bound); - if (__glibc_unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound); + if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. */ svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm)); @@ -304,32 +302,35 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) vix = svsel (xsmall, vix_norm, vix); } /* Part of core computation carried in working precision. 
*/ - svuint32_t tmp = svsub_x (pg, vix, d->off); - svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), - V_POWF_LOG2_N - 1); - svuint32_t top = svand_x (pg, tmp, 0xff800000); - svuint32_t iz = svsub_x (pg, vix, top); - svint32_t k - = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS)); - - /* Compute core in extended precision and return intermediate ylogx results to - handle cases of underflow and underflow in exp. */ + svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off); + svuint32_t i = svand_x ( + yint_or_xpos, svlsr_x (yint_or_xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), + V_POWF_LOG2_N - 1); + svuint32_t top = svand_x (yint_or_xpos, tmp, 0xff800000); + svuint32_t iz = svsub_x (yint_or_xpos, vix, top); + svint32_t k = svasr_x (yint_or_xpos, svreinterpret_s32 (top), + (23 - V_POWF_EXP2_TABLE_BITS)); + + /* Compute core in extended precision and return intermediate ylogx results + * to handle cases of underflow and underflow in exp. */ svfloat32_t ylogx; - svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d); + svfloat32_t ret + = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d); /* Handle exp special cases of underflow and overflow. */ - svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); + svuint32_t sign + = svlsl_x (yint_or_xpos, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); svfloat32_t ret_oflow - = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY))); + = svreinterpret_f32 (svorr_x (yint_or_xpos, sign, asuint (INFINITY))); svfloat32_t ret_uflow = svreinterpret_f32 (sign); - ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret); - ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret); + ret = svsel (svcmple (yint_or_xpos, ylogx, d->uflow_bound), ret_uflow, ret); + ret = svsel (svcmpgt (yint_or_xpos, ylogx, d->oflow_bound), ret_oflow, ret); /* Cases of finite y and finite negative x. 
*/ - ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret); + ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf (""))); - if (__glibc_unlikely (svptest_any (pg, cmp))) - return sv_call_powf_sc (x, y, ret, cmp); + if (__glibc_unlikely (svptest_any (cmp, cmp))) + return sv_call_powf_sc (x, y, ret); return ret; } commit fd9a3a36fdcf14d1678c469e8b9033a46aa6c6fb Author: Wilco Dijkstra Date: Thu Feb 27 20:34:34 2025 +0000 Revert "AArch64: Add vector logp1 alias for log1p" This reverts commit a991a0fc7c051d7ef2ea7778e0a699f22d4e53d7. diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index 5019e8e25c..08a41c46ad 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -253,17 +253,6 @@ #define __DECL_SIMD_log1pf64x #define __DECL_SIMD_log1pf128x -#define __DECL_SIMD_logp1 -#define __DECL_SIMD_logp1f -#define __DECL_SIMD_logp1l -#define __DECL_SIMD_logp1f16 -#define __DECL_SIMD_logp1f32 -#define __DECL_SIMD_logp1f64 -#define __DECL_SIMD_logp1f128 -#define __DECL_SIMD_logp1f32x -#define __DECL_SIMD_logp1f64x -#define __DECL_SIMD_logp1f128x - #define __DECL_SIMD_atanh #define __DECL_SIMD_atanhf #define __DECL_SIMD_atanhl diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 92856becc4..6cb594b6ff 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -126,7 +126,7 @@ __MATHCALL (log2p1,, (_Mdouble_ __x)); __MATHCALL (log10p1,, (_Mdouble_ __x)); /* Return log(1 + X). 
*/ -__MATHCALL_VEC (logp1,, (_Mdouble_ __x)); +__MATHCALL (logp1,, (_Mdouble_ __x)); #endif #if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index 015211f5f4..cc15ce2d1e 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -135,11 +135,4 @@ libmvec { _ZGVsMxv_tanh; _ZGVsMxv_tanhf; } - GLIBC_2.41 { - _ZGVnN2v_logp1; - _ZGVnN2v_logp1f; - _ZGVnN4v_logp1f; - _ZGVsMxv_logp1; - _ZGVsMxv_logp1f; - } } diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h index 5909bb4ce9..097d403ffe 100644 --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h @@ -36,7 +36,6 @@ libmvec_hidden_proto (V_NAME_F2(hypot)); libmvec_hidden_proto (V_NAME_F1(log10)); libmvec_hidden_proto (V_NAME_F1(log1p)); libmvec_hidden_proto (V_NAME_F1(log2)); -libmvec_hidden_proto (V_NAME_F1(logp1)); libmvec_hidden_proto (V_NAME_F1(log)); libmvec_hidden_proto (V_NAME_F2(pow)); libmvec_hidden_proto (V_NAME_F1(sin)); diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index f295fe185d..7484150131 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -113,10 +113,6 @@ # define __DECL_SIMD_log2 __DECL_SIMD_aarch64 # undef __DECL_SIMD_log2f # define __DECL_SIMD_log2f __DECL_SIMD_aarch64 -# undef __DECL_SIMD_logp1 -# define __DECL_SIMD_logp1 __DECL_SIMD_aarch64 -# undef __DECL_SIMD_logp1f -# define __DECL_SIMD_logp1f __DECL_SIMD_aarch64 # undef __DECL_SIMD_pow # define __DECL_SIMD_pow __DECL_SIMD_aarch64 # undef __DECL_SIMD_powf @@ -184,7 +180,6 @@ __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); __vpcs __f32x4_t 
_ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); @@ -212,7 +207,6 @@ __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t); __vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); @@ -245,7 +239,6 @@ __sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t); -__sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t); @@ -273,7 +266,6 @@ __sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t); -__sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t); diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c index 1263587201..9d18578ce6 100644 --- a/sysdeps/aarch64/fpu/log1p_advsimd.c +++ b/sysdeps/aarch64/fpu/log1p_advsimd.c @@ -58,5 +58,3 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) return log1p_inline (x, &d->d); } - -strong_alias (V_NAME_D1 (log1p), V_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c index b21cfb2c90..04f7e5720e 100644 --- a/sysdeps/aarch64/fpu/log1p_sve.c +++ b/sysdeps/aarch64/fpu/log1p_sve.c @@ -116,5 +116,3 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, 
svbool_t pg) return y; } - -strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c index 00006fc703..f2d47962fe 100644 --- a/sysdeps/aarch64/fpu/log1pf_advsimd.c +++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c @@ -93,6 +93,3 @@ VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) libmvec_hidden_def (V_NAME_F1 (log1p)) HALF_WIDTH_ALIAS_F1 (log1p) -strong_alias (V_NAME_F1 (log1p), V_NAME_F1 (logp1)) -libmvec_hidden_def (V_NAME_F1 (logp1)) -HALF_WIDTH_ALIAS_F1 (logp1) diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c index 18a185c838..4f17c44e2d 100644 --- a/sysdeps/aarch64/fpu/log1pf_sve.c +++ b/sysdeps/aarch64/fpu/log1pf_sve.c @@ -42,5 +42,3 @@ svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) return sv_log1pf_inline (x, pg); } - -strong_alias (SV_NAME_F1 (log1p), SV_NAME_F1 (logp1)) diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index 98687cae0d..b685106954 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -128,8 +128,3 @@ GLIBC_2.40 _ZGVsMxvv_hypot F GLIBC_2.40 _ZGVsMxvv_hypotf F GLIBC_2.40 _ZGVsMxvv_pow F GLIBC_2.40 _ZGVsMxvv_powf F -GLIBC_2.41 _ZGVnN2v_logp1 F -GLIBC_2.41 _ZGVnN2v_logp1f F -GLIBC_2.41 _ZGVnN4v_logp1f F -GLIBC_2.41 _ZGVsMxv_logp1 F -GLIBC_2.41 _ZGVsMxv_logp1f F commit 64896b7d329809127035fde42768a6f7eeffed75 Author: Wilco Dijkstra Date: Wed Aug 7 14:43:47 2024 +0100 AArch64: Improve generic strlen Improve performance by handling another 16 bytes before entering the loop. Use ADDHN in the loop to avoid SHRN+FMOV when it terminates. Change final size computation to avoid increasing latency. On Neoverse V1 performance of the random strlen benchmark improves by 4.6%. 
Reviewed-by: Adhemerval Zanella (cherry picked from commit 3dc426b642dcafdbc11a99f2767e081d086f5fc7) diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S index ab2a576cdb..352fb40d3a 100644 --- a/sysdeps/aarch64/strlen.S +++ b/sysdeps/aarch64/strlen.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2024 Free Software Foundation, Inc. +/* Generic optimized strlen using SIMD. + Copyright (C) 2012-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -56,36 +57,50 @@ ENTRY (STRLEN) shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov synd, dend lsr synd, synd, shift - cbz synd, L(loop) + cbz synd, L(next16) rbit synd, synd clz result, synd lsr result, result, 2 ret +L(next16): + ldr data, [src, 16] + cmeq vhas_nul.16b, vdata.16b, 0 + shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ + fmov synd, dend + cbz synd, L(loop) + add src, src, 16 +#ifndef __AARCH64EB__ + rbit synd, synd +#endif + sub result, src, srcin + clz tmp, synd + add result, result, tmp, lsr 2 + ret + .p2align 5 L(loop): - ldr data, [src, 16] + ldr data, [src, 32]! cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbnz synd, L(loop_end) - ldr data, [src, 32]! + ldr data, [src, 16] cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbz synd, L(loop) - sub src, src, 16 + add src, src, 16 L(loop_end): - shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ - sub result, src, srcin - fmov synd, dend + sub result, shift, src, lsl 2 /* (srcin - src) << 2. 
*/ #ifndef __AARCH64EB__ rbit synd, synd + sub result, result, 3 #endif - add result, result, 16 clz tmp, synd - add result, result, tmp, lsr 2 + sub result, tmp, result + lsr result, result, 2 ret END (STRLEN) commit 544fb349d35efd5f86ed7e482759ff21496a32fd Author: Wilco Dijkstra Date: Mon Sep 9 15:26:47 2024 +0100 AArch64: Optimize memset Improve small memsets by avoiding branches and use overlapping stores. Use DC ZVA for copies over 128 bytes. Remove unnecessary code for ZVA sizes other than 64 and 128. Performance of random memset benchmark improves by 24% on Neoverse N1. Reviewed-by: Adhemerval Zanella (cherry picked from commit cec3aef32412779e207f825db0d057ebb4628ae8) diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index 7ef77ee8c9..caafb019e2 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2024 Free Software Foundation, Inc. +/* Generic optimized memset using SIMD. + Copyright (C) 2012-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,7 +18,6 @@ . */ #include -#include "memset-reg.h" #ifndef MEMSET # define MEMSET memset @@ -25,130 +25,132 @@ /* Assumptions: * - * ARMv8-a, AArch64, unaligned accesses + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ -ENTRY (MEMSET) +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 +#define off x3 +#define dstend2 x5 +ENTRY (MEMSET) PTR_ARG (0) SIZE_ARG (2) dup v0.16B, valw + cmp count, 16 + b.lo L(set_small) + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) - cmp count, 96 - b.hi L(set_long) - cmp count, 16 - b.hs L(set_medium) - mov val, v0.D[0] + /* Set 16..63 bytes. */ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off + str q0, [dstin] + str q0, [dstin, off] + str q0, [dstend2, -16] + str q0, [dstend, -16] + ret + .p2align 4 /* Set 0..15 bytes. 
*/ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] - ret - nop -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] +L(set_small): + add dstend, dstin, count + cmp count, 4 + b.lo 2f + lsr off, count, 3 + sub dstend2, dstend, off, lsl 2 + str s0, [dstin] + str s0, [dstin, off, lsl 2] + str s0, [dstend2, -4] + str s0, [dstend, -4] ret + + /* Set 0..3 bytes. */ 2: cbz count, 3f + lsr off, count, 1 strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] + strb valw, [dstin, off] + strb valw, [dstend, -1] 3: ret - /* Set 17..96 bytes. */ -L(set_medium): - str q0, [dstin] - tbnz count, 6, L(set96) - str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret - .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. */ -L(set96): - str q0, [dstin, 16] +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret - .p2align 3 - nop + .p2align 4 L(set_long): - and valw, valw, 255 - bic dst, dstin, 15 str q0, [dstin] - cmp count, 256 - ccmp valw, 0, 0, cs - b.eq L(try_zva) -L(no_zva): - sub count, dstend, dst /* Count is 16 too large. */ - sub dst, dst, 16 /* Dst is biased by -32. */ - sub count, count, 64 + 16 /* Adjust count and bias for loop. */ -1: stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64]! -L(tail64): - subs count, count, 64 - b.hi 1b -2: stp q0, q0, [dstend, -64] + str q0, [dst, 16] + tst valw, 255 + b.ne L(no_zva) +#ifndef ZVA64_ONLY + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne L(zva_128) +#endif + stp q0, q0, [dst, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 64 + 64 /* Adjust count and bias for loop. */ + + /* Write last bytes before ZVA loop. 
*/ + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] + + .p2align 4 +L(zva64_loop): + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi L(zva64_loop) ret -L(try_zva): -#ifndef ZVA64_ONLY .p2align 3 - mrs tmp1, dczid_el0 - tbnz tmp1w, 4, L(no_zva) - and tmp1w, tmp1w, 15 - cmp tmp1w, 4 /* ZVA size is 64 bytes. */ - b.ne L(zva_128) - nop -#endif - /* Write the first and last 64 byte aligned block using stp rather - than using DC ZVA. This is faster on some cores. - */ - .p2align 4 -L(zva_64): - str q0, [dst, 16] +L(no_zva): + sub count, dstend, dst /* Count is 32 too large. */ + sub count, count, 64 + 32 /* Adjust count and bias for loop. */ +L(no_zva_loop): stp q0, q0, [dst, 32] - bic dst, dst, 63 stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+64+64 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst add dst, dst, 64 subs count, count, 64 - b.hi 1b - stp q0, q0, [dst, 0] - stp q0, q0, [dst, 32] + b.hi L(no_zva_loop) stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret #ifndef ZVA64_ONLY - .p2align 3 + .p2align 4 L(zva_128): - cmp tmp1w, 5 /* ZVA size is 128 bytes. */ - b.ne L(zva_other) + cmp zva_val, 5 /* ZVA size is 128 bytes. */ + b.ne L(no_zva) - str q0, [dst, 16] stp q0, q0, [dst, 32] stp q0, q0, [dst, 64] stp q0, q0, [dst, 96] bic dst, dst, 127 sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+128 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst - add dst, dst, 128 + sub count, count, 128 + 128 /* Adjust count and bias for loop. */ +1: add dst, dst, 128 + dc zva, dst subs count, count, 128 b.hi 1b stp q0, q0, [dstend, -128] @@ -156,35 +158,6 @@ L(zva_128): stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret - -L(zva_other): - mov tmp2w, 4 - lsl zva_lenw, tmp2w, tmp1w - add tmp1, zva_len, 64 /* Max alignment bytes written. 
*/ - cmp count, tmp1 - blo L(no_zva) - - sub tmp2, zva_len, 1 - add tmp1, dst, zva_len - add dst, dst, 16 - subs count, tmp1, dst /* Actual alignment bytes to write. */ - bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */ - beq 2f -1: stp q0, q0, [dst], 64 - stp q0, q0, [dst, -32] - subs count, count, 64 - b.hi 1b -2: mov dst, tmp1 - sub count, dstend, tmp1 /* Remaining bytes to write. */ - subs count, count, zva_len - b.lo 4f -3: dc zva, dst - add dst, dst, zva_len - subs count, count, zva_len - b.hs 3b -4: add count, count, zva_len - sub dst, dst, 32 /* Bias dst for tail loop. */ - b L(tail64) #endif END (MEMSET) commit 41eb2f8b5847079caca90a74659456adbb80ec29 Author: Wilco Dijkstra Date: Mon Nov 25 18:43:08 2024 +0000 AArch64: Remove zva_128 from memset Remove ZVA 128 support from memset - the new memset no longer guarantees count >= 256, which can result in underflow and a crash if ZVA size is 128 ([1]). Since only one CPU uses a ZVA size of 128 and its memcpy implementation was removed in commit e162ab2bf1b82c40f29e1925986582fa07568ce8, remove this special case too. [1] https://sourceware.org/pipermail/libc-alpha/2024-November/161626.html Reviewed-by: Andrew Pinski (cherry picked from commit a08d9a52f967531a77e1824c23b5368c6434a72d) diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index caafb019e2..71814d0b2f 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -104,7 +104,7 @@ L(set_long): mrs zva_val, dczid_el0 and zva_val, zva_val, 31 cmp zva_val, 4 /* ZVA size is 64 bytes. */ - b.ne L(zva_128) + b.ne L(no_zva) #endif stp q0, q0, [dst, 32] bic dst, dstin, 63 @@ -137,28 +137,5 @@ L(no_zva_loop): stp q0, q0, [dstend, -32] ret -#ifndef ZVA64_ONLY - .p2align 4 -L(zva_128): - cmp zva_val, 5 /* ZVA size is 128 bytes. */ - b.ne L(no_zva) - - stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - bic dst, dst, 127 - sub count, dstend, dst /* Count is now 128 too large. 
*/ - sub count, count, 128 + 128 /* Adjust count and bias for loop. */ -1: add dst, dst, 128 - dc zva, dst - subs count, count, 128 - b.hi 1b - stp q0, q0, [dstend, -128] - stp q0, q0, [dstend, -96] - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret -#endif - END (MEMSET) libc_hidden_builtin_def (MEMSET) commit 27fa0268ead054810a5e2669d0b5bb88ceb05b05 Author: Wilco Dijkstra Date: Wed Jul 24 15:17:47 2024 +0100 math: Improve layout of expf data GCC aligns global data to 16 bytes if their size is >= 16 bytes. This patch changes the exp2f_data struct slightly so that the fields are better aligned. As a result on targets that support them, load-pair instructions accessing poly_scaled and invln2_scaled are now 16-byte aligned. Reviewed-by: Adhemerval Zanella (cherry picked from commit 44fa9c1080fe6a9539f0d2345b9d2ae37b8ee57a) diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index 729f22cd4f..dc07ebd459 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ b/sysdeps/ieee754/flt-32/math_config.h @@ -166,9 +166,9 @@ extern const struct exp2f_data uint64_t tab[1 << EXP2F_TABLE_BITS]; double shift_scaled; double poly[EXP2F_POLY_ORDER]; - double shift; double invln2_scaled; double poly_scaled[EXP2F_POLY_ORDER]; + double shift; } __exp2f_data attribute_hidden; #define LOGF_TABLE_BITS 4 commit 7038970f1f485fb660606f0c596f432fdef250f6 Author: Wilco Dijkstra Date: Tue Dec 24 18:01:59 2024 +0000 AArch64: Add SVE memset Add SVE memset based on the generic memset with predicated load for sizes < 16. Unaligned memsets of 128-1024 are improved by ~20% on average by using aligned stores for the last 64 bytes. Performance of random memset benchmark improves by ~2% on Neoverse V1. 
Reviewed-by: Yury Khrustalev (cherry picked from commit 163b1bbb76caba4d9673c07940c5930a1afa7548) diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index 3e251cc234..6880ebc035 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -16,6 +16,7 @@ sysdep_routines += \ memset_kunpeng \ memset_mops \ memset_oryon1 \ + memset_sve_zva64 \ memset_zva64 \ strlen_asimd \ strlen_generic \ diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index b2fda541f9..1f101a719b 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -61,6 +61,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) #if HAVE_AARCH64_SVE_ASM IFUNC_IMPL_ADD (array, i, memset, sve && !bti && zva_size == 256, __memset_a64fx) + IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 64, __memset_sve_zva64) #endif IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic)) diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c index bd063c16c9..4f65295e77 100644 --- a/sysdeps/aarch64/multiarch/memset.c +++ b/sysdeps/aarch64/multiarch/memset.c @@ -36,6 +36,7 @@ extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; extern __typeof (__redirect_memset) __memset_generic attribute_hidden; extern __typeof (__redirect_memset) __memset_mops attribute_hidden; extern __typeof (__redirect_memset) __memset_oryon1 attribute_hidden; +extern __typeof (__redirect_memset) __memset_sve_zva64 attribute_hidden; static inline __typeof (__redirect_memset) * select_memset_ifunc (void) @@ -49,6 +50,9 @@ select_memset_ifunc (void) { if (IS_A64FX (midr) && zva_size == 256) return __memset_a64fx; + + if (zva_size == 64) + return __memset_sve_zva64; } if (IS_ORYON1 (midr) && zva_size == 64) diff 
--git a/sysdeps/aarch64/multiarch/memset_sve_zva64.S b/sysdeps/aarch64/multiarch/memset_sve_zva64.S new file mode 100644 index 0000000000..7fb40fdd9e --- /dev/null +++ b/sysdeps/aarch64/multiarch/memset_sve_zva64.S @@ -0,0 +1,123 @@ +/* Optimized memset for SVE. + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +/* Assumptions: + * + * ARMv8-a, AArch64, Advanced SIMD, SVE, unaligned accesses. + * ZVA size is 64. + */ + +#if HAVE_AARCH64_SVE_ASM + +.arch armv8.2-a+sve + +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 +#define vlen x5 +#define off x3 +#define dstend2 x5 + +ENTRY (__memset_sve_zva64) + dup v0.16B, valw + cmp count, 16 + b.lo L(set_16) + + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) + + /* Set 16..63 bytes. 
*/ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off + str q0, [dstin] + str q0, [dstin, off] + str q0, [dstend2, -16] + str q0, [dstend, -16] + ret + + .p2align 4 +L(set_16): + whilelo p0.b, xzr, count + st1b z0.b, p0, [dstin] + ret + + .p2align 4 +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] + stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + + .p2align 4 +L(set_long): + cmp count, 256 + b.lo L(no_zva) + tst valw, 255 + b.ne L(no_zva) + + str q0, [dstin] + str q0, [dst, 16] + bic dst, dstin, 31 + stp q0, q0, [dst, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + sub x8, dstend, 1 /* Write last bytes before ZVA loop. */ + bic x8, x8, 15 + stp q0, q0, [x8, -48] + str q0, [x8, -16] + str q0, [dstend, -16] + + .p2align 4 +L(zva64_loop): + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi L(zva64_loop) + ret + +L(no_zva): + str q0, [dstin] + sub count, dstend, dst /* Count is 16 too large. */ + sub count, count, 64 + 16 /* Adjust count and bias for loop. */ +L(no_zva_loop): + stp q0, q0, [dst, 16] + stp q0, q0, [dst, 48] + add dst, dst, 64 + subs count, count, 64 + b.hi L(no_zva_loop) + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +END (__memset_sve_zva64) +#endif commit d6175a44e95fe443d0fbfed37a9ff7424f1e2661 Author: Wilco Dijkstra Date: Thu Feb 27 16:28:52 2025 +0000 AArch64: Use prefer_sve_ifuncs for SVE memset Use prefer_sve_ifuncs for SVE memset just like memcpy. 
Reviewed-by: Yury Khrustalev (cherry picked from commit 0f044be1dae5169d0e57f8d487b427863aeadab4) diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c index 4f65295e77..bb1e865c97 100644 --- a/sysdeps/aarch64/multiarch/memset.c +++ b/sysdeps/aarch64/multiarch/memset.c @@ -51,7 +51,7 @@ select_memset_ifunc (void) if (IS_A64FX (midr) && zva_size == 256) return __memset_a64fx; - if (zva_size == 64) + if (prefer_sve_ifuncs && zva_size == 64) return __memset_sve_zva64; } commit d8e8342369831808b00324790c8809ba33408ee7 Author: Wilco Dijkstra Date: Fri Dec 13 15:43:07 2024 +0000 math: Improve layout of exp/exp10 data GCC aligns global data to 16 bytes if their size is >= 16 bytes. This patch changes the exp_data struct slightly so that the fields are better aligned and without gaps. As a result on targets that support them, more load-pair instructions are used in exp. Exp10 is improved by moving invlog10_2N later so that neglog10_2hiN and neglog10_2loN can be loaded using load-pair. The exp benchmark improves 2.5%, "144bits" by 7.2%, "768bits" by 12.7% on Neoverse V2. Exp10 improves by 1.5%. Reviewed-by: Adhemerval Zanella (cherry picked from commit 5afaf99edb326fd9f36eb306a828d129a3a1d7f7) diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h index ef87cfa6be..05515fd95a 100644 --- a/sysdeps/ieee754/dbl-64/math_config.h +++ b/sysdeps/ieee754/dbl-64/math_config.h @@ -195,16 +195,18 @@ check_uflow (double x) extern const struct exp_data { double invln2N; - double shift; double negln2hiN; double negln2loN; double poly[4]; /* Last four coefficients. 
*/ + double shift; + double exp2_shift; double exp2_poly[EXP2_POLY_ORDER]; - double invlog10_2N; + double neglog10_2hiN; double neglog10_2loN; double exp10_poly[5]; + double invlog10_2N; uint64_t tab[2*(1 << EXP_TABLE_BITS)]; } __exp_data attribute_hidden; commit 3e820e17a8cef84645d83b67abcbc3f88c7fd268 Author: Michael Jeanson Date: Fri Feb 14 13:54:22 2025 -0500 nptl: clear the whole rseq area before registration Due to the extensible nature of the rseq area we can't explicitly initialize fields that are not part of the ABI yet. It was agreed with upstream that all new fields will be documented as zero initialized by userspace. Future kernels configured with CONFIG_DEBUG_RSEQ will validate the content of all fields during registration. Replace the explicit field initialization with a memset of the whole rseq area which will cover fields as they are added to future kernels. Signed-off-by: Michael Jeanson Reviewed-by: Florian Weimer (cherry picked from commit 689a62a4217fae78b9ce0db781dc2a421f2b1ab4) diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c index 7803e19fd1..ed10185e37 100644 --- a/sysdeps/nptl/dl-tls_init_tp.c +++ b/sysdeps/nptl/dl-tls_init_tp.c @@ -23,6 +23,7 @@ #include #include #include +#include #define TUNABLE_NAMESPACE pthread #include diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h index ef3eab1fef..76de2b7ff0 100644 --- a/sysdeps/unix/sysv/linux/rseq-internal.h +++ b/sysdeps/unix/sysv/linux/rseq-internal.h @@ -52,13 +52,12 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) but still expected size 32. */ size = RSEQ_AREA_SIZE_INITIAL; - /* Initialize the rseq fields that are read by the kernel on - registration, there is no guarantee that struct pthread is - cleared on all architectures. */ + /* Initialize the whole rseq area to zero prior to registration.
*/ + memset (&self->rseq_area, 0, size); + + /* Set the cpu_id field to RSEQ_CPU_ID_UNINITIALIZED, this is checked by + the kernel at registration when CONFIG_DEBUG_RSEQ is enabled. */ THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); - THREAD_SETMEM (self, rseq_area.cpu_id_start, 0); - THREAD_SETMEM (self, rseq_area.rseq_cs, 0); - THREAD_SETMEM (self, rseq_area.flags, 0); int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area, size, 0, RSEQ_SIG); commit ee1ab9302363066b49cf8862b96664ed35eda81c Author: Sunil K Pandey Date: Mon Mar 10 10:24:07 2025 -0700 x86_64: Add tanh with FMA On Skylake, it improves tanh bench performance by: Before After Improvement max 110.89 95.826 14% min 20.966 20.157 4% mean 30.9601 29.8431 4% Reviewed-by: H.J. Lu (cherry picked from commit c6352111c72a20b3588ae304dd99b63e25dd6d85) diff --git a/sysdeps/ieee754/dbl-64/s_tanh.c b/sysdeps/ieee754/dbl-64/s_tanh.c index 673a97102d..13063db04e 100644 --- a/sysdeps/ieee754/dbl-64/s_tanh.c +++ b/sysdeps/ieee754/dbl-64/s_tanh.c @@ -46,6 +46,11 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7 1995/05/10 20:48:22 jtc Exp $"; static const double one = 1.0, two = 2.0, tiny = 1.0e-300; +#ifndef SECTION +# define SECTION +#endif + +SECTION double __tanh (double x) { diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index cbe09d49f4..0f69f7089c 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -10,6 +10,7 @@ CFLAGS-s_expm1-fma.c = -mfma -mavx2 CFLAGS-s_log1p-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 +CFLAGS-s_tanh-fma.c = -mfma -mavx2 CFLAGS-s_sincos-fma.c = -mfma -mavx2 CFLAGS-e_exp2f-fma.c = -mfma -mavx2 @@ -92,6 +93,7 @@ libm-sysdep_routines += \ s_sinf-sse2 \ s_tan-avx \ s_tan-fma \ + s_tanh-fma \ s_trunc-sse4_1 \ s_truncf-sse4_1 \ # libm-sysdep_routines diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c 
b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c new file mode 100644 index 0000000000..1b808b1227 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c @@ -0,0 +1,11 @@ +#define __tanh __tanh_fma +#define __expm1 __expm1_fma + +/* NB: __expm1 may be expanded to __expm1_fma in the following + prototypes. */ +extern long double __expm1l (long double); +extern long double __expm1f128 (long double); + +#define SECTION __attribute__ ((section (".text.fma"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh.c b/sysdeps/x86_64/fpu/multiarch/s_tanh.c new file mode 100644 index 0000000000..5539b6c61c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh.c @@ -0,0 +1,31 @@ +/* Multiple versions of tanh. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL + +extern double __redirect_tanh (double); + +# define SYMBOL_NAME tanh +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_tanh, __tanh, IFUNC_SELECTOR ()); + +# define __tanh __tanh_sse2 +#endif +#include commit e854f6d37cbeabb9130fed74b587befad8b4ba08 Author: Sunil K Pandey Date: Sat Mar 8 08:51:10 2025 -0800 x86_64: Add sinh with FMA On SPR, it improves sinh bench performance by: Before After Improvement reciprocal-throughput 14.2017 11.815 17% latency 36.4917 35.2114 4% Reviewed-by: H.J. Lu (cherry picked from commit dded0d20f67ba1925ccbcb9cf28f0c75febe0dbe) diff --git a/benchtests/sinh-inputs b/benchtests/sinh-inputs index 7b1ac46a39..2fcb2fabf8 100644 --- a/benchtests/sinh-inputs +++ b/benchtests/sinh-inputs @@ -1,6 +1,7 @@ ## args: double ## ret: double ## includes: math.h +## name: workload-random 0x1.bcb6129b5ff2bp8 -0x1.63057386325ebp9 0x1.62f1d7dc4e8bfp9 diff --git a/sysdeps/ieee754/dbl-64/e_sinh.c b/sysdeps/ieee754/dbl-64/e_sinh.c index b4b5857ddd..3f787967f9 100644 --- a/sysdeps/ieee754/dbl-64/e_sinh.c +++ b/sysdeps/ieee754/dbl-64/e_sinh.c @@ -41,6 +41,11 @@ static char rcsid[] = "$NetBSD: e_sinh.c,v 1.7 1995/05/10 20:46:13 jtc Exp $"; static const double one = 1.0, shuge = 1.0e307; +#ifndef SECTION +# define SECTION +#endif + +SECTION double __ieee754_sinh (double x) { @@ -90,4 +95,7 @@ __ieee754_sinh (double x) /* |x| > overflowthresold, sinh(x) overflow */ return math_narrow_eval (x * shuge); } + +#ifndef __ieee754_sinh libm_alias_finite (__ieee754_sinh, __sinh) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 0f69f7089c..b527cab8d1 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -5,6 +5,7 @@ CFLAGS-e_exp-fma.c = -mfma -mavx2 CFLAGS-e_log-fma.c = -mfma -mavx2 CFLAGS-e_log2-fma.c = -mfma -mavx2 CFLAGS-e_pow-fma.c = -mfma -mavx2 +CFLAGS-e_sinh-fma.c = -mfma -mavx2 
CFLAGS-s_atan-fma.c = -mfma -mavx2 CFLAGS-s_expm1-fma.c = -mfma -mavx2 CFLAGS-s_log1p-fma.c = -mfma -mavx2 @@ -67,6 +68,7 @@ libm-sysdep_routines += \ e_logf-fma \ e_pow-fma \ e_powf-fma \ + e_sinh-fma \ s_atan-avx \ s_atan-fma \ s_ceil-sse4_1 \ diff --git a/sysdeps/x86_64/fpu/multiarch/e_sinh-fma.c b/sysdeps/x86_64/fpu/multiarch/e_sinh-fma.c new file mode 100644 index 0000000000..e0e1e39a7a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_sinh-fma.c @@ -0,0 +1,12 @@ +#define __ieee754_sinh __ieee754_sinh_fma +#define __ieee754_exp __ieee754_exp_fma +#define __expm1 __expm1_fma + +/* NB: __expm1 may be expanded to __expm1_fma in the following + prototypes. */ +extern long double __expm1l (long double); +extern long double __expm1f128 (long double); + +#define SECTION __attribute__ ((section (".text.fma"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/e_sinh.c b/sysdeps/x86_64/fpu/multiarch/e_sinh.c new file mode 100644 index 0000000000..3d3c18ccdf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_sinh.c @@ -0,0 +1,35 @@ +/* Multiple versions of sinh. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include + +extern double __redirect_ieee754_sinh (double); + +# define SYMBOL_NAME ieee754_sinh +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_ieee754_sinh, __ieee754_sinh, + IFUNC_SELECTOR ()); + +libm_alias_finite (__ieee754_sinh, __sinh) + +# define __ieee754_sinh __ieee754_sinh_sse2 +#endif +#include commit e5f5dfdda28def8362896bdb1748bb27dfc8be73 Author: Sunil K Pandey Date: Wed Mar 5 16:13:38 2025 -0800 x86_64: Add atanh with FMA On SPR, it improves atanh bench performance by: Before After Improvement reciprocal-throughput 15.1715 14.8628 2% latency 57.1941 56.1883 2% Reviewed-by: H.J. Lu (cherry picked from commit c7c4a5906f326f1290b1c2413a83c530564ec4b8) diff --git a/benchtests/atanh-inputs b/benchtests/atanh-inputs index 455aa65b65..4985293254 100644 --- a/benchtests/atanh-inputs +++ b/benchtests/atanh-inputs @@ -1,6 +1,7 @@ ## args: double ## ret: double ## includes: math.h +## name: workload-random 0x1.5a2730bacd94ap-1 -0x1.b57eb40fc048ep-21 -0x1.c0b185fb450e2p-17 diff --git a/sysdeps/ieee754/dbl-64/e_atanh.c b/sysdeps/ieee754/dbl-64/e_atanh.c index 11a2a45799..05ac0a1b30 100644 --- a/sysdeps/ieee754/dbl-64/e_atanh.c +++ b/sysdeps/ieee754/dbl-64/e_atanh.c @@ -44,6 +44,11 @@ static const double huge = 1e300; +#ifndef SECTION +# define SECTION +#endif + +SECTION double __ieee754_atanh (double x) { @@ -73,4 +78,7 @@ __ieee754_atanh (double x) return copysign (t, x); } + +#ifndef __ieee754_atanh libm_alias_finite (__ieee754_atanh, __atanh) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index b527cab8d1..bc479b42d2 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -1,6 +1,7 @@ ifeq ($(subdir),math) CFLAGS-e_asin-fma.c = -mfma -mavx2 CFLAGS-e_atan2-fma.c = -mfma -mavx2 +CFLAGS-e_atanh-fma.c = -mfma -mavx2 CFLAGS-e_exp-fma.c = -mfma -mavx2 CFLAGS-e_log-fma.c = -mfma -mavx2 
CFLAGS-e_log2-fma.c = -mfma -mavx2 @@ -57,6 +58,7 @@ libm-sysdep_routines += \ e_asin-fma \ e_atan2-avx \ e_atan2-fma \ + e_atanh-fma \ e_exp-avx \ e_exp-fma \ e_exp2f-fma \ diff --git a/sysdeps/x86_64/fpu/multiarch/e_atanh-fma.c b/sysdeps/x86_64/fpu/multiarch/e_atanh-fma.c new file mode 100644 index 0000000000..c3f2f9e550 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atanh-fma.c @@ -0,0 +1,6 @@ +#define __ieee754_atanh __ieee754_atanh_fma +#define __log1p __log1p_fma + +#define SECTION __attribute__ ((section (".text.fma"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/e_atanh.c b/sysdeps/x86_64/fpu/multiarch/e_atanh.c new file mode 100644 index 0000000000..d2b785dfc0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atanh.c @@ -0,0 +1,34 @@ +/* Multiple versions of atanh. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include + +extern double __redirect_ieee754_atanh (double); + +# define SYMBOL_NAME ieee754_atanh +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_ieee754_atanh, __ieee754_atanh, IFUNC_SELECTOR ()); + +libm_alias_finite (__ieee754_atanh, __atanh) + +# define __ieee754_atanh __ieee754_atanh_sse2 +#endif +#include commit 8fc492bb4234edc1a5e8c3b7f76ba345ea7109ec Author: Florian Weimer Date: Fri Mar 28 09:26:06 2025 +0100 x86: Skip XSAVE state size reset if ISA level requires XSAVE If we have to use XSAVE or XSAVEC trampolines, do not adjust the size information they need. Technically, it is an operator error to try to run with -XSAVE,-XSAVEC on such builds, but this change here disables some unnecessary code with higher ISA levels and simplifies testing. Related to commit befe2d3c4dec8be2cdd01a47132e47bdb7020922 ("x86-64: Don't use SSE resolvers for ISA level 3 or above"). Reviewed-by: H.J. Lu (cherry picked from commit 59585ddaa2d44f22af04bb4b8bd4ad1e302c4c02) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index c096dd390a..b5b264db7f 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -24,6 +24,7 @@ #include #include #include +#include extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden; @@ -1119,6 +1120,9 @@ no_cpuid: TUNABLE_CALLBACK (set_prefer_map_32bit_exec)); #endif + /* Do not add the logic to disable XSAVE/XSAVEC if this glibc build + requires AVX and therefore XSAVE or XSAVEC support. 
*/ +#ifndef GCCMACRO__AVX__ bool disable_xsave_features = false; if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE)) @@ -1172,6 +1176,7 @@ no_cpuid: CPU_FEATURE_UNSET (cpu_features, FMA4); } +#endif #ifdef __x86_64__ GLRO(dl_hwcap) = HWCAP_X86_64; commit df22af58f66e6815c054b1c56249356c2994935a Author: Florian Weimer Date: Fri Mar 28 09:26:59 2025 +0100 x86: Use separate variable for TLSDESC XSAVE/XSAVEC state size (bug 32810) Previously, the initialization code reused the xsave_state_full_size member of struct cpu_features for the TLSDESC state size. However, the tunable processing code assumes that this member has the original XSAVE (non-compact) state size, so that it can use its value if XSAVEC is disabled via tunable. This change uses a separate variable and not a struct member because the value is only needed in ld.so and the static libc, but not in libc.so. As a result, struct cpu_features layout does not change, helping a future backport of this change. Fixes commit 9b7091415af47082664717210ac49d51551456ab ("x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers"). Reviewed-by: H.J. 
Lu (cherry picked from commit 145097dff170507fe73190e8e41194f5b5f7e6bf) diff --git a/NEWS b/NEWS index 57feba81cd..7a6985f5dd 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,7 @@ The following bugs are resolved with this release: [32231] elf: Change ldconfig auxcache magic number [32245] glibc -Wstringop-overflow= build failure on hppa [32470] x86: Avoid integer truncation with large cache sizes + [32810] Crash on x86-64 if XSAVEC disable via tunable Version 2.40 diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 5311b594af..8819fba1b7 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -21,6 +21,9 @@ tests += \ tst-cpu-features-supports-static \ tst-get-cpu-features \ tst-get-cpu-features-static \ + tst-gnu2-tls2-x86-noxsave \ + tst-gnu2-tls2-x86-noxsavec \ + tst-gnu2-tls2-x86-noxsavexsavec \ tst-hwcap-tunables \ # tests tests-static += \ @@ -91,6 +94,22 @@ CFLAGS-tst-gnu2-tls2.c += -msse CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell + +LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy +LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy +LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy + +# Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled +# via tunable. 
+tst-gnu2-tls2-x86-noxsave-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE +tst-gnu2-tls2-x86-noxsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC +tst-gnu2-tls2-x86-noxsavexsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE,-XSAVEC +$(objpfx)tst-gnu2-tls2-x86-noxsave.out \ +$(objpfx)tst-gnu2-tls2-x86-noxsavec.out \ +$(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ + $(objpfx)tst-gnu2-tls2mod0.so \ + $(objpfx)tst-gnu2-tls2mod1.so \ + $(objpfx)tst-gnu2-tls2mod2.so endif ifeq ($(subdir),math) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index b5b264db7f..ec27337337 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -84,6 +84,8 @@ extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *) # include #endif +unsigned long int _dl_x86_features_tlsdesc_state_size; + static void update_active (struct cpu_features *cpu_features) { @@ -318,6 +320,7 @@ update_active (struct cpu_features *cpu_features) = xsave_state_full_size; cpu_features->xsave_state_full_size = xsave_state_full_size; + _dl_x86_features_tlsdesc_state_size = xsave_state_full_size; /* Check if XSAVEC is available. */ if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC)) @@ -406,11 +409,9 @@ update_active (struct cpu_features *cpu_features) = ALIGN_UP ((amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA), 64); - /* Set xsave_state_full_size to the compact AMX - state size for XSAVEC. NB: xsave_state_full_size - is only used in _dl_tlsdesc_dynamic_xsave and - _dl_tlsdesc_dynamic_xsavec. */ - cpu_features->xsave_state_full_size = amx_size; + /* Set TLSDESC state size to the compact AMX + state size for XSAVEC. 
*/ + _dl_x86_features_tlsdesc_state_size = amx_size; #endif cpu_features->xsave_state_size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c index ccc6b64dc2..a0b31d80f6 100644 --- a/sysdeps/x86/cpu-tunables.c +++ b/sysdeps/x86/cpu-tunables.c @@ -164,6 +164,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) /* Update xsave_state_size to XSAVE state size. */ cpu_features->xsave_state_size = cpu_features->xsave_state_full_size; + _dl_x86_features_tlsdesc_state_size + = cpu_features->xsave_state_full_size; CPU_FEATURE_UNSET (cpu_features, XSAVEC); } } diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c index 49eeb5f70a..41100a908a 100644 --- a/sysdeps/x86/dl-diagnostics-cpu.c +++ b/sysdeps/x86/dl-diagnostics-cpu.c @@ -89,6 +89,8 @@ _dl_diagnostics_cpu (void) cpu_features->xsave_state_size); print_cpu_features_value ("xsave_state_full_size", cpu_features->xsave_state_full_size); + print_cpu_features_value ("tlsdesc_state_full_size", + _dl_x86_features_tlsdesc_state_size); print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size); print_cpu_features_value ("shared_cache_size", cpu_features->shared_cache_size); diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index aaae44f0e1..03c71387dd 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -934,8 +934,6 @@ struct cpu_features /* The full state size for XSAVE when XSAVEC is disabled by GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC - - and the AMX state size when XSAVEC is available. */ unsigned int xsave_state_full_size; /* Data cache size for use in memory and string routines, typically @@ -989,6 +987,13 @@ extern const struct cpu_features *_dl_x86_get_cpu_features (void) #define __get_cpu_features() _dl_x86_get_cpu_features() +#if IS_IN (rtld) || IS_IN (libc) +/* XSAVE/XSAVEC state size used by TLS descriptors. 
Compared to + xsave_state_size from struct cpu_features, this includes additional + registers. */ +extern unsigned long int _dl_x86_features_tlsdesc_state_size attribute_hidden; +#endif + #if defined (_LIBC) && !IS_IN (nonlib) /* Unused for x86. */ # define INIT_ARCH() diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c new file mode 100644 index 0000000000..f0024c143d --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c new file mode 100644 index 0000000000..f0024c143d --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c new file mode 100644 index 0000000000..f0024c143d --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h index 9f02cfc3eb..44d948696f 100644 --- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h +++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h @@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic: # endif #else /* Allocate stack space of the required size to save the state. */ - sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP + sub _dl_x86_features_tlsdesc_state_size(%rip), %RSP_LP #endif /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9, r10 and r11. */ commit a87d9a2c2cc17a3b22fd3be8d106336f4dcf2042 Author: Florian Weimer Date: Mon Mar 31 21:33:18 2025 +0200 x86: Link tst-gnu2-tls2-x86-noxsave{,c,xsavec} with libpthread This fixes a test build failure on Hurd. Fixes commit 145097dff170507fe73190e8e41194f5b5f7e6bf ("x86: Use separate variable for TLSDESC XSAVE/XSAVEC state size (bug 32810)"). 
Reviewed-by: Adhemerval Zanella (cherry picked from commit c6e2895695118ab59c7b17feb0fcb75a53e3478c) diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 8819fba1b7..01b0192ddf 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -104,6 +104,9 @@ LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy tst-gnu2-tls2-x86-noxsave-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE tst-gnu2-tls2-x86-noxsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC tst-gnu2-tls2-x86-noxsavexsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE,-XSAVEC +$(objpfx)tst-gnu2-tls2-x86-noxsave: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2-x86-noxsavec: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2-x86-noxsavexsavec: $(shared-thread-library) $(objpfx)tst-gnu2-tls2-x86-noxsave.out \ $(objpfx)tst-gnu2-tls2-x86-noxsavec.out \ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ commit 8fe27af20c8b25b84e12bcd52353862a95044aa2 Author: Noah Goldstein Date: Wed Aug 14 14:37:30 2024 +0800 x86: Use `Avoid_Non_Temporal_Memset` to control non-temporal path This is just a refactor and there should be no behavioral change from this commit. The goal is to make `Avoid_Non_Temporal_Memset` a more universal knob for controlling whether we use non-temporal memset rather than having extra logic based on vendor. Reviewed-by: H.J. Lu (cherry picked from commit b93dddfaf440aa12f45d7c356f6ffe9f27d35577) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index ec27337337..8841020b36 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -758,6 +758,12 @@ init_cpu_features (struct cpu_features *cpu_features) unsigned int stepping = 0; enum cpu_features_kind kind; + /* Default is avoid non-temporal memset for non Intel/AMD hardware. This is, + as of writing this, we only have benchmarks indicatings it profitability + on Intel/AMD. 
*/ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + |= bit_arch_Avoid_Non_Temporal_Memset; + cpu_features->cachesize_non_temporal_divisor = 4; #if !HAS_CPUID if (__get_cpuid_max (0, 0) == 0) @@ -783,6 +789,11 @@ init_cpu_features (struct cpu_features *cpu_features) update_active (cpu_features); + /* Benchmarks indicate non-temporal memset can be profitable on Intel + hardware. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; + if (family == 0x06) { model += extended_model; @@ -993,6 +1004,11 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx; + /* Benchmarks indicate non-temporal memset can be profitable on AMD + hardware. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; + if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) { /* Since the FMA4 bit is in CPUID_INDEX_80000001 and diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index ac97414b5b..7b1b61c096 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -988,14 +988,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) rep_movsb_threshold = 2112; - /* Non-temporal stores are more performant on Intel and AMD hardware above - non_temporal_threshold. Enable this for both Intel and AMD hardware. */ - unsigned long int memset_non_temporal_threshold = SIZE_MAX; - if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset) - && (cpu_features->basic.kind == arch_kind_intel - || cpu_features->basic.kind == arch_kind_amd)) - memset_non_temporal_threshold = non_temporal_threshold; - /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of cases slower than the vectorized path (and for some alignments, it is really slow, check BZ #30994). 
*/ @@ -1017,6 +1009,13 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (tunable_size != 0) shared = tunable_size; + /* Non-temporal stores are more performant on some hardware above + non_temporal_threshold. Currently Prefer_Non_Temporal is set for for both + Intel and AMD hardware. */ + unsigned long int memset_non_temporal_threshold = SIZE_MAX; + if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset)) + memset_non_temporal_threshold = non_temporal_threshold; + tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL); if (tunable_size > minimum_non_temporal_threshold && tunable_size <= maximum_non_temporal_threshold) commit 7c6bd71b4dbdadab34e4fd21ec09b86b32daf443 Author: Sunil K Pandey Date: Thu Apr 3 13:00:45 2025 -0700 x86: Optimize xstate size calculation Scan xstate IDs up to the maximum supported xstate ID. Remove the separate AMX xstate calculation. Instead, exclude the AMX space from the start of TILECFG to the end of TILEDATA in xsave_state_size. Completed validation on SKL/SKX/SPR/SDE and compared xsave state size with "ld.so --list-diagnostics" option, no regression. Co-Authored-By: H.J. Lu Reviewed-by: Sunil K Pandey (cherry picked from commit 70b648855185e967e54668b101d24704c3fb869d) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 8841020b36..1d5e2a0072 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -325,13 +325,8 @@ update_active (struct cpu_features *cpu_features) /* Check if XSAVEC is available. 
*/ if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC)) { - unsigned int xstate_comp_offsets[32]; - unsigned int xstate_comp_sizes[32]; -#ifdef __x86_64__ - unsigned int xstate_amx_comp_offsets[32]; - unsigned int xstate_amx_comp_sizes[32]; - unsigned int amx_ecx; -#endif + unsigned int xstate_comp_offsets[X86_XSTATE_MAX_ID + 1]; + unsigned int xstate_comp_sizes[X86_XSTATE_MAX_ID + 1]; unsigned int i; xstate_comp_offsets[0] = 0; @@ -339,39 +334,16 @@ update_active (struct cpu_features *cpu_features) xstate_comp_offsets[2] = 576; xstate_comp_sizes[0] = 160; xstate_comp_sizes[1] = 256; -#ifdef __x86_64__ - xstate_amx_comp_offsets[0] = 0; - xstate_amx_comp_offsets[1] = 160; - xstate_amx_comp_offsets[2] = 576; - xstate_amx_comp_sizes[0] = 160; - xstate_amx_comp_sizes[1] = 256; -#endif - for (i = 2; i < 32; i++) + for (i = 2; i <= X86_XSTATE_MAX_ID; i++) { if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0) { __cpuid_count (0xd, i, eax, ebx, ecx, edx); -#ifdef __x86_64__ - /* Include this in xsave_state_full_size. */ - amx_ecx = ecx; - xstate_amx_comp_sizes[i] = eax; - if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0) - { - /* Exclude this from xsave_state_size. */ - ecx = 0; - xstate_comp_sizes[i] = 0; - } - else -#endif - xstate_comp_sizes[i] = eax; + xstate_comp_sizes[i] = eax; } else { -#ifdef __x86_64__ - amx_ecx = 0; - xstate_amx_comp_sizes[i] = 0; -#endif ecx = 0; xstate_comp_sizes[i] = 0; } @@ -380,42 +352,32 @@ update_active (struct cpu_features *cpu_features) { xstate_comp_offsets[i] = (xstate_comp_offsets[i - 1] - + xstate_comp_sizes[i -1]); + + xstate_comp_sizes[i - 1]); if ((ecx & (1 << 1)) != 0) xstate_comp_offsets[i] = ALIGN_UP (xstate_comp_offsets[i], 64); -#ifdef __x86_64__ - xstate_amx_comp_offsets[i] - = (xstate_amx_comp_offsets[i - 1] - + xstate_amx_comp_sizes[i - 1]); - if ((amx_ecx & (1 << 1)) != 0) - xstate_amx_comp_offsets[i] - = ALIGN_UP (xstate_amx_comp_offsets[i], - 64); -#endif } } /* Use XSAVEC. 
*/ unsigned int size - = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + = (xstate_comp_offsets[X86_XSTATE_MAX_ID] + + xstate_comp_sizes[X86_XSTATE_MAX_ID]); if (size) { + size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, + 64); #ifdef __x86_64__ - unsigned int amx_size - = (xstate_amx_comp_offsets[31] - + xstate_amx_comp_sizes[31]); - amx_size - = ALIGN_UP ((amx_size - + TLSDESC_CALL_REGISTER_SAVE_AREA), - 64); - /* Set TLSDESC state size to the compact AMX - state size for XSAVEC. */ - _dl_x86_features_tlsdesc_state_size = amx_size; + _dl_x86_features_tlsdesc_state_size = size; + /* Exclude the AMX space from the start of TILECFG + space to the end of TILEDATA space. If CPU + doesn't support AMX, TILECFG offset is the same + as TILEDATA + 1 offset. Otherwise, they are + multiples of 64. */ + size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] + - xstate_comp_offsets[X86_XSTATE_TILECFG_ID]); #endif - cpu_features->xsave_state_size - = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, - 64); + cpu_features->xsave_state_size = size; CPU_FEATURE_SET (cpu_features, XSAVEC); } } diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index 7359149e17..1d6cabd816 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -102,6 +102,9 @@ | (1 << X86_XSTATE_ZMM_ID) \ | (1 << X86_XSTATE_APX_F_ID)) +/* The maximum supported xstate ID. */ +# define X86_XSTATE_MAX_ID X86_XSTATE_APX_F_ID + /* AMX state mask. */ # define AMX_STATE_SAVE_MASK \ ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID)) @@ -123,6 +126,9 @@ | (1 << X86_XSTATE_K_ID) \ | (1 << X86_XSTATE_ZMM_H_ID)) +/* The maximum supported xstate ID. */ +# define X86_XSTATE_MAX_ID X86_XSTATE_ZMM_H_ID + /* States to be included in xsave_state_size. */ # define FULL_STATE_SAVE_MASK STATE_SAVE_MASK #endif commit 44f92df8007d57f82b1518e219a0dbb60389ef2c Author: Sunil K Pandey Date: Thu Apr 3 18:14:20 2025 -0700 x86: Add ARL/PTL/CWF model detection support - Add ARROWLAKE model detection. 
- Add PANTHERLAKE model detection. - Add CLEARWATERFOREST model detection. Intel® Architecture Instruction Set Extensions Programming Reference https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2. No regression, validated model detection on SDE. Reviewed-by: H.J. Lu (cherry picked from commit e53eb952b970ac94c97d74fb447418fb327ca096) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 1d5e2a0072..7f21a8227e 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -512,6 +512,7 @@ enum INTEL_ATOM_GOLDMONT, INTEL_ATOM_GOLDMONT_PLUS, INTEL_ATOM_SIERRAFOREST, + INTEL_ATOM_CLEARWATERFOREST, INTEL_ATOM_GRANDRIDGE, INTEL_ATOM_TREMONT, @@ -539,6 +540,7 @@ enum INTEL_BIGCORE_METEORLAKE, INTEL_BIGCORE_LUNARLAKE, INTEL_BIGCORE_ARROWLAKE, + INTEL_BIGCORE_PANTHERLAKE, INTEL_BIGCORE_GRANITERAPIDS, /* Mixed (bigcore + atom SOC). */ @@ -584,6 +586,8 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_ATOM_GOLDMONT_PLUS; case 0xAF: return INTEL_ATOM_SIERRAFOREST; + case 0xDD: + return INTEL_ATOM_CLEARWATERFOREST; case 0xB6: return INTEL_ATOM_GRANDRIDGE; case 0x86: @@ -691,8 +695,12 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_BIGCORE_METEORLAKE; case 0xbd: return INTEL_BIGCORE_LUNARLAKE; + case 0xb5: + case 0xc5: case 0xc6: return INTEL_BIGCORE_ARROWLAKE; + case 0xCC: + return INTEL_BIGCORE_PANTHERLAKE; case 0xAD: case 0xAE: return INTEL_BIGCORE_GRANITERAPIDS; @@ -808,6 +816,7 @@ init_cpu_features (struct cpu_features *cpu_features) Default tuned atom microarch. case INTEL_ATOM_SIERRAFOREST: case INTEL_ATOM_GRANDRIDGE: + case INTEL_ATOM_CLEARWATERFOREST: */ /* Bigcore/Default Tuning. 
*/ @@ -864,6 +873,7 @@ init_cpu_features (struct cpu_features *cpu_features) case INTEL_BIGCORE_METEORLAKE: case INTEL_BIGCORE_LUNARLAKE: case INTEL_BIGCORE_ARROWLAKE: + case INTEL_BIGCORE_PANTHERLAKE: case INTEL_BIGCORE_SAPPHIRERAPIDS: case INTEL_BIGCORE_EMERALDRAPIDS: case INTEL_BIGCORE_GRANITERAPIDS: commit 9ee8083c4edbe5e92af7aabb23261309f03ef05c Author: Sunil K Pandey Date: Fri Apr 11 08:52:52 2025 -0700 x86: Handle unknown Intel processor with default tuning Enable default tuning for unknown Intel processor. Tested on x86, no regression. Co-Authored-By: H.J. Lu Reviewed-by: H.J. Lu (cherry picked from commit 9f0deff558d1d6b08c425c157f50de85013ada9c) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 7f21a8227e..1a6e694abf 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -502,8 +502,8 @@ _Static_assert (((index_arch_Fast_Unaligned_Load "Incorrect index_arch_Fast_Unaligned_Load"); -/* Intel Family-6 microarch list. */ -enum +/* Intel microarch list. */ +enum intel_microarch { /* Atom processors. */ INTEL_ATOM_BONNELL, @@ -555,7 +555,7 @@ enum INTEL_UNKNOWN, }; -static unsigned int +static enum intel_microarch intel_get_fam6_microarch (unsigned int model, __attribute__ ((unused)) unsigned int stepping) { @@ -764,134 +764,20 @@ init_cpu_features (struct cpu_features *cpu_features) cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] &= ~bit_arch_Avoid_Non_Temporal_Memset; + enum intel_microarch microarch = INTEL_UNKNOWN; if (family == 0x06) { model += extended_model; - unsigned int microarch - = intel_get_fam6_microarch (model, stepping); + microarch = intel_get_fam6_microarch (model, stepping); + /* Disable TSX on some processors to avoid TSX on kernels that + weren't updated with the latest microcode package (which + disables broken feature by default). */ switch (microarch) { - /* Atom / KNL tuning. */ - case INTEL_ATOM_BONNELL: - /* BSF is slow on Bonnell. 
*/ - cpu_features->preferred[index_arch_Slow_BSF] - |= bit_arch_Slow_BSF; - break; - - /* Unaligned load versions are faster than SSSE3 - on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ - case INTEL_ATOM_AIRMONT: - case INTEL_ATOM_SILVERMONT: - case INTEL_ATOM_GOLDMONT: - case INTEL_ATOM_GOLDMONT_PLUS: - - /* Knights Landing. Enable Silvermont optimizations. */ - case INTEL_KNIGHTS_LANDING: - - cpu_features->preferred[index_arch_Fast_Unaligned_Load] - |= (bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); - break; - - case INTEL_ATOM_TREMONT: - /* Enable rep string instructions, unaligned load, unaligned - copy, pminub and avoid SSE 4.2 on Tremont. */ - cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); - break; - - /* - Default tuned Knights microarch. - case INTEL_KNIGHTS_MILL: - */ - - /* - Default tuned atom microarch. - case INTEL_ATOM_SIERRAFOREST: - case INTEL_ATOM_GRANDRIDGE: - case INTEL_ATOM_CLEARWATERFOREST: - */ - - /* Bigcore/Default Tuning. */ default: - default_tuning: - /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ - if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) - break; - - enable_modern_features: - /* Rep string instructions, unaligned load, unaligned copy, - and pminub are fast on Intel Core i3, i5 and i7. */ - cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop); break; - case INTEL_BIGCORE_NEHALEM: - case INTEL_BIGCORE_WESTMERE: - /* Older CPUs prefer non-temporal stores at lower threshold. 
*/ - cpu_features->cachesize_non_temporal_divisor = 8; - goto enable_modern_features; - - /* Older Bigcore microarch (smaller non-temporal store - threshold). */ - case INTEL_BIGCORE_SANDYBRIDGE: - case INTEL_BIGCORE_IVYBRIDGE: - case INTEL_BIGCORE_HASWELL: - case INTEL_BIGCORE_BROADWELL: - cpu_features->cachesize_non_temporal_divisor = 8; - goto default_tuning; - - /* Newer Bigcore microarch (larger non-temporal store - threshold). */ - case INTEL_BIGCORE_SKYLAKE_AVX512: - case INTEL_BIGCORE_CANNONLAKE: - /* Benchmarks indicate non-temporal memset is not - necessarily profitable on SKX (and in some cases much - worse). This is likely unique to SKX due its it unique - mesh interconnect (not present on ICX or BWD). Disable - non-temporal on all Skylake servers. */ - cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] - |= bit_arch_Avoid_Non_Temporal_Memset; - case INTEL_BIGCORE_COMETLAKE: - case INTEL_BIGCORE_SKYLAKE: - case INTEL_BIGCORE_KABYLAKE: - case INTEL_BIGCORE_ICELAKE: - case INTEL_BIGCORE_TIGERLAKE: - case INTEL_BIGCORE_ROCKETLAKE: - case INTEL_BIGCORE_RAPTORLAKE: - case INTEL_BIGCORE_METEORLAKE: - case INTEL_BIGCORE_LUNARLAKE: - case INTEL_BIGCORE_ARROWLAKE: - case INTEL_BIGCORE_PANTHERLAKE: - case INTEL_BIGCORE_SAPPHIRERAPIDS: - case INTEL_BIGCORE_EMERALDRAPIDS: - case INTEL_BIGCORE_GRANITERAPIDS: - cpu_features->cachesize_non_temporal_divisor = 2; - goto default_tuning; - - /* Default tuned Mixed (bigcore + atom SOC). */ - case INTEL_MIXED_LAKEFIELD: - case INTEL_MIXED_ALDERLAKE: - cpu_features->cachesize_non_temporal_divisor = 2; - goto default_tuning; - } - - /* Disable TSX on some processors to avoid TSX on kernels that - weren't updated with the latest microcode package (which - disables broken feature by default). */ - switch (microarch) - { case INTEL_BIGCORE_SKYLAKE_AVX512: /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. 
*/ if (stepping <= 5) @@ -900,38 +786,152 @@ init_cpu_features (struct cpu_features *cpu_features) case INTEL_BIGCORE_KABYLAKE: /* NB: Although the errata documents that for model == 0x8e - (kabylake skylake client), only 0xb stepping or lower are - impacted, the intention of the errata was to disable TSX on - all client processors on all steppings. Include 0xc - stepping which is an Intel Core i7-8665U, a client mobile - processor. */ + (kabylake skylake client), only 0xb stepping or lower are + impacted, the intention of the errata was to disable TSX on + all client processors on all steppings. Include 0xc + stepping which is an Intel Core i7-8665U, a client mobile + processor. */ if (stepping > 0xc) break; /* Fall through. */ case INTEL_BIGCORE_SKYLAKE: - /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for - processors listed in: - -https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html - */ - disable_tsx: - CPU_FEATURE_UNSET (cpu_features, HLE); - CPU_FEATURE_UNSET (cpu_features, RTM); - CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); - break; + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for + processors listed in: + + https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html + */ +disable_tsx: + CPU_FEATURE_UNSET (cpu_features, HLE); + CPU_FEATURE_UNSET (cpu_features, RTM); + CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); + break; case INTEL_BIGCORE_HASWELL: - /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working - TSX. Haswell also include other model numbers that have - working TSX. */ - if (model == 0x3f && stepping >= 4) + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working + TSX. Haswell also includes other model numbers that have + working TSX. */ + if (model == 0x3f && stepping >= 4) break; - CPU_FEATURE_UNSET (cpu_features, RTM); - break; + CPU_FEATURE_UNSET (cpu_features, RTM); + break; } } + switch (microarch) + { + /* Atom / KNL tuning. 
*/ + case INTEL_ATOM_BONNELL: + /* BSF is slow on Bonnell. */ + cpu_features->preferred[index_arch_Slow_BSF] + |= bit_arch_Slow_BSF; + break; + + /* Unaligned load versions are faster than SSSE3 + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ + case INTEL_ATOM_AIRMONT: + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: + + /* Knights Landing. Enable Silvermont optimizations. */ + case INTEL_KNIGHTS_LANDING: + + cpu_features->preferred[index_arch_Fast_Unaligned_Load] + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); + break; + + case INTEL_ATOM_TREMONT: + /* Enable rep string instructions, unaligned load, unaligned + copy, pminub and avoid SSE 4.2 on Tremont. */ + cpu_features->preferred[index_arch_Fast_Rep_String] + |= (bit_arch_Fast_Rep_String + | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); + break; + + /* + Default tuned Knights microarch. + case INTEL_KNIGHTS_MILL: + */ + + /* + Default tuned atom microarch. + case INTEL_ATOM_SIERRAFOREST: + case INTEL_ATOM_GRANDRIDGE: + case INTEL_ATOM_CLEARWATERFOREST: + */ + + /* Bigcore/Default Tuning. */ + default: + default_tuning: + /* Unknown Intel processors. Assuming this is one of Core + i3/i5/i7 processors if AVX is available. */ + if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) + break; + + enable_modern_features: + /* Rep string instructions, unaligned load, unaligned copy, + and pminub are fast on Intel Core i3, i5 and i7. */ + cpu_features->preferred[index_arch_Fast_Rep_String] + |= (bit_arch_Fast_Rep_String + | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop); + break; + + case INTEL_BIGCORE_NEHALEM: + case INTEL_BIGCORE_WESTMERE: + /* Older CPUs prefer non-temporal stores at lower threshold. 
*/ + cpu_features->cachesize_non_temporal_divisor = 8; + goto enable_modern_features; + + /* Older Bigcore microarch (smaller non-temporal store + threshold). */ + case INTEL_BIGCORE_SANDYBRIDGE: + case INTEL_BIGCORE_IVYBRIDGE: + case INTEL_BIGCORE_HASWELL: + case INTEL_BIGCORE_BROADWELL: + cpu_features->cachesize_non_temporal_divisor = 8; + goto default_tuning; + + /* Newer Bigcore microarch (larger non-temporal store + threshold). */ + case INTEL_BIGCORE_SKYLAKE_AVX512: + case INTEL_BIGCORE_CANNONLAKE: + /* Benchmarks indicate non-temporal memset is not + necessarily profitable on SKX (and in some cases much + worse). This is likely unique to SKX due to its unique + mesh interconnect (not present on ICX or BWD). Disable + non-temporal on all Skylake servers. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + |= bit_arch_Avoid_Non_Temporal_Memset; + /* fallthrough */ + case INTEL_BIGCORE_COMETLAKE: + case INTEL_BIGCORE_SKYLAKE: + case INTEL_BIGCORE_KABYLAKE: + case INTEL_BIGCORE_ICELAKE: + case INTEL_BIGCORE_TIGERLAKE: + case INTEL_BIGCORE_ROCKETLAKE: + case INTEL_BIGCORE_RAPTORLAKE: + case INTEL_BIGCORE_METEORLAKE: + case INTEL_BIGCORE_LUNARLAKE: + case INTEL_BIGCORE_ARROWLAKE: + case INTEL_BIGCORE_PANTHERLAKE: + case INTEL_BIGCORE_SAPPHIRERAPIDS: + case INTEL_BIGCORE_EMERALDRAPIDS: + case INTEL_BIGCORE_GRANITERAPIDS: + /* Default tuned Mixed (bigcore + atom SOC). */ + case INTEL_MIXED_LAKEFIELD: + case INTEL_MIXED_ALDERLAKE: + cpu_features->cachesize_non_temporal_divisor = 2; + goto default_tuning; + } /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER if AVX512ER is available. Don't use AVX512 to avoid lower CPU commit d8a1a1aef7a58b991505b9a1349a40736dec3abf Author: H.J. Lu Date: Sat Apr 12 08:37:29 2025 -0700 x86: Detect Intel Diamond Rapids Detect Intel Diamond Rapids and tune it similar to Intel Granite Rapids. Signed-off-by: H.J. 
Lu Reviewed-by: Sunil K Pandey (cherry picked from commit de14f1959ee5f9b845a7cae43bee03068b8136f0) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 1a6e694abf..52a2f03bdd 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -542,6 +542,7 @@ enum intel_microarch INTEL_BIGCORE_ARROWLAKE, INTEL_BIGCORE_PANTHERLAKE, INTEL_BIGCORE_GRANITERAPIDS, + INTEL_BIGCORE_DIAMONDRAPIDS, /* Mixed (bigcore + atom SOC). */ INTEL_MIXED_LAKEFIELD, @@ -817,6 +818,16 @@ disable_tsx: break; } } + else if (family == 19) + switch (model) + { + case 0x01: + microarch = INTEL_BIGCORE_DIAMONDRAPIDS; + break; + + default: + break; + } switch (microarch) { @@ -926,6 +937,7 @@ disable_tsx: case INTEL_BIGCORE_SAPPHIRERAPIDS: case INTEL_BIGCORE_EMERALDRAPIDS: case INTEL_BIGCORE_GRANITERAPIDS: + case INTEL_BIGCORE_DIAMONDRAPIDS: /* Default tuned Mixed (bigcore + atom SOC). */ case INTEL_MIXED_LAKEFIELD: case INTEL_MIXED_ALDERLAKE: commit 736e6735053f12181d3d287898dd5fdb9e8baf59 Author: Frank Barrus Date: Wed Dec 4 07:55:02 2024 -0500 pthreads NPTL: lost wakeup fix 2 This fixes the lost wakeup (from a bug in signal stealing) with a change in the usage of g_signals[] in the condition variable internal state. It also completely eliminates the concept and handling of signal stealing, as well as the need for signalers to block to wait for waiters to wake up every time there is a G1/G2 switch. This greatly reduces the average and maximum latency for pthread_cond_signal. The g_signals[] field now contains a signal count that is relative to the current g1_start value. Since it is a 32-bit field, and the LSB is still reserved (though not currently used anymore), it has a 31-bit value that corresponds to the low 31 bits of the sequence number in g1_start. 
(since g1_start also has an LSB flag, this means bits 31:1 in g_signals correspond to bits 31:1 in g1_start, plus the current signal count) By making the signal count relative to g1_start, there is no longer any ambiguity or A/B/A issue, and thus any checks before blocking, including the futex call itself, are guaranteed not to block if the G1/G2 switch occurs, even if the signal count remains the same. This allows initially safely blocking in G2 until the switch to G1 occurs, and then transitioning from G1 to a new G1 or G2, and always being able to distinguish the state change. This removes the race condition and A/B/A problems that otherwise occurred if a late (pre-empted) waiter were to resume just as the futex call attempted to block on g_signal since otherwise there was no last opportunity to re-check things like whether the current G1 group was already closed. By fixing these issues, the signal stealing code can be eliminated, since there is no concept of signal stealing anymore. The code to block for all waiters to exit g_refs can also be removed, since any waiters that are still in the g_refs region can be guaranteed to safely wake up and exit. If there are still any left at this time, they are all sent one final futex wakeup to ensure that they are not blocked any longer, but there is no need for the signaller to block and wait for them to wake up and exit the g_refs region. The signal count is then effectively "zeroed" but since it is now relative to g1_start, this is done by advancing it to a new value that can be observed by any pending blocking waiters. Any late waiters can always tell the difference, and can thus just cleanly exit if they are in a stale G1 or G2. They can never steal a signal from the current G1 if they are not in the current G1, since the signal value that has to match in the cmpxchg has the low 31 bits of the g1_start value contained in it, and that's first checked, and then it won't match if there's a G1/G2 change. 
Note: the 31-bit sequence number used in g_signals is designed to handle wrap-around when checking the signal count, but if the entire 31-bit wraparound (2 billion signals) occurs while there is still a late waiter that has not yet resumed, and it happens to then match the current g1_start low bits, and the pre-emption occurs after the normal "closed group" checks (which are 64-bit) but then hits the futex syscall and signal consuming code, then an A/B/A issue could still result and cause an incorrect assumption about whether it should block. This particular scenario seems unlikely in practice. Note that once awake from the futex, the waiter would notice the closed group before consuming the signal (since that's still a 64-bit check that would not be aliased in the wrap-around in g_signals), so the biggest impact would be blocking on the futex until the next full wakeup from a G1/G2 switch. Signed-off-by: Frank Barrus Reviewed-by: Carlos O'Donell (cherry picked from commit 1db84775f831a1494993ce9c118deaf9537cc50a) diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c index 3487557bb8..4855b8899f 100644 --- a/nptl/pthread_cond_common.c +++ b/nptl/pthread_cond_common.c @@ -201,7 +201,6 @@ static bool __attribute__ ((unused)) __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, unsigned int *g1index, int private) { - const unsigned int maxspin = 0; unsigned int g1 = *g1index; /* If there is no waiter in G2, we don't do anything. The expression may @@ -222,84 +221,46 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, * New waiters arriving concurrently with the group switching will all go into G2 until we atomically make the switch. Waiters existing in G2 are not affected. - * Waiters in G1 will be closed out immediately by setting a flag in - __g_signals, which will prevent waiters from blocking using a futex on - __g_signals and also notifies them that the group is closed. 
As a - result, they will eventually remove their group reference, allowing us - to close switch group roles. */ - - /* First, set the closed flag on __g_signals. This tells waiters that are - about to wait that they shouldn't do that anymore. This basically - serves as an advance notification of the upcoming change to __g1_start; - waiters interpret it as if __g1_start was larger than their waiter - sequence position. This allows us to change __g1_start after waiting - for all existing waiters with group references to leave, which in turn - makes recovery after stealing a signal simpler because it then can be - skipped if __g1_start indicates that the group is closed (otherwise, - we would have to recover always because waiters don't know how big their - groups are). Relaxed MO is fine. */ - atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1); - - /* Wait until there are no group references anymore. The fetch-or operation - injects us into the modification order of __g_refs; release MO ensures - that waiters incrementing __g_refs after our fetch-or see the previous - changes to __g_signals and to __g1_start that had to happen before we can - switch this G1 and alias with an older group (we have two groups, so - aliasing requires switching group roles twice). Note that nobody else - can have set the wake-request flag, so we do not have to act upon it. - - Also note that it is harmless if older waiters or waiters from this G1 - get a group reference after we have quiesced the group because it will - remain closed for them either because of the closed flag in __g_signals - or the later update to __g1_start. New waiters will never arrive here - but instead continue to go into the still current G2. */ - unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0); - while ((r >> 1) > 0) - { - for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--) - { - /* TODO Back off. 
*/ - r = atomic_load_relaxed (cond->__data.__g_refs + g1); - } - if ((r >> 1) > 0) - { - /* There is still a waiter after spinning. Set the wake-request - flag and block. Relaxed MO is fine because this is just about - this futex word. - - Update r to include the set wake-request flag so that the upcoming - futex_wait only blocks if the flag is still set (otherwise, we'd - violate the basic client-side futex protocol). */ - r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1) | 1; - - if ((r >> 1) > 0) - futex_wait_simple (cond->__data.__g_refs + g1, r, private); - /* Reload here so we eventually see the most recent value even if we - do not spin. */ - r = atomic_load_relaxed (cond->__data.__g_refs + g1); - } - } - /* Acquire MO so that we synchronize with the release operation that waiters - use to decrement __g_refs and thus happen after the waiters we waited - for. */ - atomic_thread_fence_acquire (); + * Waiters in G1 will be closed out immediately by the advancing of + __g_signals to the next "lowseq" (low 31 bits of the new g1_start), + which will prevent waiters from blocking using a futex on + __g_signals since it provides enough signals for all possible + remaining waiters. As a result, they can each consume a signal + and they will eventually remove their group reference. */ /* Update __g1_start, which finishes closing this group. The value we add will never be negative because old_orig_size can only be zero when we switch groups the first time after a condvar was initialized, in which - case G1 will be at index 1 and we will add a value of 1. See above for - why this takes place after waiting for quiescence of the group. + case G1 will be at index 1 and we will add a value of 1. Relaxed MO is fine because the change comes with no additional constraints that others would have to observe. */ __condvar_add_g1_start_relaxed (cond, (old_orig_size << 1) + (g1 == 1 ? 
1 : - 1)); - /* Now reopen the group, thus enabling waiters to again block using the - futex controlled by __g_signals. Release MO so that observers that see - no signals (and thus can block) also see the write __g1_start and thus - that this is now a new group (see __pthread_cond_wait_common for the - matching acquire MO loads). */ - atomic_store_release (cond->__data.__g_signals + g1, 0); + unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U; + + /* If any waiters still hold group references (and thus could be blocked), + then wake them all up now and prevent any running ones from blocking. + This is effectively a catch-all for any possible current or future + bugs that can allow the group size to reach 0 before all G1 waiters + have been awakened or at least given signals to consume, or any + other case that can leave blocked (or about to block) older waiters.. */ + if ((atomic_fetch_or_release (cond->__data.__g_refs + g1, 0) >> 1) > 0) + { + /* First advance signals to the end of the group (i.e. enough signals + for the entire G1 group) to ensure that waiters which have not + yet blocked in the futex will not block. + Note that in the vast majority of cases, this should never + actually be necessary, since __g_signals will have enough + signals for the remaining g_refs waiters. As an optimization, + we could check this first before proceeding, although that + could still leave the potential for futex lost wakeup bugs + if the signal count was non-zero but the futex wakeup + was somehow lost. */ + atomic_store_release (cond->__data.__g_signals + g1, lowseq); + + futex_wake (cond->__data.__g_signals + g1, INT_MAX, private); + } /* At this point, the old G1 is now a valid new G2 (but not in use yet). 
No old waiter can neither grab a signal nor acquire a reference without @@ -311,6 +272,10 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, g1 ^= 1; *g1index ^= 1; + /* Now advance the new G1 g_signals to the new lowseq, giving it + an effective signal count of 0 to start. */ + atomic_store_release (cond->__data.__g_signals + g1, lowseq); + /* These values are just observed by signalers, and thus protected by the lock. */ unsigned int orig_size = wseq - (old_g1_start + old_orig_size); diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 66786c7b90..3d290e39c8 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -238,9 +238,7 @@ __condvar_cleanup_waiting (void *arg) signaled), and a reference count. The group reference count is used to maintain the number of waiters that - are using the group's futex. Before a group can change its role, the - reference count must show that no waiters are using the futex anymore; this - prevents ABA issues on the futex word. + are using the group's futex. To represent which intervals in the waiter sequence the groups cover (and thus also which group slot contains G1 or G2), we use a 64b counter to @@ -300,11 +298,12 @@ __condvar_cleanup_waiting (void *arg) last reference. * Reference count used by waiters concurrently with signalers that have acquired the condvar-internal lock. - __g_signals: The number of signals that can still be consumed. + __g_signals: The number of signals that can still be consumed, relative to + the current g1_start. (i.e. bits 31 to 1 of __g_signals are bits + 31 to 1 of g1_start with the signal count added) * Used as a futex word by waiters. Used concurrently by waiters and signalers. - * LSB is true iff this group has been completely signaled (i.e., it is - closed). + * LSB is currently reserved and 0. __g_size: Waiters remaining in this group (i.e., which have not been signaled yet. 
* Accessed by signalers and waiters that cancel waiting (both do so only @@ -328,18 +327,6 @@ __condvar_cleanup_waiting (void *arg) sufficient because if a waiter can see a sufficiently large value, it could have also consume a signal in the waiters group. - Waiters try to grab a signal from __g_signals without holding a reference - count, which can lead to stealing a signal from a more recent group after - their own group was already closed. They cannot always detect whether they - in fact did because they do not know when they stole, but they can - conservatively add a signal back to the group they stole from; if they - did so unnecessarily, all that happens is a spurious wake-up. To make this - even less likely, __g1_start contains the index of the current g2 too, - which allows waiters to check if there aliasing on the group slots; if - there wasn't, they didn't steal from the current G1, which means that the - G1 they stole from must have been already closed and they do not need to - fix anything. - It is essential that the last field in pthread_cond_t is __g_signals[1]: The previous condvar used a pointer-sized field in pthread_cond_t, so a PTHREAD_COND_INITIALIZER from that condvar implementation might only @@ -435,6 +422,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, { while (1) { + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + /* Spin-wait first. Note that spinning first without checking whether a timeout passed might lead to what looks like a spurious wake-up even @@ -446,35 +436,45 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, having to compare against the current time seems to be the right choice from a performance perspective for most use cases. 
*/ unsigned int spin = maxspin; - while (signals == 0 && spin > 0) + while (spin > 0 && ((int)(signals - lowseq) < 2)) { /* Check that we are not spinning on a group that's already closed. */ - if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) - goto done; + if (seq < (g1_start >> 1)) + break; /* TODO Back off. */ /* Reload signals. See above for MO. */ signals = atomic_load_acquire (cond->__data.__g_signals + g); + g1_start = __condvar_load_g1_start_relaxed (cond); + lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; spin--; } - /* If our group will be closed as indicated by the flag on signals, - don't bother grabbing a signal. */ - if (signals & 1) - goto done; - - /* If there is an available signal, don't block. */ - if (signals != 0) + if (seq < (g1_start >> 1)) + { + /* If the group is closed already, + then this waiter originally had enough extra signals to + consume, up until the time its group was closed. */ + goto done; + } + + /* If there is an available signal, don't block. + If __g1_start has advanced at all, then we must be in G1 + by now, perhaps in the process of switching back to an older + G2, but in either case we're allowed to consume the available + signal and should not block anymore. */ + if ((int)(signals - lowseq) >= 2) break; /* No signals available after spinning, so prepare to block. We first acquire a group reference and use acquire MO for that so that we synchronize with the dummy read-modify-write in __condvar_quiesce_and_switch_g1 if we read from that. In turn, - in this case this will make us see the closed flag on __g_signals - that designates a concurrent attempt to reuse the group's slot. + in this case this will make us see the advancement of __g_signals + to the upcoming new g1_start that occurs with a concurrent + attempt to reuse the group's slot. We use acquire MO for the __g_signals check to make the __g1_start check work (see spinning above). 
Note that the group reference acquisition will not mask the @@ -482,15 +482,24 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, an atomic read-modify-write operation and thus extend the release sequence. */ atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); - if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0) - || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))) + signals = atomic_load_acquire (cond->__data.__g_signals + g); + g1_start = __condvar_load_g1_start_relaxed (cond); + lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + + if (seq < (g1_start >> 1)) { - /* Our group is closed. Wake up any signalers that might be - waiting. */ + /* group is closed already, so don't block */ __condvar_dec_grefs (cond, g, private); goto done; } + if ((int)(signals - lowseq) >= 2) + { + /* a signal showed up or G1/G2 switched after we grabbed the refcount */ + __condvar_dec_grefs (cond, g, private); + break; + } + // Now block. struct _pthread_cleanup_buffer buffer; struct _condvar_cleanup_buffer cbuffer; @@ -501,7 +510,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); err = __futex_abstimed_wait_cancelable64 ( - cond->__data.__g_signals + g, 0, clockid, abstime, private); + cond->__data.__g_signals + g, signals, clockid, abstime, private); __pthread_cleanup_pop (&buffer, 0); @@ -524,6 +533,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, signals = atomic_load_acquire (cond->__data.__g_signals + g); } + if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) + goto done; } /* Try to grab a signal. Use acquire MO so that we see an up-to-date value of __g1_start below (see spinning above for a similar case). 
In @@ -532,69 +543,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, &signals, signals - 2)); - /* We consumed a signal but we could have consumed from a more recent group - that aliased with ours due to being in the same group slot. If this - might be the case our group must be closed as visible through - __g1_start. */ - uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); - if (seq < (g1_start >> 1)) - { - /* We potentially stole a signal from a more recent group but we do not - know which group we really consumed from. - We do not care about groups older than current G1 because they are - closed; we could have stolen from these, but then we just add a - spurious wake-up for the current groups. - We will never steal a signal from current G2 that was really intended - for G2 because G2 never receives signals (until it becomes G1). We - could have stolen a signal from G2 that was conservatively added by a - previous waiter that also thought it stole a signal -- but given that - that signal was added unnecessarily, it's not a problem if we steal - it. - Thus, the remaining case is that we could have stolen from the current - G1, where "current" means the __g1_start value we observed. However, - if the current G1 does not have the same slot index as we do, we did - not steal from it and do not need to undo that. This is the reason - for putting a bit with G2's index into__g1_start as well. */ - if (((g1_start & 1) ^ 1) == g) - { - /* We have to conservatively undo our potential mistake of stealing - a signal. We can stop trying to do that when the current G1 - changes because other spinning waiters will notice this too and - __condvar_quiesce_and_switch_g1 has checked that there are no - futex waiters anymore before switching G1. 
- Relaxed MO is fine for the __g1_start load because we need to - merely be able to observe this fact and not have to observe - something else as well. - ??? Would it help to spin for a little while to see whether the - current G1 gets closed? This might be worthwhile if the group is - small or close to being closed. */ - unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g); - while (__condvar_load_g1_start_relaxed (cond) == g1_start) - { - /* Try to add a signal. We don't need to acquire the lock - because at worst we can cause a spurious wake-up. If the - group is in the process of being closed (LSB is true), this - has an effect similar to us adding a signal. */ - if (((s & 1) != 0) - || atomic_compare_exchange_weak_relaxed - (cond->__data.__g_signals + g, &s, s + 2)) - { - /* If we added a signal, we also need to add a wake-up on - the futex. We also need to do that if we skipped adding - a signal because the group is being closed because - while __condvar_quiesce_and_switch_g1 could have closed - the group, it might still be waiting for futex waiters to - leave (and one of those waiters might be the one we stole - the signal from, which cause it to block using the - futex). */ - futex_wake (cond->__data.__g_signals + g, 1, private); - break; - } - /* TODO Back off. */ - } - } - } - done: /* Confirm that we have been woken. We do that before acquiring the mutex commit 88d999d840e77c9917f08870094a23ce42294848 Author: Malte Skarupke Date: Wed Dec 4 07:55:22 2024 -0500 nptl: Update comments and indentation for new condvar implementation Some comments were wrong after the most recent commit. This fixes that. Also fixing indentation where it was using spaces instead of tabs. 
Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit 0cc973160c23bb67f895bc887dd6942d29f8fee3) diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c index 4855b8899f..3475d15123 100644 --- a/nptl/pthread_cond_common.c +++ b/nptl/pthread_cond_common.c @@ -221,8 +221,9 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, * New waiters arriving concurrently with the group switching will all go into G2 until we atomically make the switch. Waiters existing in G2 are not affected. - * Waiters in G1 will be closed out immediately by the advancing of - __g_signals to the next "lowseq" (low 31 bits of the new g1_start), + * Waiters in G1 have already received a signal and been woken. If they + haven't woken yet, they will be closed out immediately by the advancing + of __g_signals to the next "lowseq" (low 31 bits of the new g1_start), which will prevent waiters from blocking using a futex on __g_signals since it provides enough signals for all possible remaining waiters. As a result, they can each consume a signal diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 3d290e39c8..ad2cee7d59 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -249,7 +249,7 @@ __condvar_cleanup_waiting (void *arg) figure out whether they are in a group that has already been completely signaled (i.e., if the current G1 starts at a later position that the waiter's position). Waiters cannot determine whether they are currently - in G2 or G1 -- but they do not have too because all they are interested in + in G2 or G1 -- but they do not have to because all they are interested in is whether there are available signals, and they always start in G2 (whose group slot they know because of the bit in the waiter sequence. 
Signalers will simply fill the right group until it is completely signaled and can @@ -412,7 +412,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, } /* Now wait until a signal is available in our group or it is closed. - Acquire MO so that if we observe a value of zero written after group + Acquire MO so that if we observe (signals == lowseq) after group switching in __condvar_quiesce_and_switch_g1, we synchronize with that store and will see the prior update of __g1_start done while switching groups too. */ @@ -422,8 +422,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, { while (1) { - uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); - unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; /* Spin-wait first. Note that spinning first without checking whether a timeout @@ -447,21 +447,21 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, /* Reload signals. See above for MO. */ signals = atomic_load_acquire (cond->__data.__g_signals + g); - g1_start = __condvar_load_g1_start_relaxed (cond); - lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + g1_start = __condvar_load_g1_start_relaxed (cond); + lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; spin--; } - if (seq < (g1_start >> 1)) + if (seq < (g1_start >> 1)) { - /* If the group is closed already, + /* If the group is closed already, then this waiter originally had enough extra signals to consume, up until the time its group was closed. */ goto done; - } + } /* If there is an available signal, don't block. 
- If __g1_start has advanced at all, then we must be in G1 + If __g1_start has advanced at all, then we must be in G1 by now, perhaps in the process of switching back to an older G2, but in either case we're allowed to consume the available signal and should not block anymore. */ @@ -483,22 +483,23 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, sequence. */ atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); signals = atomic_load_acquire (cond->__data.__g_signals + g); - g1_start = __condvar_load_g1_start_relaxed (cond); - lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + g1_start = __condvar_load_g1_start_relaxed (cond); + lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; - if (seq < (g1_start >> 1)) + if (seq < (g1_start >> 1)) { - /* group is closed already, so don't block */ + /* group is closed already, so don't block */ __condvar_dec_grefs (cond, g, private); goto done; } if ((int)(signals - lowseq) >= 2) { - /* a signal showed up or G1/G2 switched after we grabbed the refcount */ + /* a signal showed up or G1/G2 switched after we grabbed the + refcount */ __condvar_dec_grefs (cond, g, private); break; - } + } // Now block. struct _pthread_cleanup_buffer buffer; @@ -536,10 +537,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) goto done; } - /* Try to grab a signal. Use acquire MO so that we see an up-to-date value - of __g1_start below (see spinning above for a similar case). In - particular, if we steal from a more recent group, we will also see a - more recent __g1_start below. */ + /* Try to grab a signal. See above for MO. 
(if we do another loop + iteration we need to see the correct value of g1_start) */ while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, &signals, signals - 2)); commit 136a29f9d0a3924828d5a16be82d054637517c95 Author: Malte Skarupke Date: Wed Dec 4 07:55:50 2024 -0500 nptl: Remove unnecessary catch-all-wake in condvar group switch This wake is unnecessary. We only switch groups after every sleeper in a group has been woken. Sure, they may take a while to actually wake up and may still hold a reference, but waking them a second time doesn't speed that up. Instead this just makes the code more complicated and may hide problems. In particular this safety wake wouldn't even have helped with the bug that was fixed by Barrus' patch: The bug there was that pthread_cond_signal would not switch g1 when it should, so we wouldn't even have entered this code path. Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit b42cc6af11062c260c7dfa91f1c89891366fed3e) diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c index 3475d15123..30b8eee149 100644 --- a/nptl/pthread_cond_common.c +++ b/nptl/pthread_cond_common.c @@ -221,13 +221,7 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, * New waiters arriving concurrently with the group switching will all go into G2 until we atomically make the switch. Waiters existing in G2 are not affected. - * Waiters in G1 have already received a signal and been woken. If they - haven't woken yet, they will be closed out immediately by the advancing - of __g_signals to the next "lowseq" (low 31 bits of the new g1_start), - which will prevent waiters from blocking using a futex on - __g_signals since it provides enough signals for all possible - remaining waiters. As a result, they can each consume a signal - and they will eventually remove their group reference. */ + * Waiters in G1 have already received a signal and been woken. 
*/ /* Update __g1_start, which finishes closing this group. The value we add will never be negative because old_orig_size can only be zero when we @@ -240,29 +234,6 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U; - /* If any waiters still hold group references (and thus could be blocked), - then wake them all up now and prevent any running ones from blocking. - This is effectively a catch-all for any possible current or future - bugs that can allow the group size to reach 0 before all G1 waiters - have been awakened or at least given signals to consume, or any - other case that can leave blocked (or about to block) older waiters.. */ - if ((atomic_fetch_or_release (cond->__data.__g_refs + g1, 0) >> 1) > 0) - { - /* First advance signals to the end of the group (i.e. enough signals - for the entire G1 group) to ensure that waiters which have not - yet blocked in the futex will not block. - Note that in the vast majority of cases, this should never - actually be necessary, since __g_signals will have enough - signals for the remaining g_refs waiters. As an optimization, - we could check this first before proceeding, although that - could still leave the potential for futex lost wakeup bugs - if the signal count was non-zero but the futex wakeup - was somehow lost. */ - atomic_store_release (cond->__data.__g_signals + g1, lowseq); - - futex_wake (cond->__data.__g_signals + g1, INT_MAX, private); - } - /* At this point, the old G1 is now a valid new G2 (but not in use yet). No old waiter can neither grab a signal nor acquire a reference without noticing that __g1_start is larger. commit 2a259b6d77dc5bdab5c8f4ee0e69572d5699d4bf Author: Malte Skarupke Date: Wed Dec 4 07:56:13 2024 -0500 nptl: Remove unnecessary quadruple check in pthread_cond_wait pthread_cond_wait was checking whether it was in a closed group no less than four times. Checking once is enough. 
Here are the four checks: 1. While spin-waiting. This was dead code: maxspin is set to 0 and has been for years. 2. Before deciding to go to sleep, and before incrementing grefs: I kept this 3. After incrementing grefs. There is no reason to think that the group would close while we do an atomic increment. Obviously it could close at any point, but that doesn't mean we have to recheck after every step. This check was equally good as check 2, except it has to do more work. 4. When we find ourselves in a group that has a signal. We only get here after we check that we're not in a closed group. There is no need to check again. The check would only have helped in cases where the compare_exchange in the next line would also have failed. Relying on the compare_exchange is fine. Removing the duplicate checks clarifies the code. Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit 4f7b051f8ee3feff1b53b27a906f245afaa9cee1) diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index ad2cee7d59..cfdd13bb87 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -366,7 +366,6 @@ static __always_inline int __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, clockid_t clockid, const struct __timespec64 *abstime) { - const int maxspin = 0; int err; int result = 0; @@ -425,33 +424,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; - /* Spin-wait first. - Note that spinning first without checking whether a timeout - passed might lead to what looks like a spurious wake-up even - though we should return ETIMEDOUT (e.g., if the caller provides - an absolute timeout that is clearly in the past). 
However, - (1) spurious wake-ups are allowed, (2) it seems unlikely that a - user will (ab)use pthread_cond_wait as a check for whether a - point in time is in the past, and (3) spinning first without - having to compare against the current time seems to be the right - choice from a performance perspective for most use cases. */ - unsigned int spin = maxspin; - while (spin > 0 && ((int)(signals - lowseq) < 2)) - { - /* Check that we are not spinning on a group that's already - closed. */ - if (seq < (g1_start >> 1)) - break; - - /* TODO Back off. */ - - /* Reload signals. See above for MO. */ - signals = atomic_load_acquire (cond->__data.__g_signals + g); - g1_start = __condvar_load_g1_start_relaxed (cond); - lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; - spin--; - } - if (seq < (g1_start >> 1)) { /* If the group is closed already, @@ -482,24 +454,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, an atomic read-modify-write operation and thus extend the release sequence. */ atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); - signals = atomic_load_acquire (cond->__data.__g_signals + g); - g1_start = __condvar_load_g1_start_relaxed (cond); - lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; - - if (seq < (g1_start >> 1)) - { - /* group is closed already, so don't block */ - __condvar_dec_grefs (cond, g, private); - goto done; - } - - if ((int)(signals - lowseq) >= 2) - { - /* a signal showed up or G1/G2 switched after we grabbed the - refcount */ - __condvar_dec_grefs (cond, g, private); - break; - } // Now block. struct _pthread_cleanup_buffer buffer; @@ -533,9 +487,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, /* Reload signals. See above for MO. */ signals = atomic_load_acquire (cond->__data.__g_signals + g); } - - if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) - goto done; } /* Try to grab a signal. See above for MO. 
(if we do another loop iteration we need to see the correct value of g1_start) */ commit a2465f4293ecc37ac4650fbd02e517bc6fd801c6 Author: Malte Skarupke Date: Wed Dec 4 07:56:38 2024 -0500 nptl: Remove g_refs from condition variables This variable used to be needed to wait in group switching until all sleepers have confirmed that they have woken. This is no longer needed. Nothing waits on this variable so there is no need to track how many threads are currently asleep in each group. Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit c36fc50781995e6758cae2b6927839d0157f213c) diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index cfdd13bb87..411fc0380b 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -143,23 +143,6 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, } } -/* Wake up any signalers that might be waiting. */ -static void -__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private) -{ - /* Release MO to synchronize-with the acquire load in - __condvar_quiesce_and_switch_g1. */ - if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3) - { - /* Clear the wake-up request flag before waking up. We do not need more - than relaxed MO and it doesn't matter if we apply this for an aliased - group because we wake all futex waiters right after clearing the - flag. */ - atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1); - futex_wake (cond->__data.__g_refs + g, INT_MAX, private); - } -} - /* Clean-up for cancellation of waiters waiting for normal signals. We cancel our registration as a waiter, confirm we have woken up, and re-acquire the mutex. 
*/ @@ -171,8 +154,6 @@ __condvar_cleanup_waiting (void *arg) pthread_cond_t *cond = cbuffer->cond; unsigned g = cbuffer->wseq & 1; - __condvar_dec_grefs (cond, g, cbuffer->private); - __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private); /* FIXME With the current cancellation implementation, it is possible that a thread is cancelled after it has returned from a syscall. This could @@ -327,15 +308,6 @@ __condvar_cleanup_waiting (void *arg) sufficient because if a waiter can see a sufficiently large value, it could have also consume a signal in the waiters group. - It is essential that the last field in pthread_cond_t is __g_signals[1]: - The previous condvar used a pointer-sized field in pthread_cond_t, so a - PTHREAD_COND_INITIALIZER from that condvar implementation might only - initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes - in total instead of the 48 we need). __g_signals[1] is not accessed before - the first group switch (G2 starts at index 0), which will set its value to - zero after a harmless fetch-or whose return value is ignored. This - effectively completes initialization. - Limitations: * This condvar isn't designed to allow for more than @@ -440,21 +412,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, if ((int)(signals - lowseq) >= 2) break; - /* No signals available after spinning, so prepare to block. - We first acquire a group reference and use acquire MO for that so - that we synchronize with the dummy read-modify-write in - __condvar_quiesce_and_switch_g1 if we read from that. In turn, - in this case this will make us see the advancement of __g_signals - to the upcoming new g1_start that occurs with a concurrent - attempt to reuse the group's slot. - We use acquire MO for the __g_signals check to make the - __g1_start check work (see spinning above). 
- Note that the group reference acquisition will not mask the - release MO when decrementing the reference count because we use - an atomic read-modify-write operation and thus extend the release - sequence. */ - atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); - // Now block. struct _pthread_cleanup_buffer buffer; struct _condvar_cleanup_buffer cbuffer; @@ -471,18 +428,11 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) { - __condvar_dec_grefs (cond, g, private); - /* If we timed out, we effectively cancel waiting. Note that - we have decremented __g_refs before cancellation, so that a - deadlock between waiting for quiescence of our group in - __condvar_quiesce_and_switch_g1 and us trying to acquire - the lock during cancellation is not possible. */ + /* If we timed out, we effectively cancel waiting. */ __condvar_cancel_waiting (cond, seq, g, private); result = err; goto done; } - else - __condvar_dec_grefs (cond, g, private); /* Reload signals. See above for MO. 
*/ signals = atomic_load_acquire (cond->__data.__g_signals + g); diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c index 1336e9c79d..bdcb45c536 100644 --- a/nptl/tst-cond22.c +++ b/nptl/tst-cond22.c @@ -106,13 +106,13 @@ do_test (void) status = 1; } - printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n", c.__data.__wseq.__value32.__high, c.__data.__wseq.__value32.__low, c.__data.__g1_start.__value32.__high, c.__data.__g1_start.__value32.__low, - c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], - c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], + c.__data.__g_signals[0], c.__data.__g_size[0], + c.__data.__g_signals[1], c.__data.__g_size[1], c.__data.__g1_orig_size, c.__data.__wrefs); if (pthread_create (&th, NULL, tf, (void *) 1l) != 0) @@ -152,13 +152,13 @@ do_test (void) status = 1; } - printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n", c.__data.__wseq.__value32.__high, c.__data.__wseq.__value32.__low, c.__data.__g1_start.__value32.__high, c.__data.__g1_start.__value32.__low, - c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], - c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], + c.__data.__g_signals[0], c.__data.__g_size[0], + c.__data.__g_signals[1], c.__data.__g_size[1], c.__data.__g1_orig_size, c.__data.__wrefs); return status; diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h index df54eef6f7..a3d482f80f 100644 --- a/sysdeps/nptl/bits/thread-shared-types.h +++ b/sysdeps/nptl/bits/thread-shared-types.h @@ -95,8 +95,7 @@ struct __pthread_cond_s { __atomic_wide_counter __wseq; __atomic_wide_counter __g1_start; - unsigned int __g_refs[2] __LOCK_ALIGNMENT; - unsigned int __g_size[2]; + unsigned int __g_size[2] __LOCK_ALIGNMENT; unsigned int __g1_orig_size; unsigned int __wrefs; 
unsigned int __g_signals[2]; diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h index 3d4f4a756c..9af75d6eae 100644 --- a/sysdeps/nptl/pthread.h +++ b/sysdeps/nptl/pthread.h @@ -152,7 +152,7 @@ enum /* Conditional variable handling. */ -#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } } +#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0} } } /* Cleanup buffers */ commit fa110993a6390ae5c97dff613ef02b59ec78c5da Author: Malte Skarupke Date: Wed Dec 4 08:03:44 2024 -0500 nptl: Use a single loop in pthread_cond_wait instead of a nested loop The loop was a little more complicated than necessary. There was only one break statement out of the inner loop, and the outer loop was nearly empty. So just remove the outer loop, moving its code to the one break statement in the inner loop. This allows us to replace all gotos with break statements. Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit 929a4764ac90382616b6a21f099192b2475da674) diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 411fc0380b..683cb2b133 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -382,17 +382,15 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, return err; } - /* Now wait until a signal is available in our group or it is closed. - Acquire MO so that if we observe (signals == lowseq) after group - switching in __condvar_quiesce_and_switch_g1, we synchronize with that - store and will see the prior update of __g1_start done while switching - groups too. */ - unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); - - do - { + while (1) { + /* Now wait until a signal is available in our group or it is closed. 
+ Acquire MO so that if we observe (signals == lowseq) after group + switching in __condvar_quiesce_and_switch_g1, we synchronize with that + store and will see the prior update of __g1_start done while switching + groups too. */ + unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; @@ -401,7 +399,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, /* If the group is closed already, then this waiter originally had enough extra signals to consume, up until the time its group was closed. */ - goto done; + break; } /* If there is an available signal, don't block. @@ -410,7 +408,16 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, G2, but in either case we're allowed to consume the available signal and should not block anymore. */ if ((int)(signals - lowseq) >= 2) - break; + { + /* Try to grab a signal. See above for MO. (if we do another loop + iteration we need to see the correct value of g1_start) */ + if (atomic_compare_exchange_weak_acquire ( + cond->__data.__g_signals + g, + &signals, signals - 2)) + break; + else + continue; + } // Now block. struct _pthread_cleanup_buffer buffer; @@ -431,19 +438,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, /* If we timed out, we effectively cancel waiting. */ __condvar_cancel_waiting (cond, seq, g, private); result = err; - goto done; + break; } - - /* Reload signals. See above for MO. */ - signals = atomic_load_acquire (cond->__data.__g_signals + g); } - } - /* Try to grab a signal. See above for MO. (if we do another loop - iteration we need to see the correct value of g1_start) */ - while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, - &signals, signals - 2)); - - done: /* Confirm that we have been woken. 
We do that before acquiring the mutex to allow for execution of pthread_cond_destroy while having acquired the commit afbf0d46850dcd1b626d892ad8fde2162067ddc7 Author: Malte Skarupke Date: Wed Dec 4 08:04:10 2024 -0500 nptl: Fix indentation In my previous change I turned a nested loop into a simple loop. I'm doing the resulting indentation changes in a separate commit to make the diff on the previous commit easier to review. Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit ee6c14ed59d480720721aaacc5fb03213dc153da) diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 683cb2b133..7fc9dadf15 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -383,65 +383,65 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, } - while (1) - { - /* Now wait until a signal is available in our group or it is closed. - Acquire MO so that if we observe (signals == lowseq) after group - switching in __condvar_quiesce_and_switch_g1, we synchronize with that - store and will see the prior update of __g1_start done while switching - groups too. */ - unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); - uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); - unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; - - if (seq < (g1_start >> 1)) - { - /* If the group is closed already, - then this waiter originally had enough extra signals to - consume, up until the time its group was closed. */ - break; - } - - /* If there is an available signal, don't block. - If __g1_start has advanced at all, then we must be in G1 - by now, perhaps in the process of switching back to an older - G2, but in either case we're allowed to consume the available - signal and should not block anymore. */ - if ((int)(signals - lowseq) >= 2) - { - /* Try to grab a signal. See above for MO. 
(if we do another loop - iteration we need to see the correct value of g1_start) */ - if (atomic_compare_exchange_weak_acquire ( - cond->__data.__g_signals + g, + while (1) + { + /* Now wait until a signal is available in our group or it is closed. + Acquire MO so that if we observe (signals == lowseq) after group + switching in __condvar_quiesce_and_switch_g1, we synchronize with that + store and will see the prior update of __g1_start done while switching + groups too. */ + unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + + if (seq < (g1_start >> 1)) + { + /* If the group is closed already, + then this waiter originally had enough extra signals to + consume, up until the time its group was closed. */ + break; + } + + /* If there is an available signal, don't block. + If __g1_start has advanced at all, then we must be in G1 + by now, perhaps in the process of switching back to an older + G2, but in either case we're allowed to consume the available + signal and should not block anymore. */ + if ((int)(signals - lowseq) >= 2) + { + /* Try to grab a signal. See above for MO. (if we do another loop + iteration we need to see the correct value of g1_start) */ + if (atomic_compare_exchange_weak_acquire ( + cond->__data.__g_signals + g, &signals, signals - 2)) - break; - else - continue; - } - - // Now block. 
- struct _pthread_cleanup_buffer buffer; - struct _condvar_cleanup_buffer cbuffer; - cbuffer.wseq = wseq; - cbuffer.cond = cond; - cbuffer.mutex = mutex; - cbuffer.private = private; - __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); - - err = __futex_abstimed_wait_cancelable64 ( - cond->__data.__g_signals + g, signals, clockid, abstime, private); - - __pthread_cleanup_pop (&buffer, 0); - - if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) - { - /* If we timed out, we effectively cancel waiting. */ - __condvar_cancel_waiting (cond, seq, g, private); - result = err; break; - } + else + continue; } + // Now block. + struct _pthread_cleanup_buffer buffer; + struct _condvar_cleanup_buffer cbuffer; + cbuffer.wseq = wseq; + cbuffer.cond = cond; + cbuffer.mutex = mutex; + cbuffer.private = private; + __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); + + err = __futex_abstimed_wait_cancelable64 ( + cond->__data.__g_signals + g, signals, clockid, abstime, private); + + __pthread_cleanup_pop (&buffer, 0); + + if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) + { + /* If we timed out, we effectively cancel waiting. */ + __condvar_cancel_waiting (cond, seq, g, private); + result = err; + break; + } + } + /* Confirm that we have been woken. We do that before acquiring the mutex to allow for execution of pthread_cond_destroy while having acquired the mutex. 
*/ commit 2ad69497346cc20ef4d568108f1de49b2f451c55 Author: Malte Skarupke Date: Wed Dec 4 08:04:54 2024 -0500 nptl: rename __condvar_quiesce_and_switch_g1 This function no longer waits for threads to leave g1, so rename it to __condvar_switch_g1 Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit 4b79e27a5073c02f6bff9aa8f4791230a0ab1867) diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c index aada91639a..38bba17bfc 100644 --- a/nptl/pthread_cond_broadcast.c +++ b/nptl/pthread_cond_broadcast.c @@ -60,7 +60,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) cond->__data.__g_size[g1] << 1); cond->__data.__g_size[g1] = 0; - /* We need to wake G1 waiters before we quiesce G1 below. */ + /* We need to wake G1 waiters before we switch G1 below. */ /* TODO Only set it if there are indeed futex waiters. We could also try to move this out of the critical section in cases when G2 is empty (and we don't need to quiesce). */ @@ -69,7 +69,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) /* G1 is complete. Step (2) is next unless there are no waiters in G2, in which case we can stop. */ - if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) + if (__condvar_switch_g1 (cond, wseq, &g1, private)) { /* Step (3): Send signals to all waiters in the old G2 / new G1. */ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c index 30b8eee149..5044273cc2 100644 --- a/nptl/pthread_cond_common.c +++ b/nptl/pthread_cond_common.c @@ -189,16 +189,15 @@ __condvar_get_private (int flags) return FUTEX_SHARED; } -/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to - leave G1, converts G1 into a fresh G2, and then switches group roles so that - the former G2 becomes the new G1 ending at the current __wseq value when we - eventually make the switch (WSEQ is just an observation of __wseq by the - signaler). 
+/* This closes G1 (whose index is in G1INDEX), converts G1 into a fresh G2, + and then switches group roles so that the former G2 becomes the new G1 + ending at the current __wseq value when we eventually make the switch + (WSEQ is just an observation of __wseq by the signaler). If G2 is empty, it will not switch groups because then it would create an empty G1 which would require switching groups again on the next signal. Returns false iff groups were not switched because G2 was empty. */ static bool __attribute__ ((unused)) -__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, +__condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, unsigned int *g1index, int private) { unsigned int g1 = *g1index; @@ -214,8 +213,7 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + cond->__data.__g_size[g1 ^ 1]) == 0) return false; - /* Now try to close and quiesce G1. We have to consider the following kinds - of waiters: + /* We have to consider the following kinds of waiters: * Waiters from less recent groups than G1 are not affected because nothing will change for them apart from __g1_start getting larger. * New waiters arriving concurrently with the group switching will all go @@ -223,12 +221,12 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, are not affected. * Waiters in G1 have already received a signal and been woken. */ - /* Update __g1_start, which finishes closing this group. The value we add - will never be negative because old_orig_size can only be zero when we - switch groups the first time after a condvar was initialized, in which - case G1 will be at index 1 and we will add a value of 1. - Relaxed MO is fine because the change comes with no additional - constraints that others would have to observe. */ + /* Update __g1_start, which closes this group. 
The value we add will never + be negative because old_orig_size can only be zero when we switch groups + the first time after a condvar was initialized, in which case G1 will be + at index 1 and we will add a value of 1. Relaxed MO is fine because the + change comes with no additional constraints that others would have to + observe. */ __condvar_add_g1_start_relaxed (cond, (old_orig_size << 1) + (g1 == 1 ? 1 : - 1)); diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c index 43d6286ecd..f095497142 100644 --- a/nptl/pthread_cond_signal.c +++ b/nptl/pthread_cond_signal.c @@ -69,18 +69,17 @@ ___pthread_cond_signal (pthread_cond_t *cond) bool do_futex_wake = false; /* If G1 is still receiving signals, we put the signal there. If not, we - check if G2 has waiters, and if so, quiesce and switch G1 to the former - G2; if this results in a new G1 with waiters (G2 might have cancellations - already, see __condvar_quiesce_and_switch_g1), we put the signal in the - new G1. */ + check if G2 has waiters, and if so, switch G1 to the former G2; if this + results in a new G1 with waiters (G2 might have cancellations already, + see __condvar_switch_g1), we put the signal in the new G1. */ if ((cond->__data.__g_size[g1] != 0) - || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) + || __condvar_switch_g1 (cond, wseq, &g1, private)) { /* Add a signal. Relaxed MO is fine because signaling does not need to - establish a happens-before relation (see above). We do not mask the - release-MO store when initializing a group in - __condvar_quiesce_and_switch_g1 because we use an atomic - read-modify-write and thus extend that store's release sequence. */ + establish a happens-before relation (see above). We do not mask the + release-MO store when initializing a group in __condvar_switch_g1 + because we use an atomic read-modify-write and thus extend that + store's release sequence. 
*/ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2); cond->__data.__g_size[g1]--; /* TODO Only set it if there are indeed futex waiters. */ diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 7fc9dadf15..80bb728211 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -354,8 +354,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, because we do not need to establish any happens-before relation with signalers (see __pthread_cond_signal); modification order alone establishes a total order of waiters/signals. We do need acquire MO - to synchronize with group reinitialization in - __condvar_quiesce_and_switch_g1. */ + to synchronize with group reinitialization in __condvar_switch_g1. */ uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2); /* Find our group's index. We always go into what was G2 when we acquired our position. */ @@ -387,9 +386,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, { /* Now wait until a signal is available in our group or it is closed. Acquire MO so that if we observe (signals == lowseq) after group - switching in __condvar_quiesce_and_switch_g1, we synchronize with that - store and will see the prior update of __g1_start done while switching - groups too. */ + switching in __condvar_switch_g1, we synchronize with that store and + will see the prior update of __g1_start done while switching groups + too. */ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; commit 7f71824b8039b8afc150dd5c881b61faf10675ef Author: Malte Skarupke Date: Wed Dec 4 08:05:40 2024 -0500 nptl: Use all of g1_start and g_signals The LSB of g_signals was unused. The LSB of g1_start was used to indicate which group is G2. This was used to always go to sleep in pthread_cond_wait if a waiter is in G2. 
A comment earlier in the file says that this is not correct to do: "Waiters cannot determine whether they are currently in G2 or G1 -- but they do not have to because all they are interested in is whether there are available signals" I either would have had to update the comment, or get rid of the check. I chose to get rid of the check. In fact I don't quite know why it was there. There will never be available signals for group G2, so we didn't need the special case. Even if there were, this would just be a spurious wake. This might have caught some cases where the count has wrapped around, but it wouldn't reliably do that, (and even if it did, why would you want to force a sleep in that case?) and we don't support that many concurrent waiters anyway. Getting rid of it allows us to use one more bit, making us more robust to wraparound. Signed-off-by: Malte Skarupke Reviewed-by: Carlos O'Donell (cherry picked from commit 91bb902f58264a2fd50fbce8f39a9a290dd23706) diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c index 38bba17bfc..51afa62adf 100644 --- a/nptl/pthread_cond_broadcast.c +++ b/nptl/pthread_cond_broadcast.c @@ -57,7 +57,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) { /* Add as many signals as the remaining size of the group. */ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, - cond->__data.__g_size[g1] << 1); + cond->__data.__g_size[g1]); cond->__data.__g_size[g1] = 0; /* We need to wake G1 waiters before we switch G1 below. */ @@ -73,7 +73,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) { /* Step (3): Send signals to all waiters in the old G2 / new G1. */ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, - cond->__data.__g_size[g1] << 1); + cond->__data.__g_size[g1]); cond->__data.__g_size[g1] = 0; /* TODO Only set it if there are indeed futex waiters. 
*/ do_futex_wake = true; diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c index 5044273cc2..389402913c 100644 --- a/nptl/pthread_cond_common.c +++ b/nptl/pthread_cond_common.c @@ -208,9 +208,9 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, behavior. Note that this works correctly for a zero-initialized condvar too. */ unsigned int old_orig_size = __condvar_get_orig_size (cond); - uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; - if (((unsigned) (wseq - old_g1_start - old_orig_size) - + cond->__data.__g_size[g1 ^ 1]) == 0) + uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond); + uint64_t new_g1_start = old_g1_start + old_orig_size; + if (((unsigned) (wseq - new_g1_start) + cond->__data.__g_size[g1 ^ 1]) == 0) return false; /* We have to consider the following kinds of waiters: @@ -221,16 +221,10 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, are not affected. * Waiters in G1 have already received a signal and been woken. */ - /* Update __g1_start, which closes this group. The value we add will never - be negative because old_orig_size can only be zero when we switch groups - the first time after a condvar was initialized, in which case G1 will be - at index 1 and we will add a value of 1. Relaxed MO is fine because the - change comes with no additional constraints that others would have to - observe. */ - __condvar_add_g1_start_relaxed (cond, - (old_orig_size << 1) + (g1 == 1 ? 1 : - 1)); - - unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U; + /* Update __g1_start, which closes this group. Relaxed MO is fine because + the change comes with no additional constraints that others would have + to observe. */ + __condvar_add_g1_start_relaxed (cond, old_orig_size); /* At this point, the old G1 is now a valid new G2 (but not in use yet). 
No old waiter can neither grab a signal nor acquire a reference without @@ -242,13 +236,13 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, g1 ^= 1; *g1index ^= 1; - /* Now advance the new G1 g_signals to the new lowseq, giving it + /* Now advance the new G1 g_signals to the new g1_start, giving it an effective signal count of 0 to start. */ - atomic_store_release (cond->__data.__g_signals + g1, lowseq); + atomic_store_release (cond->__data.__g_signals + g1, (unsigned)new_g1_start); /* These values are just observed by signalers, and thus protected by the lock. */ - unsigned int orig_size = wseq - (old_g1_start + old_orig_size); + unsigned int orig_size = wseq - new_g1_start; __condvar_set_orig_size (cond, orig_size); /* Use and addition to not loose track of cancellations in what was previously G2. */ diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c index f095497142..fa3a5c3d8f 100644 --- a/nptl/pthread_cond_signal.c +++ b/nptl/pthread_cond_signal.c @@ -80,7 +80,7 @@ ___pthread_cond_signal (pthread_cond_t *cond) release-MO store when initializing a group in __condvar_switch_g1 because we use an atomic read-modify-write and thus extend that store's release sequence. */ - atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2); + atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 1); cond->__data.__g_size[g1]--; /* TODO Only set it if there are indeed futex waiters. */ do_futex_wake = true; diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 80bb728211..0f1dfcb595 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -84,7 +84,7 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, not hold a reference on the group. 
*/ __condvar_acquire_lock (cond, private); - uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); if (g1_start > seq) { /* Our group is closed, so someone provided enough signals for it. @@ -259,7 +259,6 @@ __condvar_cleanup_waiting (void *arg) * Waiters fetch-add while having acquire the mutex associated with the condvar. Signalers load it and fetch-xor it concurrently. __g1_start: Starting position of G1 (inclusive) - * LSB is index of current G2. * Modified by signalers while having acquired the condvar-internal lock and observed concurrently by waiters. __g1_orig_size: Initial size of G1 @@ -280,11 +279,9 @@ __condvar_cleanup_waiting (void *arg) * Reference count used by waiters concurrently with signalers that have acquired the condvar-internal lock. __g_signals: The number of signals that can still be consumed, relative to - the current g1_start. (i.e. bits 31 to 1 of __g_signals are bits - 31 to 1 of g1_start with the signal count added) + the current g1_start. (i.e. g1_start with the signal count added) * Used as a futex word by waiters. Used concurrently by waiters and signalers. - * LSB is currently reserved and 0. __g_size: Waiters remaining in this group (i.e., which have not been signaled yet. * Accessed by signalers and waiters that cancel waiting (both do so only @@ -391,9 +388,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, too. */ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); - unsigned int lowseq = (g1_start & 1) == g ? 
signals : g1_start & ~1U; - if (seq < (g1_start >> 1)) + if (seq < g1_start) { /* If the group is closed already, then this waiter originally had enough extra signals to @@ -406,13 +402,13 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, by now, perhaps in the process of switching back to an older G2, but in either case we're allowed to consume the available signal and should not block anymore. */ - if ((int)(signals - lowseq) >= 2) + if ((int)(signals - (unsigned int)g1_start) > 0) { /* Try to grab a signal. See above for MO. (if we do another loop iteration we need to see the correct value of g1_start) */ if (atomic_compare_exchange_weak_acquire ( cond->__data.__g_signals + g, - &signals, signals - 2)) + &signals, signals - 1)) break; else continue; commit 8d3dd23e3de8b4c6e4b94f8bbfab971c3b8a55be Author: Florian Weimer Date: Thu Mar 13 06:07:07 2025 +0100 nptl: PTHREAD_COND_INITIALIZER compatibility with pre-2.41 versions (bug 32786) The new initializer and struct layout does not initialize the __g_signals field in the old struct layout before the change in commit c36fc50781995e6758cae2b6927839d0157f213c ("nptl: Remove g_refs from condition variables"). Bring back fields at the end of struct __pthread_cond_s, so that they are again zero-initialized. Reviewed-by: Sam James diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h index a3d482f80f..bccc2003ec 100644 --- a/sysdeps/nptl/bits/thread-shared-types.h +++ b/sysdeps/nptl/bits/thread-shared-types.h @@ -99,6 +99,8 @@ struct __pthread_cond_s unsigned int __g1_orig_size; unsigned int __wrefs; unsigned int __g_signals[2]; + unsigned int __unused_initialized_1; + unsigned int __unused_initialized_2; }; typedef unsigned int __tss_t; diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h index 9af75d6eae..e0f24418fe 100644 --- a/sysdeps/nptl/pthread.h +++ b/sysdeps/nptl/pthread.h @@ -152,7 +152,7 @@ enum /* Conditional variable handling. 
*/ -#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0} } } +#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0}, 0, 0 } } /* Cleanup buffers */ commit 33b33e9dd0ff26158b1b83cc4347a39c073e490e Author: Arjun Shankar Date: Fri Oct 18 16:03:25 2024 +0200 libio: Fix a deadlock after fork in popen popen modifies its file handler book-keeping under a lock that wasn't being taken during fork. This meant that a concurrent popen and fork could end up copying the lock in a "locked" state into the fork child, where subsequently calling popen would lead to a deadlock due to the already (spuriously) held lock. This commit fixes the deadlock by appropriately taking the lock before fork, and releasing/resetting it in the parent/child after the fork. A new test for concurrent popen and fork is also added. It consistently hangs (and therefore fails via timeout) without the fix applied. Reviewed-by: Florian Weimer (cherry picked from commit 9f0d2c0ee6c728643fcf9a4879e9f20f5e45ce5f) diff --git a/libio/Makefile b/libio/Makefile index 5292baa4e0..7faba230ac 100644 --- a/libio/Makefile +++ b/libio/Makefile @@ -117,6 +117,7 @@ tests = \ tst-mmap-offend \ tst-mmap-setvbuf \ tst-mmap2-eofsync \ + tst-popen-fork \ tst-popen1 \ tst-setvbuf1 \ tst-sprintf-chk-ub \ diff --git a/libio/iopopen.c b/libio/iopopen.c index d01cb0648e..352513a291 100644 --- a/libio/iopopen.c +++ b/libio/iopopen.c @@ -57,6 +57,26 @@ unlock (void *not_used) } #endif +/* These lock/unlock/resetlock functions are used during fork. */ + +void +_IO_proc_file_chain_lock (void) +{ + _IO_lock_lock (proc_file_chain_lock); +} + +void +_IO_proc_file_chain_unlock (void) +{ + _IO_lock_unlock (proc_file_chain_lock); +} + +void +_IO_proc_file_chain_resetlock (void) +{ + _IO_lock_init (proc_file_chain_lock); +} + /* POSIX states popen shall ensure that any streams from previous popen() calls that remain open in the parent process should be closed in the new child process. 
diff --git a/libio/libioP.h b/libio/libioP.h index 616253fcd0..a83a411fdf 100644 --- a/libio/libioP.h +++ b/libio/libioP.h @@ -429,6 +429,12 @@ libc_hidden_proto (_IO_list_resetlock) extern void _IO_enable_locks (void) __THROW; libc_hidden_proto (_IO_enable_locks) +/* Functions for operating popen's proc_file_chain_lock during fork. */ + +extern void _IO_proc_file_chain_lock (void) __THROW attribute_hidden; +extern void _IO_proc_file_chain_unlock (void) __THROW attribute_hidden; +extern void _IO_proc_file_chain_resetlock (void) __THROW attribute_hidden; + /* Default jumptable functions. */ extern int _IO_default_underflow (FILE *) __THROW; diff --git a/libio/tst-popen-fork.c b/libio/tst-popen-fork.c new file mode 100644 index 0000000000..1df30fc6c0 --- /dev/null +++ b/libio/tst-popen-fork.c @@ -0,0 +1,80 @@ +/* Test concurrent popen and fork. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include +#include + +#include +#include +#include + +static void +popen_and_pclose (void) +{ + FILE *f = popen ("true", "r"); + TEST_VERIFY_EXIT (f != NULL); + pclose (f); + return; +} + +static atomic_bool done = ATOMIC_VAR_INIT (0); + +static void * +popen_and_pclose_forever (__attribute__ ((unused)) + void *arg) +{ + while (!atomic_load_explicit (&done, memory_order_acquire)) + popen_and_pclose (); + return NULL; +} + +static int +do_test (void) +{ + + /* Repeatedly call popen in a loop during the entire test. */ + pthread_t t = xpthread_create (NULL, popen_and_pclose_forever, NULL); + + /* Repeatedly fork off and reap child processes one-by-one. + Each child calls popen once, then exits, leading to the possibility + that a child forks *during* our own popen call, thus inheriting any + intermediate popen state, possibly including lock state(s). */ + for (int i = 0; i < 100; i++) + { + int cpid = xfork (); + + if (cpid == 0) + { + popen_and_pclose (); + _exit (0); + } + else + xwaitpid (cpid, NULL, 0); + } + + /* Stop calling popen. */ + atomic_store_explicit (&done, 1, memory_order_release); + xpthread_join (t); + + return 0; +} + +#include diff --git a/posix/fork.c b/posix/fork.c index 298765a1ff..cf9b80e7c0 100644 --- a/posix/fork.c +++ b/posix/fork.c @@ -62,6 +62,7 @@ __libc_fork (void) call_function_static_weak (__nss_database_fork_prepare_parent, &nss_database_data); + _IO_proc_file_chain_lock (); _IO_list_lock (); /* Acquire malloc locks. This needs to come last because fork @@ -92,6 +93,7 @@ __libc_fork (void) /* Reset locks in the I/O code. */ _IO_list_resetlock (); + _IO_proc_file_chain_resetlock (); call_function_static_weak (__nss_database_fork_subprocess, &nss_database_data); @@ -121,6 +123,7 @@ __libc_fork (void) /* We execute this even if the 'fork' call failed. */ _IO_list_unlock (); + _IO_proc_file_chain_unlock (); } /* Run the handlers registered for the parent. 
*/ commit 7c3c9ae28685a9142a8cfa3521bbca74c1007d0b Author: Arjun Shankar Date: Fri Oct 25 09:33:45 2024 +0200 libio: Correctly link tst-popen-fork against libpthread tst-popen-fork failed to build for Hurd due to not being linked with libpthread. This commit fixes that. Tested with build-many-glibcs.py for i686-gnu. Reviewed-by: Florian Weimer (cherry picked from commit 6a290b2895b77be839fcb7c44a6a9879560097ad) diff --git a/libio/Makefile b/libio/Makefile index 7faba230ac..f2e98f96eb 100644 --- a/libio/Makefile +++ b/libio/Makefile @@ -142,6 +142,8 @@ tests = \ tst_wscanf \ # tests +$(objpfx)tst-popen-fork: $(shared-thread-library) + tests-internal = tst-vtables tst-vtables-interposed ifeq (yes,$(build-shared)) commit 8667345b83c8ca528a093d4db53f57a1bb1688e4 Author: Florian Weimer Date: Thu Feb 13 21:56:52 2025 +0100 elf: Keep using minimal malloc after early DTV resize (bug 32412) If an auditor loads many TLS-using modules during startup, it is possible to trigger DTV resizing. Previously, the DTV was marked as allocated by the main malloc afterwards, even if the minimal malloc was still in use. With this change, _dl_resize_dtv marks the resized DTV as allocated with the minimal malloc. The new test reuses TLS-using modules from other auditing tests. 
Reviewed-by: DJ Delorie (cherry picked from commit aa3d7bd5299b33bffc118aa618b59bfa66059bcb) diff --git a/elf/Makefile b/elf/Makefile index dc686c3bff..be64c59887 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -378,6 +378,7 @@ tests += \ tst-align3 \ tst-audit-tlsdesc \ tst-audit-tlsdesc-dlopen \ + tst-audit-tlsdesc-dlopen2 \ tst-audit1 \ tst-audit2 \ tst-audit8 \ @@ -817,6 +818,7 @@ modules-names += \ tst-auditmanymod8 \ tst-auditmanymod9 \ tst-auditmod-tlsdesc \ + tst-auditmod-tlsdesc2 \ tst-auditmod1 \ tst-auditmod11 \ tst-auditmod12 \ @@ -3040,6 +3042,9 @@ $(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so tst-audit-tlsdesc-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-auditmod-tlsdesc.so tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so +$(objpfx)tst-audit-tlsdesc-dlopen2.out: $(objpfx)tst-auditmod-tlsdesc2.so \ + $(patsubst %, $(objpfx)%.so, $(tlsmod17a-modules)) +tst-audit-tlsdesc-dlopen2-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc2.so $(objpfx)tst-dlmopen-twice.out: \ $(objpfx)tst-dlmopen-twice-mod1.so \ diff --git a/elf/dl-tls.c b/elf/dl-tls.c index 3d529b722c..b13e752358 100644 --- a/elf/dl-tls.c +++ b/elf/dl-tls.c @@ -528,6 +528,13 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) if (newp == NULL) oom (); memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t)); +#ifdef SHARED + /* Auditors can trigger a DTV resize event while the full malloc + is not yet in use. Mark the new DTV allocation as the + initial allocation. */ + if (!__rtld_malloc_is_complete ()) + GL(dl_initial_dtv) = &newp[1]; +#endif } else { diff --git a/elf/tst-audit-tlsdesc-dlopen2.c b/elf/tst-audit-tlsdesc-dlopen2.c new file mode 100644 index 0000000000..7ba2c4129a --- /dev/null +++ b/elf/tst-audit-tlsdesc-dlopen2.c @@ -0,0 +1,46 @@ +/* Loading TLS-using modules from auditors (bug 32412). Main program. + Copyright (C) 2021-2025 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +static int +do_test (void) +{ + puts ("info: start of main program"); + + /* Load TLS-using modules, to trigger DTV resizing. The dynamic + linker will load them again (requiring their own TLS) because the + dlopen calls from the auditor were in the auditing namespace. */ + for (int i = 1; i <= 19; ++i) + { + char dso[30]; + snprintf (dso, sizeof (dso), "tst-tlsmod17a%d.so", i); + char sym[30]; + snprintf (sym, sizeof(sym), "tlsmod17a%d", i); + + void *handle = xdlopen (dso, RTLD_LAZY); + int (*func) (void) = xdlsym (handle, sym); + /* Trigger TLS allocation. */ + func (); + } + + return 0; +} + +#include diff --git a/elf/tst-auditmod-tlsdesc2.c b/elf/tst-auditmod-tlsdesc2.c new file mode 100644 index 0000000000..50275cd34d --- /dev/null +++ b/elf/tst-auditmod-tlsdesc2.c @@ -0,0 +1,59 @@ +/* Loading TLS-using modules from auditors (bug 32412). Audit module. + Copyright (C) 2021-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +unsigned int +la_version (unsigned int version) +{ + /* Open some modules, to trigger DTV resizing before the switch to + the main malloc. */ + for (int i = 1; i <= 19; ++i) + { + char dso[30]; + snprintf (dso, sizeof (dso), "tst-tlsmod17a%d.so", i); + char sym[30]; + snprintf (sym, sizeof(sym), "tlsmod17a%d", i); + + void *handle = dlopen (dso, RTLD_LAZY); + if (handle == NULL) + { + printf ("error: dlopen from auditor: %s\n", dlerror ()); + fflush (stdout); + _exit (1); + } + int (*func) (void) = dlsym (handle, sym); + if (func == NULL) + { + printf ("error: dlsym from auditor: %s\n", dlerror ()); + fflush (stdout); + _exit (1); + } + /* Trigger TLS allocation. */ + func (); + } + + puts ("info: TLS-using modules loaded from auditor"); + fflush (stdout); + + return LAV_CURRENT; +} commit b3002f303cedb8262cbc1ec22999ea36482efa0e Author: Florian Weimer Date: Tue May 20 19:36:02 2025 +0200 support: Use const char * argument in support_capture_subprogram_self_sgid The function does not modify the passed-in string, so make this clear via the prototype. Reviewed-by: Carlos O'Donell (cherry picked from commit f0c09fe61678df6f7f18fe1ebff074e62fa5ca7a) diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h index 93b7245d2a..5406d9f6c0 100644 --- a/support/capture_subprocess.h +++ b/support/capture_subprocess.h @@ -45,8 +45,7 @@ struct support_capture_subprocess support_capture_subprogram /* Copy the running program into a setgid binary and run it with CHILD_ID argument.
If execution is successful, return the exit status of the child program, otherwise return a non-zero failure exit code. */ -int support_capture_subprogram_self_sgid - (char *child_id); +int support_capture_subprogram_self_sgid (const char *child_id); /* Deallocate the subprocess data captured by support_capture_subprocess. */ diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c index 53847194cb..2383481911 100644 --- a/support/support_capture_subprocess.c +++ b/support/support_capture_subprocess.c @@ -110,7 +110,7 @@ support_capture_subprogram (const char *file, char *const argv[], safely make it SGID with the TARGET group ID. Then runs the executable. */ static int -copy_and_spawn_sgid (char *child_id, gid_t gid) +copy_and_spawn_sgid (const char *child_id, gid_t gid) { char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", test_dir, (intmax_t) getpid ()); @@ -182,7 +182,7 @@ copy_and_spawn_sgid (char *child_id, gid_t gid) ret = 0; infd = outfd = -1; - char * const args[] = {execname, child_id, NULL}; + char * const args[] = {execname, (char *) child_id, NULL}; status = support_subprogram_wait (args[0], args); @@ -211,7 +211,7 @@ err: } int -support_capture_subprogram_self_sgid (char *child_id) +support_capture_subprogram_self_sgid (const char *child_id) { gid_t target = 0; const int count = 64; commit 61dcce21e06834f7248a8d516c9ec20788fc728c Author: Florian Weimer Date: Mon Dec 23 13:57:55 2024 +0100 support: Add support_record_failure_barrier This can be used to stop execution after a TEST_COMPARE_BLOB failure, for example. (cherry picked from commit d0b8aa6de4529231fadfe604ac2c434e559c2d9e) diff --git a/support/check.h b/support/check.h index 7ea22c7a2c..8f41e5b99f 100644 --- a/support/check.h +++ b/support/check.h @@ -207,6 +207,9 @@ void support_record_failure_reset (void); failures or not. */ int support_record_failure_is_failed (void); +/* Terminate the process if any failures have been encountered so far. 
*/ +void support_record_failure_barrier (void); + __END_DECLS #endif /* SUPPORT_CHECK_H */ diff --git a/support/support_record_failure.c b/support/support_record_failure.c index 978123701d..72ee2b232f 100644 --- a/support/support_record_failure.c +++ b/support/support_record_failure.c @@ -112,3 +112,13 @@ support_record_failure_is_failed (void) synchronization for reliable test error reporting anyway. */ return __atomic_load_n (&state->failed, __ATOMIC_RELAXED); } + +void +support_record_failure_barrier (void) +{ + if (__atomic_load_n (&state->failed, __ATOMIC_RELAXED)) + { + puts ("error: exiting due to previous errors"); + exit (1); + } +} commit 079ac4a172a8f6ba37acf1e80e57f5042d2c7561 Author: Florian Weimer Date: Tue May 20 19:45:06 2025 +0200 elf: Test case for bug 32976 (CVE-2025-4802) Check that LD_LIBRARY_PATH is ignored for AT_SECURE statically linked binaries, using support_capture_subprogram_self_sgid. Reviewed-by: Carlos O'Donell (cherry picked from commit d8f7a79335b0d861c12c42aec94c04cd5bb181e2) diff --git a/elf/Makefile b/elf/Makefile index be64c59887..afd4eb6fdd 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -266,6 +266,7 @@ tests-static-normal := \ tst-array1-static \ tst-array5-static \ tst-dl-iter-static \ + tst-dlopen-sgid \ tst-dst-static \ tst-env-setuid-static \ tst-getauxval-static \ @@ -859,6 +860,7 @@ modules-names += \ tst-dlmopen-twice-mod1 \ tst-dlmopen-twice-mod2 \ tst-dlmopen1mod \ + tst-dlopen-sgid-mod \ tst-dlopen-tlsreinitmod1 \ tst-dlopen-tlsreinitmod2 \ tst-dlopen-tlsreinitmod3 \ @@ -3153,3 +3155,5 @@ $(objpfx)tst-dlopen-tlsreinit3.out: $(objpfx)tst-auditmod1.so tst-dlopen-tlsreinit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so $(objpfx)tst-dlopen-tlsreinit4.out: $(objpfx)tst-auditmod1.so tst-dlopen-tlsreinit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + +$(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so diff --git a/elf/tst-dlopen-sgid-mod.c b/elf/tst-dlopen-sgid-mod.c new file mode 100644 index 0000000000..5eb79eef48 --- 
/dev/null +++ b/elf/tst-dlopen-sgid-mod.c @@ -0,0 +1 @@ +/* Opening this object should not succeed. */ diff --git a/elf/tst-dlopen-sgid.c b/elf/tst-dlopen-sgid.c new file mode 100644 index 0000000000..47829a405e --- /dev/null +++ b/elf/tst-dlopen-sgid.c @@ -0,0 +1,104 @@ +/* Test case for ignored LD_LIBRARY_PATH in static startup (bug 32976). + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This is the name of our test object. Use a custom module for + testing, so that this object does not get picked up from the system + path. */ +static const char dso_name[] = "tst-dlopen-sgid-mod.so"; + +/* Used to mark the recursive invocation. */ +static const char magic_argument[] = "run-actual-test"; + +static int +do_test (void) +{ +/* Pathname of the directory that receives the shared objects this + test attempts to load. */ + char *libdir = support_create_temp_directory ("tst-dlopen-sgid-"); + + /* This is supposed to be ignored and stripped. */ + TEST_COMPARE (setenv ("LD_LIBRARY_PATH", libdir, 1), 0); + + /* Copy of libc.so.6.
*/ + { + char *from = xasprintf ("%s/%s", support_objdir_root, LIBC_SO); + char *to = xasprintf ("%s/%s", libdir, LIBC_SO); + add_temp_file (to); + support_copy_file (from, to); + free (to); + free (from); + } + + /* Copy of the test object. */ + { + char *from = xasprintf ("%s/elf/%s", support_objdir_root, dso_name); + char *to = xasprintf ("%s/%s", libdir, dso_name); + add_temp_file (to); + support_copy_file (from, to); + free (to); + free (from); + } + + TEST_COMPARE (support_capture_subprogram_self_sgid (magic_argument), 0); + + free (libdir); + + return 0; +} + +static void +alternative_main (int argc, char **argv) +{ + if (argc == 2 && strcmp (argv[1], magic_argument) == 0) + { + if (getgid () == getegid ()) + /* This can happen if the file system is mounted nosuid. */ + FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", + (intmax_t) getgid ()); + + /* Should be removed due to SGID. */ + TEST_COMPARE_STRING (getenv ("LD_LIBRARY_PATH"), NULL); + + TEST_VERIFY (dlopen (dso_name, RTLD_NOW) == NULL); + { + const char *message = dlerror (); + TEST_COMPARE_STRING (message, + "tst-dlopen-sgid-mod.so:" + " cannot open shared object file:" + " No such file or directory"); + } + + support_record_failure_barrier (); + exit (EXIT_SUCCESS); + } +} + +#define PREPARE alternative_main +#include commit 56e75b810ac39b0e390be5b66397dca0cdfa4d80 Author: Sunil K Pandey Date: Tue May 20 10:07:27 2025 -0700 x86_64: Fix typo in ifunc-impl-list.c. Fix wcsncpy and wcpncpy typo in ifunc-impl-list.c. Reviewed-by: H.J. 
Lu (cherry picked from commit f2aeb6ff941dccc4c777b5621e77addea6cc076c) diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 0bbb71bbbf..3db45db39b 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -922,7 +922,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (CPU_FEATURE_USABLE (AVX2) && CPU_FEATURE_USABLE (BMI2)), __wcsncpy_avx2) - X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy, + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy, 1, __wcsncpy_generic)) @@ -952,7 +952,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (CPU_FEATURE_USABLE (AVX2) && CPU_FEATURE_USABLE (BMI2)), __wcpncpy_avx2) - X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy, + X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy, 1, __wcpncpy_generic)) commit c8e10f14328518954072df64aafd574e67cfdde5 Author: Florian Weimer Date: Wed May 21 08:43:32 2025 +0200 elf: Fix subprocess status handling for tst-dlopen-sgid (bug 32987) This should really move into support_capture_subprogram_self_sgid. Reviewed-by: Sam James (cherry picked from commit 35fc356fa3b4f485bd3ba3114c9f774e5df7d3c2) diff --git a/NEWS b/NEWS index 7a6985f5dd..4b290ad4bf 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,7 @@ The following bugs are resolved with this release: [32245] glibc -Wstringop-overflow= build failure on hppa [32470] x86: Avoid integer truncation with large cache sizes [32810] Crash on x86-64 if XSAVEC disable via tunable + [32987] elf: Fix subprocess status handling for tst-dlopen-sgid Version 2.40 diff --git a/elf/tst-dlopen-sgid.c b/elf/tst-dlopen-sgid.c index 47829a405e..5688b79f2e 100644 --- a/elf/tst-dlopen-sgid.c +++ b/elf/tst-dlopen-sgid.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include /* This is the name of our test object. 
Use a custom module for @@ -66,10 +68,16 @@ do_test (void) free (from); } - TEST_COMPARE (support_capture_subprogram_self_sgid (magic_argument), 0); - free (libdir); + int status = support_capture_subprogram_self_sgid (magic_argument); + + if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) + return EXIT_UNSUPPORTED; + + if (!WIFEXITED (status)) + FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); + return 0; } commit 42a5a940c974d02540c8da26d6374c744d148cb9 Author: Carlos O'Donell Date: Wed Jun 11 09:19:17 2025 -0400 ppc64le: Revert "powerpc: Optimized strncmp for power10" (CVE-2025-5745) This reverts commit 23f0d81608d0ca6379894ef81670cf30af7fd081 Reason for revert: Power10 strncmp clobbers non-volatile vector registers (Bug 33060) Tested on ppc64le with no regressions. (cherry picked from commit 63c60101ce7c5eac42be90f698ba02099b41b965) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S deleted file mode 100644 index d4ba76acae..0000000000 --- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S +++ /dev/null @@ -1,271 +0,0 @@ -/* Optimized strncmp implementation for PowerPC64/POWER10. - Copyright (C) 2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#include - -/* Implements the function - - int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) - - The implementation uses unaligned doubleword access to avoid specialized - code paths depending of data alignment for first 32 bytes and uses - vectorised loops after that. */ - -#ifndef STRNCMP -# define STRNCMP strncmp -#endif - -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define COMPARE_16(vreg1,vreg2,offset) \ - lxv vreg1+32,offset(r3); \ - lxv vreg2+32,offset(r4); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(different); \ - cmpldi cr7,r5,16; \ - ble cr7,L(ret0); \ - addi r5,r5,-16; - -#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ - vcmpnezb. v7,vreg1+1,vreg2+1; \ - bne cr6,L(label1); \ - vcmpnezb. 
v7,vreg1,vreg2; \ - bne cr6,L(label2); \ - cmpldi cr7,r5,32; \ - ble cr7,L(ret0); \ - addi r5,r5,-32; - -#define TAIL_FIRST_16B(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - cmpld cr7,r5,r6; \ - ble cr7,L(ret0); \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; - -#define TAIL_SECOND_16B(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - addi r0,r6,16; \ - cmpld cr7,r5,r0; \ - ble cr7,L(ret0); \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; - -#define CHECK_N_BYTES(reg1,reg2,len_reg) \ - sldi r6,len_reg,56; \ - lxvl 32+v4,reg1,r6; \ - lxvl 32+v5,reg2,r6; \ - add reg1,reg1,len_reg; \ - add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ - vctzlsbb r6,v7; \ - cmpld cr7,r6,len_reg; \ - blt cr7,L(different); \ - cmpld cr7,r5,len_reg; \ - ble cr7,L(ret0); \ - sub r5,r5,len_reg; \ - - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - .machine power9 -ENTRY_TOCLESS (STRNCMP, 4) - /* Check if size is 0. */ - cmpdi cr0,r5,0 - beq cr0,L(ret0) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 - cmpldi cr1,r8,4096-16 - bgt cr0,L(crosses) - bgt cr1,L(crosses) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - -L(crosses): - andi. r7,r3,15 - subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - subfic r8,r9,16 /* r8(nalign2) = 16 - (str2 & 15). */ - cmpld cr7,r7,r8 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) - - /* nalign2 is minimum and s2 pointer is aligned. */ - CHECK_N_BYTES(r3,r4,r8) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r3,63 /* Determine offset into 64B hunk. */ - andi. r8,r3,15 /* The offset into the 16B hunk. */ - neg r7,r3 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. 
*/ -L(nalign1_min): - CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ - neg r7,r4 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - - .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) - - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. */ -L(compare_64_pagecross): - li r11, 63 - mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 - ble L(cross_3) - cmpldi r10,48 - ble L(cross_2) -L(cross_1): - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - b L(compare_64B_unaligned) -L(cross_2): - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - b L(compare_64B_unaligned) -L(cross_3): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) - -L(same_aligned): - CHECK_N_BYTES(r3,r4,r7) - /* Align s1 to 32B and adjust s2 address. - Use lxvp only if both s1 and s2 are 32B aligned. 
*/ - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r5,r5,32 - - clrldi r6,r3,59 - subfic r7,r6,32 - add r3,r3,r7 - add r4,r4,r7 - subf r5,r7,r5 - andi. r7,r4,0x1F - beq cr0,L(32B_aligned_loop) - - .p2align 5 -L(16B_aligned_loop): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - b L(16B_aligned_loop) - - /* Calculate and return the difference. */ -L(different): - TAIL_FIRST_16B(v4,v5) - - .p2align 5 -L(32B_aligned_loop): - COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) - addi r3,r3,128 - addi r4,r4,128 - b L(32B_aligned_loop) - -L(tail1): TAIL_FIRST_16B(v15,v17) -L(tail2): TAIL_SECOND_16B(v14,v16) -L(tail3): TAIL_FIRST_16B(v19,v21) -L(tail4): TAIL_SECOND_16B(v18,v20) -L(tail5): TAIL_FIRST_16B(v23,v25) -L(tail6): TAIL_SECOND_16B(v22,v24) -L(tail7): TAIL_FIRST_16B(v27,v29) -L(tail8): TAIL_SECOND_16B(v26,v28) - - .p2align 5 -L(ret0): - li r3,0 - blr - -END(STRNCMP) -libc_hidden_builtin_def(strncmp) diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index b847c19049..a38ff46448 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -34,7 +34,7 @@ ifneq (,$(filter %le,$(config-machine))) sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \ memmove-power10 memset-power10 rawmemchr-power9 \ rawmemchr-power10 strcmp-power9 strcmp-power10 \ - strncmp-power9 strncmp-power10 strcpy-power9 stpcpy-power9 \ + strncmp-power9 strcpy-power9 stpcpy-power9 \ strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 
b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 2bb47d3527..30fd89e109 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -164,9 +164,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ IFUNC_IMPL (i, name, strncmp, #ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX, - __strncmp_power10) IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC, __strncmp_power9) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S deleted file mode 100644 index d7026c12e2..0000000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (C) 2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define STRNCMP __strncmp_power10 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#include -#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c index a5ed67f766..6178f4a432 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -29,7 +29,6 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; extern __typeof (strncmp) __strncmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strncmp) __strncmp_power9 attribute_hidden; -extern __typeof (strncmp) __strncmp_power10 attribute_hidden; # endif # undef strncmp @@ -37,9 +36,6 @@ extern __typeof (strncmp) __strncmp_power10 attribute_hidden; ifunc symbol properly. */ libc_ifunc_redirected (__redirect_strncmp, strncmp, # ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __strncmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? __strncmp_power9 : commit 2ad6e55ea5cb23af5af7af35d5f80cd93032f96a Author: Carlos O'Donell Date: Wed Jun 11 09:43:50 2025 -0400 ppc64le: Revert "powerpc: Fix performance issues of strcmp power10" (CVE-2025-5702) This reverts commit 90bcc8721ef82b7378d2b080141228660e862d56 This change is in the chain of the final revert that fixes the CVE i.e. 3367d8e180848030d1646f088759f02b8dfe0d6f Reason for revert: Power10 strcmp clobbers non-volatile vector registers (Bug 33056) Tested on ppc64le with no regressions. 
(cherry picked from commit c22de63588df7a8a0edceea9bb02534064c9d201) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S index f0d6732a25..00f1e9c170 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S +++ b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S @@ -62,7 +62,7 @@ lxvl 32+v5,reg2,r0; \ add reg1,reg1,len_reg; \ add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ + vcmpnezb. v7,v4,v5; \ vctzlsbb r6,v7; \ cmpld cr7,r6,len_reg; \ blt cr7,L(different); \ @@ -72,110 +72,70 @@ .machine power9 ENTRY_TOCLESS (STRCMP, 4) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 - cmpldi cr1,r8,4096-16 - bgt cr0,L(crosses) - bgt cr1,L(crosses) - COMPARE_16(v4,v5,0) - -L(crosses): - andi. r7,r3,15 - subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - subfic r5,r9,16 /* r5(nalign2) = 16 - (str2 & 15). */ - cmpld cr7,r7,r5 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) + li r11,16 + /* eq bit of cr1 used as swap status flag to indicate if + source pointers were swapped. */ + crclr 4*cr1+eq + vspltisb v19,-1 + andi. r7,r3,15 + sub r7,r11,r7 /* r7(nalign1) = 16 - (str1 & 15). */ + andi. r9,r4,15 + sub r5,r11,r9 /* r5(nalign2) = 16 - (str2 & 15). */ + cmpld cr7,r7,r5 + beq cr7,L(same_aligned) + blt cr7,L(nalign1_min) + /* Swap r3 and r4, and r7 and r5 such that r3 and r7 hold the + pointer which is closer to the next 16B boundary so that only + one CHECK_N_BYTES is needed before entering the loop below. */ + mr r8,r4 + mr r4,r3 + mr r3,r8 + mr r12,r7 + mr r7,r5 + mr r5,r12 + crset 4*cr1+eq /* Set bit on swapping source pointers. */ - /* nalign2 is minimum and s2 pointer is aligned. */ - CHECK_N_BYTES(r3,r4,r5) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r3,63 /* Determine offset into 64B hunk. */ - andi. r8,r3,15 /* The offset into the 16B hunk. */ - neg r7,r3 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r3-4096))>>6&63. 
*/ - beq L(compare_64_pagecross) - mtctr r7 - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. */ + .p2align 5 L(nalign1_min): CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ - neg r7,r4 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) +L(s1_aligned): + /* r9 and r5 is number of bytes to be read after and before + page boundary correspondingly. */ + sub r5,r5,r7 + subfic r9,r5,16 + /* Now let r7 hold the count of quadwords which can be + checked without crossing a page boundary. quadword offset is + (str2>>4)&0xFF. */ + rlwinm r7,r4,28,0xFF + /* Below check is required only for first iteration. For second + iteration and beyond, the new loop counter is always 255. */ + cmpldi r7,255 + beq L(L3) + /* Get the initial loop count by 255-((str2>>4)&0xFF). */ + subfic r11,r7,255 - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. 
*/ -L(compare_64_pagecross): - li r11, 63 + .p2align 5 +L(L1): mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 - ble L(cross_3) - cmpldi r10,48 - ble L(cross_2) -L(cross_1): - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - b L(compare_64B_unaligned) -L(cross_2): - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - b L(compare_64B_unaligned) -L(cross_3): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) + + .p2align 5 +L(L2): + COMPARE_16(v4,v5,0) /* Load 16B blocks using lxv. */ addi r3,r3,16 addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 + bdnz L(L2) + /* Cross the page boundary of s2, carefully. */ + + .p2align 5 +L(L3): + CHECK_N_BYTES(r3,r4,r5) CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) + li r11,255 /* Load the new loop counter. */ + b L(L1) + .p2align 5 L(same_aligned): CHECK_N_BYTES(r3,r4,r7) /* Align s1 to 32B and adjust s2 address. @@ -208,7 +168,18 @@ L(16B_aligned_loop): /* Calculate and return the difference. */ L(different): - TAIL(v4,v5) + vctzlsbb r6,v7 + vextubrx r5,r6,v4 + vextubrx r4,r6,v5 + bt 4*cr1+eq,L(swapped) + subf r3,r4,r5 + blr + + /* If src pointers were swapped, then swap the + indices and calculate the return value. 
*/ +L(swapped): + subf r3,r5,r4 + blr .p2align 5 L(32B_aligned_loop): commit 672f31b90e501b4ba10ba12ab4c6051f77589912 Author: Carlos O'Donell Date: Wed Jun 11 09:33:45 2025 -0400 ppc64le: Revert "powerpc : Add optimized memchr for POWER10" (Bug 33059) This reverts commit b9182c793caa05df5d697427c0538936e6396d4b Reason for revert: Power10 memchr clobbers v20 vector register (Bug 33059) This is not a security issue, unlike CVE-2025-5745 and CVE-2025-5702. Tested on ppc64le without regression. (cherry picked from commit a7877bb6685300f159fa095c9f50b22b112cddb8) diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S deleted file mode 100644 index 53e5716d72..0000000000 --- a/sysdeps/powerpc/powerpc64/le/power10/memchr.S +++ /dev/null @@ -1,315 +0,0 @@ -/* Optimized memchr implementation for POWER10 LE. - Copyright (C) 2021-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#include - -# ifndef MEMCHR -# define MEMCHR __memchr -# endif -# define M_VREG_ZERO v20 -# define M_OFF_START_LOOP 256 -# define MEMCHR_SUBTRACT_VECTORS \ - vsububm v4,v4,v18; \ - vsububm v5,v5,v18; \ - vsububm v6,v6,v18; \ - vsububm v7,v7,v18; -# define M_TAIL(vreg,increment) \ - vctzlsbb r4,vreg; \ - cmpld r5,r4; \ - ble L(null); \ - addi r4,r4,increment; \ - add r3,r6,r4; \ - blr - -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. */ -#define M_VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define M_LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define CHECK16B(vreg,offset,addr,label) \ - lxv vreg+32,offset(addr); \ - vcmpequb. vreg,vreg,v18; \ - bne cr6,L(label); \ - cmpldi r5,16; \ - ble L(null); \ - addi r5,r5,-16; - -/* Load 4 quadwords, merge into one VR for speed and check for NULLs. r6 has # - of bytes already checked. */ -#define CHECK64B(offset,addr,label) \ - M_LXVP(v4+32,offset,addr); \ - M_LXVP(v6+32,offset+32,addr); \ - MEMCHR_SUBTRACT_VECTORS; \ - vminub v14,v4,v5; \ - vminub v15,v6,v7; \ - vminub v16,v14,v15; \ - vcmpequb. v0,v16,M_VREG_ZERO; \ - beq cr6,$+12; \ - li r7,offset; \ - b L(label); \ - cmpldi r5,64; \ - ble L(null); \ - addi r5,r5,-64 - -/* Implements the function - void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]). */ - - .machine power9 - -ENTRY_TOCLESS (MEMCHR) - CALL_MCOUNT 3 - - cmpldi r5,0 - beq L(null) - mr r0,r5 - xori r6,r4,0xff - - mtvsrd v18+32,r4 /* matching char in v18 */ - mtvsrd v19+32,r6 /* non matching char in v19 */ - - vspltb v18,v18,7 /* replicate */ - vspltb v19,v19,7 /* replicate */ - vspltisb M_VREG_ZERO,0 - - /* Next 16B-aligned address. Prepare address for L(aligned). 
*/ - addi r6,r3,16 - clrrdi r6,r6,4 - - /* Align data and fill bytes not loaded with non matching char. */ - lvx v0,0,r3 - lvsr v1,0,r3 - vperm v0,v19,v0,v1 - - vcmpequb. v6,v0,v18 - bne cr6,L(found) - sub r4,r6,r3 - cmpld r5,r4 - ble L(null) - sub r5,r5,r4 - - /* Test up to OFF_START_LOOP-16 bytes in 16B chunks. The main loop is - optimized for longer strings, so checking the first bytes in 16B - chunks benefits a lot small strings. */ - .p2align 5 -L(aligned): - cmpldi r5,0 - beq L(null) - - CHECK16B(v0,0,r6,tail1) - CHECK16B(v1,16,r6,tail2) - CHECK16B(v2,32,r6,tail3) - CHECK16B(v3,48,r6,tail4) - CHECK16B(v4,64,r6,tail5) - CHECK16B(v5,80,r6,tail6) - CHECK16B(v6,96,r6,tail7) - CHECK16B(v7,112,r6,tail8) - CHECK16B(v8,128,r6,tail9) - CHECK16B(v9,144,r6,tail10) - CHECK16B(v10,160,r6,tail11) - CHECK16B(v0,176,r6,tail12) - CHECK16B(v1,192,r6,tail13) - CHECK16B(v2,208,r6,tail14) - CHECK16B(v3,224,r6,tail15) - - cmpdi cr5,r4,0 /* Check if c == 0. This will be useful to - choose how we will perform the main loop. */ - - /* Prepare address for the loop. */ - addi r4,r3,M_OFF_START_LOOP - clrrdi r4,r4,6 - sub r6,r4,r3 - sub r5,r0,r6 - addi r6,r4,128 - - /* If c == 0, use the loop without the vsububm. */ - beq cr5,L(loop) - - /* This is very similar to the block after L(loop), the difference is - that here MEMCHR_SUBTRACT_VECTORS is not empty, and we subtract - each byte loaded by the char we are looking for, this way we can keep - using vminub to merge the results and checking for nulls. */ - .p2align 5 -L(memchr_loop): - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(memchr_loop) - /* Switch to a more aggressive approach checking 64B each time. 
Use 2 - pointers 128B apart and unroll the loop once to make the pointer - updates and usages separated enough to avoid stalls waiting for - address calculation. */ - .p2align 5 -L(loop): -#undef MEMCHR_SUBTRACT_VECTORS -#define MEMCHR_SUBTRACT_VECTORS /* nothing */ - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(loop) - - .p2align 5 -L(pre_tail_64b): - mr r6,r4 -L(tail_64b): - /* OK, we found a null byte. Let's look for it in the current 64-byte - block and mark it in its corresponding VR. lxvp vx,0(ry) puts the - low 16B bytes into vx+1, and the high into vx, so the order here is - v5, v4, v7, v6. */ - vcmpequb v1,v5,M_VREG_ZERO - vcmpequb v2,v4,M_VREG_ZERO - vcmpequb v3,v7,M_VREG_ZERO - vcmpequb v4,v6,M_VREG_ZERO - - /* Take into account the other 64B blocks we had already checked. */ - add r6,r6,r7 - /* Extract first bit of each byte. */ - M_VEXTRACTBM(r8,v1) - M_VEXTRACTBM(r9,v2) - M_VEXTRACTBM(r10,v3) - M_VEXTRACTBM(r11,v4) - - /* Shift each value into their corresponding position. */ - sldi r9,r9,16 - sldi r10,r10,32 - sldi r11,r11,48 - - /* Merge the results. */ - or r8,r8,r9 - or r9,r10,r11 - or r11,r9,r8 - - cnttzd r0,r11 /* Count trailing zeros before the match. */ - cmpld r5,r0 - ble L(null) - add r3,r6,r0 /* Compute final address. 
*/ - blr - - .p2align 5 -L(tail1): - M_TAIL(v0,0) - - .p2align 5 -L(tail2): - M_TAIL(v1,16) - - .p2align 5 -L(tail3): - M_TAIL(v2,32) - - .p2align 5 -L(tail4): - M_TAIL(v3,48) - - .p2align 5 -L(tail5): - M_TAIL(v4,64) - - .p2align 5 -L(tail6): - M_TAIL(v5,80) - - .p2align 5 -L(tail7): - M_TAIL(v6,96) - - .p2align 5 -L(tail8): - M_TAIL(v7,112) - - .p2align 5 -L(tail9): - M_TAIL(v8,128) - - .p2align 5 -L(tail10): - M_TAIL(v9,144) - - .p2align 5 -L(tail11): - M_TAIL(v10,160) - - .p2align 5 -L(tail12): - M_TAIL(v0,176) - - .p2align 5 -L(tail13): - M_TAIL(v1,192) - - .p2align 5 -L(tail14): - M_TAIL(v2,208) - - .p2align 5 -L(tail15): - M_TAIL(v3,224) - - .p2align 5 -L(found): - vctzlsbb r7,v6 - cmpld r5,r7 - ble L(null) - add r3,r3,r7 - blr - - .p2align 5 -L(null): - li r3,0 - blr - -END (MEMCHR) - -weak_alias (__memchr, memchr) -libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index a38ff46448..fa1107dfd9 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -31,10 +31,10 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ strncase-power8 ifneq (,$(filter %le,$(config-machine))) -sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \ - memmove-power10 memset-power10 rawmemchr-power9 \ - rawmemchr-power10 strcmp-power9 strcmp-power10 \ - strncmp-power9 strcpy-power9 stpcpy-power9 \ +sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \ + rawmemchr-power9 rawmemchr-power10 \ + strcmp-power9 strcmp-power10 strncmp-power9 \ + strcpy-power9 stpcpy-power9 \ strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 30fd89e109..9b3e617306 100644 --- 
a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -226,12 +226,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ IFUNC_IMPL (i, name, memchr, -#ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, memchr, - hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX, - __memchr_power10) -#endif IFUNC_IMPL_ADD (array, i, memchr, hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC, diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S deleted file mode 100644 index 7d35ef28a9..0000000000 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S +++ /dev/null @@ -1,28 +0,0 @@ -/* Optimized memchr implementation for POWER10/PPC64. - Copyright (C) 2016-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define MEMCHR __memchr_power10 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(name,alias) - -#include -#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/sysdeps/powerpc/powerpc64/multiarch/memchr.c index 57d23e7b18..b4655dfcaa 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memchr.c @@ -25,23 +25,15 @@ extern __typeof (__memchr) __memchr_ppc attribute_hidden; extern __typeof (__memchr) __memchr_power7 attribute_hidden; extern __typeof (__memchr) __memchr_power8 attribute_hidden; -# ifdef __LITTLE_ENDIAN__ -extern __typeof (__memchr) __memchr_power10 attribute_hidden; -# endif /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ libc_ifunc (__memchr, -# ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __memchr_power10 : -# endif - (hwcap2 & PPC_FEATURE2_ARCH_2_07 - && hwcap & PPC_FEATURE_HAS_ALTIVEC) - ? __memchr_power8 : - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __memchr_power7 - : __memchr_ppc); + (hwcap2 & PPC_FEATURE2_ARCH_2_07 + && hwcap & PPC_FEATURE_HAS_ALTIVEC) + ? __memchr_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memchr_power7 + : __memchr_ppc); weak_alias (__memchr, memchr) libc_hidden_builtin_def (memchr) commit 7e12550b8e3a11764a4a9090ce6bd3fc23fc8a8e Author: Carlos O'Donell Date: Mon Jun 16 13:09:57 2025 -0400 ppc64le: Revert "powerpc: Optimized strcmp for power10" (CVE-2025-5702) This reverts commit 3367d8e180848030d1646f088759f02b8dfe0d6f Reason for revert: Power10 strcmp clobbers non-volatile vector registers (Bug 33056) Tested on ppc64le without regression. 
(cherry picked from commit 15808c77b35319e67ee0dc8f984a9a1a434701bc) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S deleted file mode 100644 index 00f1e9c170..0000000000 --- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S +++ /dev/null @@ -1,204 +0,0 @@ -/* Optimized strcmp implementation for PowerPC64/POWER10. - Copyright (C) 2021-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ -#include - -#ifndef STRCMP -# define STRCMP strcmp -#endif - -/* Implements the function - int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]). */ - -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define COMPARE_16(vreg1,vreg2,offset) \ - lxv vreg1+32,offset(r3); \ - lxv vreg2+32,offset(r4); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(different); \ - -#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ - vcmpnezb. v7,vreg1+1,vreg2+1; \ - bne cr6,L(label1); \ - vcmpnezb. 
v7,vreg1,vreg2; \ - bne cr6,L(label2); \ - -#define TAIL(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; \ - -#define CHECK_N_BYTES(reg1,reg2,len_reg) \ - sldi r0,len_reg,56; \ - lxvl 32+v4,reg1,r0; \ - lxvl 32+v5,reg2,r0; \ - add reg1,reg1,len_reg; \ - add reg2,reg2,len_reg; \ - vcmpnezb. v7,v4,v5; \ - vctzlsbb r6,v7; \ - cmpld cr7,r6,len_reg; \ - blt cr7,L(different); \ - - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - - .machine power9 -ENTRY_TOCLESS (STRCMP, 4) - li r11,16 - /* eq bit of cr1 used as swap status flag to indicate if - source pointers were swapped. */ - crclr 4*cr1+eq - vspltisb v19,-1 - andi. r7,r3,15 - sub r7,r11,r7 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - sub r5,r11,r9 /* r5(nalign2) = 16 - (str2 & 15). */ - cmpld cr7,r7,r5 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) - /* Swap r3 and r4, and r7 and r5 such that r3 and r7 hold the - pointer which is closer to the next 16B boundary so that only - one CHECK_N_BYTES is needed before entering the loop below. */ - mr r8,r4 - mr r4,r3 - mr r3,r8 - mr r12,r7 - mr r7,r5 - mr r5,r12 - crset 4*cr1+eq /* Set bit on swapping source pointers. */ - - .p2align 5 -L(nalign1_min): - CHECK_N_BYTES(r3,r4,r7) - - .p2align 5 -L(s1_aligned): - /* r9 and r5 is number of bytes to be read after and before - page boundary correspondingly. */ - sub r5,r5,r7 - subfic r9,r5,16 - /* Now let r7 hold the count of quadwords which can be - checked without crossing a page boundary. quadword offset is - (str2>>4)&0xFF. */ - rlwinm r7,r4,28,0xFF - /* Below check is required only for first iteration. For second - iteration and beyond, the new loop counter is always 255. */ - cmpldi r7,255 - beq L(L3) - /* Get the initial loop count by 255-((str2>>4)&0xFF). */ - subfic r11,r7,255 - - .p2align 5 -L(L1): - mtctr r11 - - .p2align 5 -L(L2): - COMPARE_16(v4,v5,0) /* Load 16B blocks using lxv. 
*/ - addi r3,r3,16 - addi r4,r4,16 - bdnz L(L2) - /* Cross the page boundary of s2, carefully. */ - - .p2align 5 -L(L3): - CHECK_N_BYTES(r3,r4,r5) - CHECK_N_BYTES(r3,r4,r9) - li r11,255 /* Load the new loop counter. */ - b L(L1) - - .p2align 5 -L(same_aligned): - CHECK_N_BYTES(r3,r4,r7) - /* Align s1 to 32B and adjust s2 address. - Use lxvp only if both s1 and s2 are 32B aligned. */ - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - - clrldi r6,r3,59 - subfic r5,r6,32 - add r3,r3,r5 - add r4,r4,r5 - andi. r5,r4,0x1F - beq cr0,L(32B_aligned_loop) - - .p2align 5 -L(16B_aligned_loop): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - b L(16B_aligned_loop) - - /* Calculate and return the difference. */ -L(different): - vctzlsbb r6,v7 - vextubrx r5,r6,v4 - vextubrx r4,r6,v5 - bt 4*cr1+eq,L(swapped) - subf r3,r4,r5 - blr - - /* If src pointers were swapped, then swap the - indices and calculate the return value. 
*/ -L(swapped): - subf r3,r5,r4 - blr - - .p2align 5 -L(32B_aligned_loop): - COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) - addi r3,r3,128 - addi r4,r4,128 - b L(32B_aligned_loop) - -L(tail1): TAIL(v15,v17) -L(tail2): TAIL(v14,v16) -L(tail3): TAIL(v19,v21) -L(tail4): TAIL(v18,v20) -L(tail5): TAIL(v23,v25) -L(tail6): TAIL(v22,v24) -L(tail7): TAIL(v27,v29) -L(tail8): TAIL(v26,v28) - -END (STRCMP) -libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index fa1107dfd9..9f15f3207f 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -33,8 +33,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ ifneq (,$(filter %le,$(config-machine))) sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \ rawmemchr-power9 rawmemchr-power10 \ - strcmp-power9 strcmp-power10 strncmp-power9 \ - strcpy-power9 stpcpy-power9 \ + strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 \ strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 9b3e617306..78443b7f34 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -377,10 +377,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. 
*/ IFUNC_IMPL (i, name, strcmp, #ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, strcmp, - (hwcap2 & PPC_FEATURE2_ARCH_3_1) - && (hwcap & PPC_FEATURE_HAS_VSX), - __strcmp_power10) IFUNC_IMPL_ADD (array, i, strcmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC, diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S deleted file mode 100644 index 1a9f6069f5..0000000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S +++ /dev/null @@ -1,26 +0,0 @@ -/* Optimized strcmp implementation for POWER10/PPC64. - Copyright (C) 2021-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define STRCMP __strcmp_power10 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#include -#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */ diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c index ff32496fab..06b9b4090f 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c @@ -29,16 +29,12 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden; extern __typeof (strcmp) __strcmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strcmp) __strcmp_power9 attribute_hidden; -extern __typeof (strcmp) __strcmp_power10 attribute_hidden; # endif # undef strcmp libc_ifunc_redirected (__redirect_strcmp, strcmp, # ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __strcmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? __strcmp_power9 : commit 23a02e382c8ffebfed00a082d8898f1aa468b5da Author: Florian Weimer Date: Wed May 21 16:47:34 2025 +0200 support: Pick group in support_capture_subprogram_self_sgid if UID == 0 When running as root, it is likely that we can run under any group. Pick a harmless group from /etc/group in this case. Reviewed-by: Carlos O'Donell (cherry picked from commit 2f769cec448d84a62b7dd0d4ff56978fe22c0cd6) diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c index 2383481911..1cb344eb04 100644 --- a/support/support_capture_subprocess.c +++ b/support/support_capture_subprocess.c @@ -21,7 +21,11 @@ #include #include +#include +#include +#include #include +#include #include #include #include @@ -210,10 +214,48 @@ err: return status; } +/* Returns true if a group with NAME has been found, and writes its + GID to *TARGET. 
*/ +static bool +find_sgid_group (gid_t *target, const char *name) +{ + /* Do not use getgrname_r because it does not work in statically + linked binaries if the system libc is different. */ + FILE *fp = fopen ("/etc/group", "rce"); + if (fp == NULL) + return false; + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + bool ok = false; + struct scratch_buffer buf; + scratch_buffer_init (&buf); + while (true) + { + struct group grp; + struct group *result = NULL; + int status = fgetgrent_r (fp, &grp, buf.data, buf.length, &result); + if (status == 0 && result != NULL) + { + if (strcmp (result->gr_name, name) == 0) + { + *target = result->gr_gid; + ok = true; + break; + } + } + else if (errno != ERANGE) + break; + else if (!scratch_buffer_grow (&buf)) + break; + } + scratch_buffer_free (&buf); + fclose (fp); + return ok; +} + int support_capture_subprogram_self_sgid (const char *child_id) { - gid_t target = 0; const int count = 64; gid_t groups[count]; @@ -225,6 +267,7 @@ support_capture_subprogram_self_sgid (const char *child_id) (intmax_t) getuid ()); gid_t current = getgid (); + gid_t target = current; for (int i = 0; i < ret; ++i) { if (groups[i] != current) @@ -234,9 +277,16 @@ support_capture_subprogram_self_sgid (const char *child_id) } } - if (target == 0) - FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", - (intmax_t) getuid ()); + if (target == current) + { + /* If running as root, try to find a harmless group for SGID. */ + if (getuid () != 0 + || (!find_sgid_group (&target, "nogroup") + && !find_sgid_group (&target, "bin") + && !find_sgid_group (&target, "daemon"))) + FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", + (intmax_t) getuid ()); + } return copy_and_spawn_sgid (child_id, target); } commit dbc83657e290bdad3245259be80fb84cbe10304c Author: Florian Weimer Date: Thu May 22 14:36:37 2025 +0200 Fix error reporting (false negatives) in SGID tests And simplify the interface of support_capture_subprogram_self_sgid. 
Use the existing framework for temporary directories (now with mode 0700) and directory/file deletion. Handle all execution errors within support_capture_subprogram_self_sgid. In particular, this includes test failures because the invoked program did not exit with exit status zero. Existing tests that expect exit status 42 are adjusted to use zero instead. In addition, fix callers not to call exit (0) with test failures pending (which may mask them, especially when running with --direct). Fixes commit 35fc356fa3b4f485bd3ba3114c9f774e5df7d3c2 ("elf: Fix subprocess status handling for tst-dlopen-sgid (bug 32987)"). Reviewed-by: Carlos O'Donell (cherry picked from commit 3a3fb2ed83f79100c116c824454095ecfb335ad7) diff --git a/elf/tst-dlopen-sgid.c b/elf/tst-dlopen-sgid.c index 5688b79f2e..8aec52e19f 100644 --- a/elf/tst-dlopen-sgid.c +++ b/elf/tst-dlopen-sgid.c @@ -70,13 +70,7 @@ do_test (void) free (libdir); - int status = support_capture_subprogram_self_sgid (magic_argument); - - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - return EXIT_UNSUPPORTED; - - if (!WIFEXITED (status)) - FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); + support_capture_subprogram_self_sgid (magic_argument); return 0; } diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index a47219047f..233eec7631 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -105,10 +105,7 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - - /* Special return code to make sure that the child executed all the way - through. */ - exit (42); + return 0; } else { @@ -127,18 +124,7 @@ do_test (int argc, char **argv) continue; } - int status = support_capture_subprogram_self_sgid (buf); - - /* Bail out early if unsupported. 
*/ - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - return EXIT_UNSUPPORTED; - - if (WEXITSTATUS (status) != 42) - { - printf (" [%d] child failed with status %d\n", i, - WEXITSTATUS (status)); - support_record_failure (); - } + support_capture_subprogram_self_sgid (buf); } return 0; } diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c index 59f2ffeb88..ee3f058468 100644 --- a/elf/tst-env-setuid.c +++ b/elf/tst-env-setuid.c @@ -147,10 +147,7 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - - /* Special return code to make sure that the child executed all the way - through. */ - exit (42); + return 0; } else { @@ -174,17 +171,7 @@ do_test (int argc, char **argv) free (profilepath); } - int status = support_capture_subprogram_self_sgid (SETGID_CHILD); - - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - exit (EXIT_UNSUPPORTED); - - if (WEXITSTATUS (status) != 42) - { - printf (" child failed with status %d\n", - WEXITSTATUS (status)); - support_record_failure (); - } + support_capture_subprogram_self_sgid (SETGID_CHILD); return 0; } diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c index cc26ed6d15..cefee58d46 100644 --- a/stdlib/tst-secure-getenv.c +++ b/stdlib/tst-secure-getenv.c @@ -57,13 +57,7 @@ do_test (void) exit (1); } - int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); - - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - return EXIT_UNSUPPORTED; - - if (!WIFEXITED (status)) - FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); + support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); return 0; } @@ -82,6 +76,7 @@ alternative_main (int argc, char **argv) if (secure_getenv ("PATH") != NULL) FAIL_EXIT (4, "PATH variable not filtered out\n"); + support_record_failure_barrier (); exit (EXIT_SUCCESS); } } diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h index 5406d9f6c0..57bb941e7d 100644 --- a/support/capture_subprocess.h +++ b/support/capture_subprocess.h @@ -42,10 
+42,12 @@ struct support_capture_subprocess support_capture_subprocess struct support_capture_subprocess support_capture_subprogram (const char *file, char *const argv[], char *const envp[]); -/* Copy the running program into a setgid binary and run it with CHILD_ID - argument. If execution is successful, return the exit status of the child - program, otherwise return a non-zero failure exit code. */ -int support_capture_subprogram_self_sgid (const char *child_id); +/* Copy the running program into a setgid binary and run it with + CHILD_ID argument. If the program exits with a non-zero status, + exit with that exit status (or status 1 if the program did not exit + normally). If the test cannot be performed, exit with + EXIT_UNSUPPORTED. */ +void support_capture_subprogram_self_sgid (const char *child_id); /* Deallocate the subprocess data captured by support_capture_subprocess. */ diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c index 1cb344eb04..cbc6951064 100644 --- a/support/support_capture_subprocess.c +++ b/support/support_capture_subprocess.c @@ -31,6 +31,7 @@ #include #include #include +#include #include static void @@ -113,105 +114,44 @@ support_capture_subprogram (const char *file, char *const argv[], /* Copies the executable into a restricted directory, so that we can safely make it SGID with the TARGET group ID. Then runs the executable. 
*/ -static int +static void copy_and_spawn_sgid (const char *child_id, gid_t gid) { - char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", - test_dir, (intmax_t) getpid ()); + char *dirname = support_create_temp_directory ("tst-glibc-sgid-"); char *execname = xasprintf ("%s/bin", dirname); - int infd = -1; - int outfd = -1; - int ret = 1, status = 1; - - TEST_VERIFY (mkdir (dirname, 0700) == 0); - if (support_record_failure_is_failed ()) - goto err; + add_temp_file (execname); - infd = open ("/proc/self/exe", O_RDONLY); - if (infd < 0) + if (access ("/proc/self/exe", R_OK) != 0) FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n"); - outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); - TEST_VERIFY (outfd >= 0); - if (support_record_failure_is_failed ()) - goto err; - - char buf[4096]; - for (;;) - { - ssize_t rdcount = read (infd, buf, sizeof (buf)); - TEST_VERIFY (rdcount >= 0); - if (support_record_failure_is_failed ()) - goto err; - if (rdcount == 0) - break; - char *p = buf; - char *end = buf + rdcount; - while (p != end) - { - ssize_t wrcount = write (outfd, buf, end - p); - if (wrcount == 0) - errno = ENOSPC; - TEST_VERIFY (wrcount > 0); - if (support_record_failure_is_failed ()) - goto err; - p += wrcount; - } - } + support_copy_file ("/proc/self/exe", execname); - bool chowned = false; - TEST_VERIFY ((chowned = fchown (outfd, getuid (), gid) == 0) - || errno == EPERM); - if (support_record_failure_is_failed ()) - goto err; - else if (!chowned) - { - ret = 77; - goto err; - } + if (chown (execname, getuid (), gid) != 0) + FAIL_UNSUPPORTED ("cannot change group of \"%s\" to %jd: %m", + execname, (intmax_t) gid); - TEST_VERIFY (fchmod (outfd, 02750) == 0); - if (support_record_failure_is_failed ()) - goto err; - TEST_VERIFY (close (outfd) == 0); - if (support_record_failure_is_failed ()) - goto err; - TEST_VERIFY (close (infd) == 0); - if (support_record_failure_is_failed ()) - goto err; + if (chmod (execname, 02750) != 0) + 
FAIL_UNSUPPORTED ("cannot make \"%s\" SGID: %m ", execname); /* We have the binary, now spawn the subprocess. Avoid using support_subprogram because we only want the program exit status, not the contents. */ - ret = 0; - infd = outfd = -1; char * const args[] = {execname, (char *) child_id, NULL}; + int status = support_subprogram_wait (args[0], args); - status = support_subprogram_wait (args[0], args); + free (execname); + free (dirname); -err: - if (outfd >= 0) - close (outfd); - if (infd >= 0) - close (infd); - if (execname != NULL) - { - unlink (execname); - free (execname); - } - if (dirname != NULL) + if (WIFEXITED (status)) { - rmdir (dirname); - free (dirname); + if (WEXITSTATUS (status) == 0) + return; + else + exit (WEXITSTATUS (status)); } - - if (ret == 77) - FAIL_UNSUPPORTED ("Failed to make sgid executable for test\n"); - if (ret != 0) - FAIL_EXIT1 ("Failed to make sgid executable for test\n"); - - return status; + else + FAIL_EXIT1 ("subprogram failed with status %d", status); } /* Returns true if a group with NAME has been found, and writes its @@ -253,7 +193,7 @@ find_sgid_group (gid_t *target, const char *name) return ok; } -int +void support_capture_subprogram_self_sgid (const char *child_id) { const int count = 64; @@ -288,7 +228,7 @@ support_capture_subprogram_self_sgid (const char *child_id) (intmax_t) getuid ()); } - return copy_and_spawn_sgid (child_id, target); + copy_and_spawn_sgid (child_id, target); } void commit 2eb180377b96771b8368b0915669c8c7b267e739 Author: Florian Weimer Date: Mon Jul 21 21:43:49 2025 +0200 posix: Fix double-free after allocation failure in regcomp (bug 33185) If a memory allocation failure occurs during bracket expression parsing in regcomp, a double-free error may result. Reported-by: Anastasia Belova Co-authored-by: Paul Eggert Reviewed-by: Andreas K. 
Huettel (cherry picked from commit 7ea06e994093fa0bcca0d0ee2c1db271d8d7885d) diff --git a/NEWS b/NEWS index 4b290ad4bf..253b07ae99 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,7 @@ The following bugs are resolved with this release: [32470] x86: Avoid integer truncation with large cache sizes [32810] Crash on x86-64 if XSAVEC disable via tunable [32987] elf: Fix subprocess status handling for tst-dlopen-sgid + [33185] Fix double-free after allocation failure in regcomp Version 2.40 diff --git a/posix/Makefile b/posix/Makefile index 2c598cd20a..830278a423 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -303,6 +303,7 @@ tests := \ tst-posix_spawn-setsid \ tst-preadwrite \ tst-preadwrite64 \ + tst-regcomp-bracket-free \ tst-regcomp-truncated \ tst-regex \ tst-regex2 \ diff --git a/posix/regcomp.c b/posix/regcomp.c index 5380d3c7b9..6595bb3c0d 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -3384,6 +3384,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, { #ifdef RE_ENABLE_I18N free_charset (mbcset); + mbcset = NULL; #endif /* Build a tree for simple bracket. */ br_token.type = SIMPLE_BRACKET; @@ -3399,7 +3400,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, parse_bracket_exp_free_return: re_free (sbcset); #ifdef RE_ENABLE_I18N - free_charset (mbcset); + if (__glibc_likely (mbcset != NULL)) + free_charset (mbcset); #endif /* RE_ENABLE_I18N */ return NULL; } diff --git a/posix/tst-regcomp-bracket-free.c b/posix/tst-regcomp-bracket-free.c new file mode 100644 index 0000000000..3c091d8c44 --- /dev/null +++ b/posix/tst-regcomp-bracket-free.c @@ -0,0 +1,176 @@ +/* Test regcomp bracket parsing with injected allocation failures (bug 33185). + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This test invokes regcomp multiple times, failing one memory + allocation in each call. The function call should fail with + REG_ESPACE (or succeed if it can recover from the allocation + failure). Previously, there was a double-free bug. */ + +#include <errno.h> +#include <regex.h> +#include <stdio.h> +#include <string.h> +#include <support/check.h> +#include <support/namespace.h> +#include <support/support.h> + +/* Data structure allocated via MAP_SHARED, so that writes from the + subprocess are visible. */ +struct shared_data +{ + /* Number of tracked allocations performed so far. */ + volatile unsigned int allocation_count; + + /* If this number is reached, one allocation fails. */ + volatile unsigned int failing_allocation; + + /* The subprocess stores the expected name here. */ + char name[100]; +}; + +/* Allocation count in shared mapping. */ +static struct shared_data *shared; + +/* Returns true if a failure should be injected for this allocation. */ +static bool +fail_this_allocation (void) +{ + if (shared != NULL) + { + unsigned int count = shared->allocation_count; + shared->allocation_count = count + 1; + return count == shared->failing_allocation; + } + else + return false; +} + +/* Failure-injecting wrappers for allocation functions used by glibc. 
*/ + +void * +malloc (size_t size) +{ + if (fail_this_allocation ()) + { + errno = ENOMEM; + return NULL; + } + extern __typeof (malloc) __libc_malloc; + return __libc_malloc (size); +} + +void * +calloc (size_t a, size_t b) +{ + if (fail_this_allocation ()) + { + errno = ENOMEM; + return NULL; + } + extern __typeof (calloc) __libc_calloc; + return __libc_calloc (a, b); +} + +void * +realloc (void *ptr, size_t size) +{ + if (fail_this_allocation ()) + { + errno = ENOMEM; + return NULL; + } + extern __typeof (realloc) __libc_realloc; + return __libc_realloc (ptr, size); +} + +/* No-op subprocess to verify that support_isolate_in_subprocess does + not perform any heap allocations. */ +static void +no_op (void *ignored) +{ +} + +/* Perform a regcomp call in a subprocess. Used to count its + allocations. */ +static void +initialize (void *regexp1) +{ + const char *regexp = regexp1; + + shared->allocation_count = 0; + + regex_t reg; + TEST_COMPARE (regcomp (&reg, regexp, 0), 0); +} + +/* Perform regcomp in a subprocess with fault injection. */ +static void +test_in_subprocess (void *regexp1) +{ + const char *regexp = regexp1; + unsigned int inject_at = shared->failing_allocation; + + regex_t reg; + int ret = regcomp (&reg, regexp, 0); + + if (ret != 0) + { + TEST_COMPARE (ret, REG_ESPACE); + printf ("info: allocation %u failure results in return value %d," + " error %s (%d)\n", + inject_at, ret, strerrorname_np (errno), errno); + } +} + +static int +do_test (void) +{ + char regexp[] = "[:alpha:]"; + + shared = support_shared_allocate (sizeof (*shared)); + + /* Disable fault injection. */ + shared->failing_allocation = ~0U; + + support_isolate_in_subprocess (no_op, NULL); + TEST_COMPARE (shared->allocation_count, 0); + + support_isolate_in_subprocess (initialize, regexp); + + /* The number of allocations in the successful case, plus some + slack. Once the number of expected allocations is exceeded, + injecting further failures does not make a difference. 
*/ + unsigned int maximum_allocation_count = shared->allocation_count; + printf ("info: successful call performs %u allocations\n", + maximum_allocation_count); + maximum_allocation_count += 10; + + for (unsigned int inject_at = 0; inject_at <= maximum_allocation_count; + ++inject_at) + { + shared->allocation_count = 0; + shared->failing_allocation = inject_at; + support_isolate_in_subprocess (test_in_subprocess, regexp); + } + + support_shared_free (shared); + + return 0; +} + +#include <support/test-driver.c>