diff --git a/ADVISORIES b/ADVISORIES new file mode 100644 index 0000000000..d4e33f2df3 --- /dev/null +++ b/ADVISORIES @@ -0,0 +1,2 @@ +For the GNU C Library Security Advisories, see the git master branch: +https://sourceware.org/git/?p=glibc.git;a=tree;f=advisories;hb=HEAD diff --git a/NEWS b/NEWS index 31281ac408..595998fc6e 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,33 @@ See the end for copying conditions. Please send GNU C library bug reports via using `glibc' in the "product" field. +Version 2.40.1 + +The following bugs are resolved with this release: + + [19341] ctype: Fallback initialization of TLS using relocations + [27821] ungetc: Fix backup buffer leak on program exit + [30081] resolv: Do not wait for non-existing second DNS response after error + [31394] clone on sparc might fail with -EFAULT for no valid reason + [31717] elf: Avoid re-initializing already allocated TLS in dlopen + [31890] resolv: Allow short error responses to match any DNS query + [31943] _dl_find_object can fail if ld.so contains gaps between load segments + [31968] mremap implementation in C does not handle arguments correctly + [32026] strerror/strsignal TLS not handled correctly for secondary namespaces + [32052] Name space violation in fortify wrappers + [32137] libio: Attempt wide backup free only for non-legacy code + [32214] Fix missing randomness in __gen_tempname + [32231] elf: Change ldconfig auxcache magic number + [32245] glibc -Wstringop-overflow= build failure on hppa + [32470] x86: Avoid integer truncation with large cache sizes + [32483] ctype macros segfault in multithreaded programs with multiple libc.so + [32810] Crash on x86-64 if XSAVEC disable via tunable + [32987] elf: Fix subprocess status handling for tst-dlopen-sgid + [33185] Fix double-free after allocation failure in regcomp + [33234] Use TLS initial-exec model for __libc_tsd_CTYPE_* thread variables + [33361] nss: Group merge does not react to ERANGE during merge + [33601] aarch64: Do not link conform tests 
with -Wl,-z,force-bti + Version 2.40 Major new features: diff --git a/advisories/GLIBC-SA-2023-0001 b/advisories/GLIBC-SA-2023-0001 deleted file mode 100644 index 3d19c91b6a..0000000000 --- a/advisories/GLIBC-SA-2023-0001 +++ /dev/null @@ -1,14 +0,0 @@ -printf: incorrect output for integers with thousands separator and width field - -When the printf family of functions is called with a format specifier -that uses an (enable grouping) and a minimum width -specifier, the resulting output could be larger than reasonably expected -by a caller that computed a tight bound on the buffer size. The -resulting larger than expected output could result in a buffer overflow -in the printf family of functions. - -CVE-Id: CVE-2023-25139 -Public-Date: 2023-02-02 -Vulnerable-Commit: e88b9f0e5cc50cab57a299dc7efe1a4eb385161d (2.37) -Fix-Commit: c980549cc6a1c03c23cc2fe3e7b0fe626a0364b0 (2.38) -Fix-Commit: 07b9521fc6369d000216b96562ff7c0ed32a16c4 (2.37-4) diff --git a/advisories/GLIBC-SA-2023-0002 b/advisories/GLIBC-SA-2023-0002 deleted file mode 100644 index 5122669a64..0000000000 --- a/advisories/GLIBC-SA-2023-0002 +++ /dev/null @@ -1,15 +0,0 @@ -getaddrinfo: Stack read overflow in no-aaaa mode - -If the system is configured in no-aaaa mode via /etc/resolv.conf, -getaddrinfo is called for the AF_UNSPEC address family, and a DNS -response is received over TCP that is larger than 2048 bytes, -getaddrinfo may potentially disclose stack contents via the returned -address data, or crash. 
- -CVE-Id: CVE-2023-4527 -Public-Date: 2023-09-12 -Vulnerable-Commit: f282cdbe7f436c75864e5640a409a10485e9abb2 (2.36) -Fix-Commit: bd77dd7e73e3530203be1c52c8a29d08270cb25d (2.39) -Fix-Commit: 4ea972b7edd7e36610e8cde18bf7a8149d7bac4f (2.36-113) -Fix-Commit: b7529346025a130fee483d42178b5c118da971bb (2.37-38) -Fix-Commit: b25508dd774b617f99419bdc3cf2ace4560cd2d6 (2.38-19) diff --git a/advisories/GLIBC-SA-2023-0003 b/advisories/GLIBC-SA-2023-0003 deleted file mode 100644 index d3aef80348..0000000000 --- a/advisories/GLIBC-SA-2023-0003 +++ /dev/null @@ -1,15 +0,0 @@ -getaddrinfo: Potential use-after-free - -When an NSS plugin only implements the _gethostbyname2_r and -_getcanonname_r callbacks, getaddrinfo could use memory that was freed -during buffer resizing, potentially causing a crash or read or write to -arbitrary memory. - -CVE-Id: CVE-2023-4806 -Public-Date: 2023-09-12 -Fix-Commit: 973fe93a5675c42798b2161c6f29c01b0e243994 (2.39) -Fix-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) -Fix-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) -Fix-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) -Fix-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) -Fix-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) diff --git a/advisories/GLIBC-SA-2023-0004 b/advisories/GLIBC-SA-2023-0004 deleted file mode 100644 index 5286a7aa54..0000000000 --- a/advisories/GLIBC-SA-2023-0004 +++ /dev/null @@ -1,16 +0,0 @@ -tunables: local privilege escalation through buffer overflow - -If a tunable of the form NAME=NAME=VAL is passed in the environment of a -setuid program and NAME is valid, it may result in a buffer overflow, -which could be exploited to achieve escalated privileges. This flaw was -introduced in glibc 2.34. 
- -CVE-Id: CVE-2023-4911 -Public-Date: 2023-10-03 -Vulnerable-Commit: 2ed18c5b534d9e92fc006202a5af0df6b72e7aca (2.34) -Fix-Commit: 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa (2.39) -Fix-Commit: dcc367f148bc92e7f3778a125f7a416b093964d9 (2.34-423) -Fix-Commit: c84018a05aec80f5ee6f682db0da1130b0196aef (2.35-274) -Fix-Commit: 22955ad85186ee05834e47e665056148ca07699c (2.36-118) -Fix-Commit: b4e23c75aea756b4bddc4abcf27a1c6dca8b6bd3 (2.37-45) -Fix-Commit: 750a45a783906a19591fb8ff6b7841470f1f5701 (2.38-27) diff --git a/advisories/GLIBC-SA-2023-0005 b/advisories/GLIBC-SA-2023-0005 deleted file mode 100644 index cc4eb90b82..0000000000 --- a/advisories/GLIBC-SA-2023-0005 +++ /dev/null @@ -1,18 +0,0 @@ -getaddrinfo: DoS due to memory leak - -The fix for CVE-2023-4806 introduced a memory leak when an application -calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED -flags set. - -CVE-Id: CVE-2023-5156 -Public-Date: 2023-09-25 -Vulnerable-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) -Vulnerable-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) -Vulnerable-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) -Vulnerable-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) -Vulnerable-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) -Fix-Commit: 8006457ab7e1cd556b919f477348a96fe88f2e49 (2.34-421) -Fix-Commit: 17092c0311f954e6f3c010f73ce3a78c24ac279a (2.35-272) -Fix-Commit: 856bac55f98dc840e7c27cfa82262b933385de90 (2.36-116) -Fix-Commit: 4473d1b87d04b25cdd0e0354814eeaa421328268 (2.37-42) -Fix-Commit: 5ee59ca371b99984232d7584fe2b1a758b4421d3 (2.38-24) diff --git a/advisories/GLIBC-SA-2024-0001 b/advisories/GLIBC-SA-2024-0001 deleted file mode 100644 index 28931c75ae..0000000000 --- a/advisories/GLIBC-SA-2024-0001 +++ /dev/null @@ -1,15 +0,0 @@ -syslog: Heap buffer overflow in __vsyslog_internal - -__vsyslog_internal did not handle a case where printing a SYSLOG_HEADER -containing a long program name failed to update 
the required buffer -size, leading to the allocation and overflow of a too-small buffer on -the heap. - -CVE-Id: CVE-2023-6246 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: 6bd0e4efcc78f3c0115e5ea9739a1642807450da (2.39) -Fix-Commit: 23514c72b780f3da097ecf33a793b7ba9c2070d2 (2.38-42) -Fix-Commit: 97a4292aa4a2642e251472b878d0ec4c46a0e59a (2.37-57) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: d1a83b6767f68b3cb5b4b4ea2617254acd040c82 (2.36-126) diff --git a/advisories/GLIBC-SA-2024-0002 b/advisories/GLIBC-SA-2024-0002 deleted file mode 100644 index 940bfcf2fc..0000000000 --- a/advisories/GLIBC-SA-2024-0002 +++ /dev/null @@ -1,15 +0,0 @@ -syslog: Heap buffer overflow in __vsyslog_internal - -__vsyslog_internal used the return value of snprintf/vsnprintf to -calculate buffer sizes for memory allocation. If these functions (for -any reason) failed and returned -1, the resulting buffer would be too -small to hold output. - -CVE-Id: CVE-2023-6779 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: 7e5a0c286da33159d47d0122007aac016f3e02cd (2.39) -Fix-Commit: d0338312aace5bbfef85e03055e1212dd0e49578 (2.38-43) -Fix-Commit: 67062eccd9a65d7fda9976a56aeaaf6c25a80214 (2.37-58) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: 2bc9d7c002bdac38b5c2a3f11b78e309d7765b83 (2.36-127) diff --git a/advisories/GLIBC-SA-2024-0003 b/advisories/GLIBC-SA-2024-0003 deleted file mode 100644 index b43a5150ab..0000000000 --- a/advisories/GLIBC-SA-2024-0003 +++ /dev/null @@ -1,13 +0,0 @@ -syslog: Integer overflow in __vsyslog_internal - -__vsyslog_internal calculated a buffer size by adding two integers, but -did not first check if the addition would overflow. 
- -CVE-Id: CVE-2023-6780 -Public-Date: 2024-01-30 -Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) -Fix-Commit: ddf542da94caf97ff43cc2875c88749880b7259b (2.39) -Fix-Commit: d37c2b20a4787463d192b32041c3406c2bd91de0 (2.38-44) -Fix-Commit: 2b58cba076e912961ceaa5fa58588e4b10f791c0 (2.37-59) -Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) -Fix-Commit: b9b7d6a27aa0632f334352fa400771115b3c69b7 (2.36-128) diff --git a/advisories/GLIBC-SA-2024-0004 b/advisories/GLIBC-SA-2024-0004 deleted file mode 100644 index 08df2b3118..0000000000 --- a/advisories/GLIBC-SA-2024-0004 +++ /dev/null @@ -1,28 +0,0 @@ -ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence - -The iconv() function in the GNU C Library versions 2.39 and older may -overflow the output buffer passed to it by up to 4 bytes when converting -strings to the ISO-2022-CN-EXT character set, which may be used to -crash an application or overwrite a neighbouring variable. - -ISO-2022-CN-EXT uses escape sequences to indicate character set changes -(as specified by RFC 1922). While the SOdesignation has the expected -bounds checks, neither SS2designation nor SS3designation have its; -allowing a write overflow of 1, 2, or 3 bytes with fixed values: -'$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'. 
- -CVE-Id: CVE-2024-2961 -Public-Date: 2024-04-17 -Vulnerable-Commit: 755104edc75c53f4a0e7440334e944ad3c6b32fc (2.1.93-169) -Fix-Commit: f9dc609e06b1136bb0408be9605ce7973a767ada (2.40) -Fix-Commit: 31da30f23cddd36db29d5b6a1c7619361b271fb4 (2.39-31) -Fix-Commit: e1135387deded5d73924f6ca20c72a35dc8e1bda (2.38-66) -Fix-Commit: 89ce64b269a897a7780e4c73a7412016381c6ecf (2.37-89) -Fix-Commit: 4ed98540a7fd19f458287e783ae59c41e64df7b5 (2.36-164) -Fix-Commit: 36280d1ce5e245aabefb877fe4d3c6cff95dabfa (2.35-315) -Fix-Commit: a8b0561db4b9847ebfbfec20075697d5492a363c (2.34-459) -Fix-Commit: ed4f16ff6bed3037266f1fa682ebd32a18fce29c (2.33-263) -Fix-Commit: 682ad4c8623e611a971839990ceef00346289cc9 (2.32-140) -Fix-Commit: 3703c32a8d304c1ee12126134ce69be965f38000 (2.31-154) - -Reported-By: Charles Fol diff --git a/advisories/GLIBC-SA-2024-0005 b/advisories/GLIBC-SA-2024-0005 deleted file mode 100644 index a59596610a..0000000000 --- a/advisories/GLIBC-SA-2024-0005 +++ /dev/null @@ -1,22 +0,0 @@ -nscd: Stack-based buffer overflow in netgroup cache - -If the Name Service Cache Daemon's (nscd) fixed size cache is exhausted -by client requests then a subsequent client request for netgroup data -may result in a stack-based buffer overflow. This flaw was introduced -in glibc 2.15 when the cache was added to nscd. - -This vulnerability is only present in the nscd binary. 
- -CVE-Id: CVE-2024-33599 -Public-Date: 2024-04-23 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: 69c58d5ef9f584ea198bd00f7964d364d0e6b921 (2.31-155) -Fix-Commit: a77064893bfe8a701770e2f53a4d33805bc47a5a (2.32-141) -Fix-Commit: 5c75001a96abcd50cbdb74df24c3f013188d076e (2.33-264) -Fix-Commit: 52f73e5c4e29b14e79167272297977f360ae1e97 (2.34-460) -Fix-Commit: 7a95873543ce225376faf13bb71c43dea6d24f86 (2.35-316) -Fix-Commit: caa3151ca460bdd9330adeedd68c3112d97bffe4 (2.36-165) -Fix-Commit: f75c298e747b2b8b41b1c2f551c011a52c41bfd1 (2.37-91) -Fix-Commit: 5968aebb86164034b8f8421b4abab2f837a5bdaf (2.38-72) -Fix-Commit: 1263d583d2e28afb8be53f8d6922f0842036f35d (2.39-35) -Fix-Commit: 87801a8fd06db1d654eea3e4f7626ff476a9bdaa (2.40) diff --git a/advisories/GLIBC-SA-2024-0006 b/advisories/GLIBC-SA-2024-0006 deleted file mode 100644 index d44148d3d9..0000000000 --- a/advisories/GLIBC-SA-2024-0006 +++ /dev/null @@ -1,32 +0,0 @@ -nscd: Null pointer crash after notfound response - -If the Name Service Cache Daemon's (nscd) cache fails to add a not-found -netgroup response to the cache, the client request can result in a null -pointer dereference. This flaw was introduced in glibc 2.15 when the -cache was added to nscd. - -This vulnerability is only present in the nscd binary. 
- -CVE-Id: CVE-2024-33600 -Public-Date: 2024-04-24 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: b048a482f088e53144d26a61c390bed0210f49f2 (2.40) -Fix-Commit: 7835b00dbce53c3c87bbbb1754a95fb5e58187aa (2.40) -Fix-Commit: c99f886de54446cd4447db6b44be93dabbdc2f8b (2.39-37) -Fix-Commit: 5a508e0b508c8ad53bd0d2fb48fd71b242626341 (2.39-36) -Fix-Commit: 2ae9446c1b7a3064743b4a51c0bbae668ee43e4c (2.38-74) -Fix-Commit: 541ea5172aa658c4bd5c6c6d6fd13903c3d5bb0a (2.38-73) -Fix-Commit: a8070b31043c7585c36ba68a74298c4f7af075c3 (2.37-93) -Fix-Commit: 5eea50c4402e39588de98aa1d4469a79774703d4 (2.37-92) -Fix-Commit: f205b3af56740e3b014915b1bd3b162afe3407ef (2.36-167) -Fix-Commit: c34f470a615b136170abd16142da5dd0c024f7d1 (2.36-166) -Fix-Commit: bafadc589fbe21ae330e8c2af74db9da44a17660 (2.35-318) -Fix-Commit: 4370bef52b0f3f3652c6aa13d7a9bb3ac079746d (2.35-317) -Fix-Commit: 1f94122289a9bf7dba573f5d60327aaa2b85cf2e (2.34-462) -Fix-Commit: 966d6ac9e40222b84bb21674cc4f83c8d72a5a26 (2.34-461) -Fix-Commit: e3eef1b8fbdd3a7917af466ca9c4b7477251ca79 (2.33-266) -Fix-Commit: f20a8d696b13c6261b52a6434899121f8b19d5a7 (2.33-265) -Fix-Commit: be602180146de37582a3da3a0caa4b719645de9c (2.32-143) -Fix-Commit: 394eae338199078b7961b051c191539870742d7b (2.32-142) -Fix-Commit: 8d7949183760170c61e55def723c1d8050187874 (2.31-157) -Fix-Commit: 304ce5fe466c4762b21b36c26926a4657b59b53e (2.31-156) diff --git a/advisories/GLIBC-SA-2024-0007 b/advisories/GLIBC-SA-2024-0007 deleted file mode 100644 index b6928fa27a..0000000000 --- a/advisories/GLIBC-SA-2024-0007 +++ /dev/null @@ -1,28 +0,0 @@ -nscd: netgroup cache may terminate daemon on memory allocation failure - -The Name Service Cache Daemon's (nscd) netgroup cache uses xmalloc or -xrealloc and these functions may terminate the process due to a memory -allocation failure resulting in a denial of service to the clients. The -flaw was introduced in glibc 2.15 when the cache was added to nscd. 
- -This vulnerability is only present in the nscd binary. - -Subsequent refactoring of the netgroup cache only added more uses of -xmalloc and xrealloc. Uses of xmalloc and xrealloc in other parts of -nscd only occur during startup of the daemon and so are not affected by -client requests that could trigger an out of memory followed by -termination. - -CVE-Id: CVE-2024-33601 -Public-Date: 2024-04-24 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: c04a21e050d64a1193a6daab872bca2528bda44b (2.40) -Fix-Commit: a9a8d3eebb145779a18d90e3966009a1daa63cd8 (2.39-38) -Fix-Commit: 71af8ca864345d39b746d5cee84b94b430fad5db (2.38-75) -Fix-Commit: 6e106dc214d6a033a4e945d1c6cf58061f1c5f1f (2.37-94) -Fix-Commit: b6742463694b1dfdd5120b91ee21cf05d15ec2e2 (2.36-168) -Fix-Commit: 7a5864cac60e06000394128a5a2817b03542f5a3 (2.35-319) -Fix-Commit: 86f1d5f4129c373ac6fb6df5bcf38273838843cb (2.34-463) -Fix-Commit: 4d27d4b9a188786fc6a56745506cec2acfc51f83 (2.33-267) -Fix-Commit: 3ed195a8ec89da281e3c4bf887a13d281b72d8f4 (2.32-144) -Fix-Commit: bbf5a58ccb55679217f94de706164d15372fbbc0 (2.31-158) diff --git a/advisories/GLIBC-SA-2024-0008 b/advisories/GLIBC-SA-2024-0008 deleted file mode 100644 index d93e2a6f0b..0000000000 --- a/advisories/GLIBC-SA-2024-0008 +++ /dev/null @@ -1,26 +0,0 @@ -nscd: netgroup cache assumes NSS callback uses in-buffer strings - -The Name Service Cache Daemon's (nscd) netgroup cache can corrupt memory -when the NSS callback does not store all strings in the provided buffer. -The flaw was introduced in glibc 2.15 when the cache was added to nscd. - -This vulnerability is only present in the nscd binary. - -There is no guarantee from the NSS callback API that the returned -strings are all within the buffer. However, the netgroup cache code -assumes that the NSS callback uses in-buffer strings and if it doesn't -the buffer resizing logic could lead to potential memory corruption. 
- -CVE-Id: CVE-2024-33602 -Public-Date: 2024-04-24 -Vulnerable-Commit: 684ae515993269277448150a1ca70db3b94aa5bd (2.15) -Fix-Commit: c04a21e050d64a1193a6daab872bca2528bda44b (2.40) -Fix-Commit: a9a8d3eebb145779a18d90e3966009a1daa63cd8 (2.39-38) -Fix-Commit: 71af8ca864345d39b746d5cee84b94b430fad5db (2.38-75) -Fix-Commit: 6e106dc214d6a033a4e945d1c6cf58061f1c5f1f (2.37-94) -Fix-Commit: b6742463694b1dfdd5120b91ee21cf05d15ec2e2 (2.36-168) -Fix-Commit: 7a5864cac60e06000394128a5a2817b03542f5a3 (2.35-319) -Fix-Commit: 86f1d5f4129c373ac6fb6df5bcf38273838843cb (2.34-463) -Fix-Commit: 4d27d4b9a188786fc6a56745506cec2acfc51f83 (2.33-267) -Fix-Commit: 3ed195a8ec89da281e3c4bf887a13d281b72d8f4 (2.32-144) -Fix-Commit: bbf5a58ccb55679217f94de706164d15372fbbc0 (2.31-158) diff --git a/advisories/README b/advisories/README deleted file mode 100644 index b8f8a829ca..0000000000 --- a/advisories/README +++ /dev/null @@ -1,77 +0,0 @@ -GNU C Library Security Advisory Format -====================================== - -Security advisories in this directory follow a simple git commit log -format, with a heading and free-format description augmented with tags -to allow parsing key information. References to code changes are -specific to the glibc repository and follow a specific format: - - Tag-name: (release-version) - -The indicates a specific commit in the repository. The -release-version indicates the publicly consumable release in which this -commit is known to exist. The release-version is derived from the -git-describe format, (i.e. stripped out from glibc-2.34.NNN-gxxxx) and -is of the form 2.34-NNN. If the -NNN suffix is absent, it means that -the change is in that release tarball, otherwise the change is on the -release/2.YY/master branch and not in any released tarball. - -The following tags are currently being used: - -CVE-Id: -This is the CVE-Id assigned under the CVE Program -(https://www.cve.org/). - -Public-Date: -The date this issue became publicly known. 
- -Vulnerable-Commit: -The commit that introduced this vulnerability. There could be multiple -entries, one for each release branch in the glibc repository; the -release-version portion of this tag should tell you which branch this is -on. - -Fix-Commit: -The commit that fixed this vulnerability. There could be multiple -entries for each release branch in the glibc repository, indicating that -all of those commits contributed to fixing that issue in each of those -branches. - -Reported-By: -The entity that reported this issue. There could be multiple entries, one for -each reporter. - -Adding an Advisory ------------------- - -An advisory for a CVE needs to be added on the master branch in two steps: - -1. Add the text of the advisory without any Fix-Commit tags along with - the fix for the CVE. Add the Vulnerable-Commit tag, if applicable. - The advisories directory does not exist in release branches, so keep - the advisory text commit distinct from the code changes, to ease - backports. Ask for the GLIBC-SA advisory number from the security - team. - -2. Finish all backports on release branches and then back on the msater - branch, add all commit refs to the advisory using the Fix-Commit - tags. Don't bother adding the release-version subscript since the - next step will overwrite it. - -3. Run the process-advisories.sh script in the scripts directory on the - advisory: - - scripts/process-advisories.sh update GLIBC-SA-YYYY-NNNN - - (replace YYYY-NNNN with the actual advisory number). - -4. Verify the updated advisory and push the result. - -Getting a NEWS snippet from advisories --------------------------------------- - -Run: - - scripts/process-advisories.sh news - -and copy the content into the NEWS file. 
diff --git a/assert/Makefile b/assert/Makefile index 35dc908ddb..c0fe660bd6 100644 --- a/assert/Makefile +++ b/assert/Makefile @@ -38,6 +38,7 @@ tests := \ test-assert-perr \ tst-assert-c++ \ tst-assert-g++ \ + tst-assert-sa-2025-0001 \ # tests ifeq ($(have-cxx-thread_local),yes) diff --git a/assert/assert.c b/assert/assert.c index c29629f5f6..b6e37d694c 100644 --- a/assert/assert.c +++ b/assert/assert.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,8 @@ __assert_fail_base (const char *fmt, const char *assertion, const char *file, (void) __fxprintf (NULL, "%s", str); (void) fflush (stderr); - total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1); + total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1, + GLRO(dl_pagesize)); struct abort_msg_s *buf = __mmap (NULL, total, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); if (__glibc_likely (buf != MAP_FAILED)) diff --git a/assert/tst-assert-sa-2025-0001.c b/assert/tst-assert-sa-2025-0001.c new file mode 100644 index 0000000000..102cb0078d --- /dev/null +++ b/assert/tst-assert-sa-2025-0001.c @@ -0,0 +1,92 @@ +/* Test for CVE-2025-0395. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +/* Test that a large enough __progname does not result in a buffer overflow + when printing an assertion failure. This was CVE-2025-0395. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char *__progname; + +int +do_test (int argc, char **argv) +{ + + support_need_proc ("Reads /proc/self/maps to add guards to writable maps."); + ignore_stderr (); + + /* XXX assumes that the assert is on a 2 digit line number. */ + const char *prompt = ": %s:99: do_test: Assertion `argc < 1' failed.\n"; + + int ret = fprintf (stderr, prompt, __FILE__); + if (ret < 0) + FAIL_EXIT1 ("fprintf failed: %m\n"); + + size_t pagesize = getpagesize (); + size_t namesize = pagesize - 1 - ret; + + /* Alter the progname so that the assert message fills the entire page. */ + char progname[namesize]; + memset (progname, 'A', namesize - 1); + progname[namesize - 1] = '\0'; + __progname = progname; + + FILE *f = xfopen ("/proc/self/maps", "r"); + char *line = NULL; + size_t len = 0; + uintptr_t prev_to = 0; + + /* Pad the beginning of every writable mapping with a PROT_NONE map. This + ensures that the mmap in the assert_fail path never ends up below a + writable map and will terminate immediately in case of a buffer + overflow. 
*/ + while (xgetline (&line, &len, f)) + { + uintptr_t from, to; + char perm[4]; + + sscanf (line, "%" SCNxPTR "-%" SCNxPTR " %c%c%c%c ", + &from, &to, + &perm[0], &perm[1], &perm[2], &perm[3]); + + bool writable = (memchr (perm, 'w', 4) != NULL); + + if (prev_to != 0 && from - prev_to > pagesize && writable) + xmmap ((void *) from - pagesize, pagesize, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + + prev_to = to; + } + + xfclose (f); + + assert (argc < 1); + return 0; +} + +#define EXPECTED_SIGNAL SIGABRT +#define TEST_FUNCTION_ARGV do_test +#include diff --git a/benchtests/atanh-inputs b/benchtests/atanh-inputs index 455aa65b65..4985293254 100644 --- a/benchtests/atanh-inputs +++ b/benchtests/atanh-inputs @@ -1,6 +1,7 @@ ## args: double ## ret: double ## includes: math.h +## name: workload-random 0x1.5a2730bacd94ap-1 -0x1.b57eb40fc048ep-21 -0x1.c0b185fb450e2p-17 diff --git a/benchtests/sinh-inputs b/benchtests/sinh-inputs index 7b1ac46a39..2fcb2fabf8 100644 --- a/benchtests/sinh-inputs +++ b/benchtests/sinh-inputs @@ -1,6 +1,7 @@ ## args: double ## ret: double ## includes: math.h +## name: workload-random 0x1.bcb6129b5ff2bp8 -0x1.63057386325ebp9 0x1.62f1d7dc4e8bfp9 diff --git a/configure b/configure index 1d543548cd..77e1601250 100755 --- a/configure +++ b/configure @@ -4901,6 +4901,9 @@ with_fp_cond=1 # A preconfigure script may define another name to TLS descriptor variant mtls_descriptor=gnu2 +# A preconfigure script may define another name to traditional TLS variant +mtls_traditional=gnu + if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null` then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5 @@ -7223,6 +7226,39 @@ printf "%s\n" "$libc_cv_mtls_descriptor" >&6; } config_vars="$config_vars have-mtls-descriptor = $libc_cv_mtls_descriptor" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for traditional tls support" >&5 +printf %s "checking for traditional tls support... 
" >&6; } +if test ${libc_cv_test_mtls_traditional+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat > conftest.c <&5 + (eval $ac_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } +then + libc_cv_test_mtls_traditional=$mtls_traditional +else + libc_cv_test_mtls_traditional=no +fi +rm -f conftest* ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_test_mtls_traditional" >&5 +printf "%s\n" "$libc_cv_test_mtls_traditional" >&6; } +config_vars="$config_vars +have-test-mtls-traditional = $libc_cv_test_mtls_traditional" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5 printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; } if test ${libc_cv_wno_ignored_attributes+y} diff --git a/configure.ac b/configure.ac index 9cbc0bf68f..eb4f26d592 100644 --- a/configure.ac +++ b/configure.ac @@ -469,6 +469,9 @@ with_fp_cond=1 # A preconfigure script may define another name to TLS descriptor variant mtls_descriptor=gnu2 +# A preconfigure script may define another name to traditional TLS variant +mtls_traditional=gnu + dnl Let sysdeps/*/preconfigure act here. LIBC_PRECONFIGURE([$srcdir], [for sysdeps]) @@ -1334,6 +1337,28 @@ rm -f conftest*]) AC_SUBST(libc_cv_mtls_descriptor) LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor]) +dnl Check if CC supports traditional tls. +AC_CACHE_CHECK([for traditional tls support], + libc_cv_test_mtls_traditional, +[dnl +cat > conftest.c <&AS_MESSAGE_LOG_FD]) +then + libc_cv_test_mtls_traditional=$mtls_traditional +else + libc_cv_test_mtls_traditional=no +fi +rm -f conftest*]) +LIBC_CONFIG_VAR([have-test-mtls-traditional], + [$libc_cv_test_mtls_traditional]) + dnl clang emits an warning for a double alias redirection, to warn the dnl original symbol is sed even when weak definition overrides it. 
dnl It is a usual pattern for weak_alias, where multiple alias point to diff --git a/ctype/Makefile b/ctype/Makefile index 3e09938bd1..b7cd5f2282 100644 --- a/ctype/Makefile +++ b/ctype/Makefile @@ -36,6 +36,23 @@ aux := ctype-info tests := \ test_ctype \ + tst-ctype-tls-dlmopen \ + tst-ctype-tls-dlopen-static \ # tests +tests-static := \ + tst-ctype-tls-dlopen-static \ + # tests-static + +modules-names := \ + tst-ctype-tls-mod \ + # modules-names + include ../Rules + +$(objpfx)tst-ctype-tls-dlmopen: $(shared-thread-library) +$(objpfx)tst-ctype-tls-dlmopen.out: $(objpfx)tst-ctype-tls-mod.so +$(objpfx)tst-ctype-tls-dlopen-static: $(static-thread-library) +$(objpfx)tst-ctype-tls-dlopen-static.out: $(objpfx)tst-ctype-tls-mod.so +tst-ctype-tls-dlopen-static-ENV = \ + LD_LIBRARY_PATH=$(ld-library-path):$(common-objpfx):$(common-objpfx)elf diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c index 9032547567..b6cdf7eb66 100644 --- a/ctype/ctype-info.c +++ b/ctype/ctype-info.c @@ -19,20 +19,28 @@ #include #include -__libc_tsd_define (, const uint16_t *, CTYPE_B) -__libc_tsd_define (, const int32_t *, CTYPE_TOLOWER) -__libc_tsd_define (, const int32_t *, CTYPE_TOUPPER) +/* Fallback initialization using relocations. See the _nl_C_locobj + initializers in locale/xlocale.c. Usually, this is overwritten by + __ctype_init before user code runs, but this does not happen for + threads in secondary namespaces. With the initializers, secondary + namespaces at least get locale data from the C locale. 
*/ +__thread const uint16_t * __libc_tsd_CTYPE_B attribute_tls_model_ie + = (const uint16_t *) _nl_C_LC_CTYPE_class + 128; +__thread const int32_t * __libc_tsd_CTYPE_TOLOWER attribute_tls_model_ie + = (const int32_t *) _nl_C_LC_CTYPE_tolower + 128; +__thread const int32_t * __libc_tsd_CTYPE_TOUPPER attribute_tls_model_ie + = (const int32_t *) _nl_C_LC_CTYPE_toupper + 128; void __ctype_init (void) { - const uint16_t **bp = __libc_tsd_address (const uint16_t *, CTYPE_B); - *bp = (const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS) + 128; - const int32_t **up = __libc_tsd_address (const int32_t *, CTYPE_TOUPPER); - *up = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER) + 128); - const int32_t **lp = __libc_tsd_address (const int32_t *, CTYPE_TOLOWER); - *lp = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER) + 128); + __libc_tsd_CTYPE_B + = ((const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS)) + 128; + __libc_tsd_CTYPE_TOUPPER + = ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER)) + 128; + __libc_tsd_CTYPE_TOLOWER = + ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER)) + 128; } libc_hidden_def (__ctype_init) @@ -41,10 +49,7 @@ libc_hidden_def (__ctype_init) #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) /* Defined in locale/C-ctype.c. 
*/ -extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; extern const char _nl_C_LC_CTYPE_class32[] attribute_hidden; -extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden; -extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden; extern const char _nl_C_LC_CTYPE_class_upper[] attribute_hidden; extern const char _nl_C_LC_CTYPE_class_lower[] attribute_hidden; extern const char _nl_C_LC_CTYPE_class_alpha[] attribute_hidden; diff --git a/ctype/tst-ctype-tls-dlmopen.c b/ctype/tst-ctype-tls-dlmopen.c new file mode 100644 index 0000000000..f7eeb65551 --- /dev/null +++ b/ctype/tst-ctype-tls-dlmopen.c @@ -0,0 +1,2 @@ +#define DO_STATIC_TEST 0 +#include "tst-ctype-tls-skeleton.c" diff --git a/ctype/tst-ctype-tls-dlopen-static.c b/ctype/tst-ctype-tls-dlopen-static.c new file mode 100644 index 0000000000..c2c09c362c --- /dev/null +++ b/ctype/tst-ctype-tls-dlopen-static.c @@ -0,0 +1,2 @@ +#define DO_STATIC_TEST 1 +#include "tst-ctype-tls-skeleton.c" diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/ctype/tst-ctype-tls-mod.c similarity index 66% rename from sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S rename to ctype/tst-ctype-tls-mod.c index 7d35ef28a9..52cbb9dcb6 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S +++ b/ctype/tst-ctype-tls-mod.c @@ -1,5 +1,5 @@ -/* Optimized memchr implementation for POWER10/PPC64. - Copyright (C) 2016-2024 Free Software Foundation, Inc. +/* Wrappers for macros in a secondary namespace. + Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,13 +16,22 @@ License along with the GNU C Library; if not, see . 
*/ -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define MEMCHR __memchr_power10 +#include -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(name,alias) +int +my_isalpha (int ch) +{ + return isalpha (ch); +} -#include -#endif +int +my_toupper (int ch) +{ + return toupper (ch); +} + +int +my_tolower (int ch) +{ + return tolower (ch); +} diff --git a/ctype/tst-ctype-tls-skeleton.c b/ctype/tst-ctype-tls-skeleton.c new file mode 100644 index 0000000000..8c53e35899 --- /dev/null +++ b/ctype/tst-ctype-tls-skeleton.c @@ -0,0 +1,67 @@ +/* Test that <ctype.h> in a secondary namespace works. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Before this file is included, define DO_STATIC_TEST to 0 or 1. + With 0, dlmopen is used for the test. With 1, dlopen is used. 
*/ + +#include +#include +#include +#include +#include +#include + +static int (*my_isalpha) (int); +static int (*my_toupper) (int); +static int (*my_tolower) (int); + +static void * +checks (void *ignore) +{ + TEST_VERIFY (my_isalpha ('a')); + TEST_VERIFY (!my_isalpha ('0')); + TEST_COMPARE (my_toupper ('a'), 'A'); + TEST_COMPARE (my_toupper ('A'), 'A'); + TEST_COMPARE (my_tolower ('a'), 'a'); + TEST_COMPARE (my_tolower ('A'), 'a'); + return NULL; +} + +static int +do_test (void) +{ + char *dso = xasprintf ("%s/ctype/tst-ctype-tls-mod.so", support_objdir_root); +#if DO_STATIC_TEST + void *handle = xdlopen (dso, RTLD_LAZY); +#else + void *handle = xdlmopen (LM_ID_NEWLM, dso, RTLD_LAZY); +#endif + my_isalpha = xdlsym (handle, "my_isalpha"); + my_toupper = xdlsym (handle, "my_toupper"); + my_tolower = xdlsym (handle, "my_tolower"); + + checks (NULL); + xpthread_join (xpthread_create (NULL, checks, NULL)); + + xdlclose (handle); + free (dso); + + return 0; +} + +#include diff --git a/debug/Makefile b/debug/Makefile index 3903cc97a3..76c311d284 100644 --- a/debug/Makefile +++ b/debug/Makefile @@ -287,6 +287,7 @@ tests = \ tst-fortify-wide \ tst-longjmp_chk \ tst-longjmp_chk2 \ + tst-longjmp_chk3 \ tst-realpath-chk \ tst-sprintf-fortify-rdonly \ tst-sprintf-fortify-unchecked \ diff --git a/debug/pcprofiledump.c b/debug/pcprofiledump.c index 049a9c2744..94530f0cf9 100644 --- a/debug/pcprofiledump.c +++ b/debug/pcprofiledump.c @@ -75,6 +75,44 @@ static struct argp argp = options, parse_opt, args_doc, doc, NULL, more_help }; +/* Try to read SIZE bytes from FD and store them in BUFFER. Terminate + the process upon read error. Also terminate the process if fewer + than SIZE bytes remain in the file. If !IN_HEADER, do not + terminate the process if the end of the file is encountered + immediately, before any bytes are read. + + Returns true if SIZE bytes have been read, and false if no bytes + have been read due to an end-of-file condition. 
*/ +static bool +read_exactly (int fd, void *buffer, size_t size, bool in_header) +{ + char *p = buffer; + char *end = p + size; + while (p < end) + { + ssize_t ret = TEMP_FAILURE_RETRY (read (fd, p, end - p)); + if (ret < 0) + { + if (in_header) + error (EXIT_FAILURE, errno, _("cannot read header")); + else + error (EXIT_FAILURE, errno, _("cannot read pointer pair")); + } + if (ret == 0) + { + if (p == buffer && !in_header) + /* Nothing has been read. */ + return false; + if (in_header) + error (EXIT_FAILURE, 0, _("unexpected end of file in header")); + else + error (EXIT_FAILURE, 0, + _("unexpected end of file in pointer pair")); + } + p += ret; + } + return true; +} int main (int argc, char *argv[]) @@ -110,8 +148,7 @@ main (int argc, char *argv[]) /* Read the first 4-byte word. It contains the information about the word size and the endianness. */ uint32_t word; - if (TEMP_FAILURE_RETRY (read (fd, &word, 4)) != 4) - error (EXIT_FAILURE, errno, _("cannot read header")); + read_exactly (fd, &word, sizeof (word), true); /* Check whether we have to swap the byte order. */ int must_swap = (word & 0x0fffffff) == bswap_32 (0xdeb00000); @@ -121,56 +158,30 @@ main (int argc, char *argv[]) /* We have two loops, one for 32 bit pointers, one for 64 bit pointers. */ if (word == 0xdeb00004) { - union - { - uint32_t ptrs[2]; - char bytes[8]; - } pair; + uint32_t ptrs[2]; while (1) { - size_t len = sizeof (pair); - size_t n; - - while (len > 0 - && (n = TEMP_FAILURE_RETRY (read (fd, &pair.bytes[8 - len], - len))) != 0) - len -= n; - - if (len != 0) - /* Nothing to read. */ + if (!read_exactly (fd, ptrs, sizeof (ptrs), false)) break; printf ("this = %#010" PRIx32 ", caller = %#010" PRIx32 "\n", - must_swap ? bswap_32 (pair.ptrs[0]) : pair.ptrs[0], - must_swap ? bswap_32 (pair.ptrs[1]) : pair.ptrs[1]); + must_swap ? bswap_32 (ptrs[0]) : ptrs[0], + must_swap ? 
bswap_32 (ptrs[1]) : ptrs[1]); } } else if (word == 0xdeb00008) { - union - { - uint64_t ptrs[2]; - char bytes[16]; - } pair; + uint64_t ptrs[2]; while (1) { - size_t len = sizeof (pair); - size_t n; - - while (len > 0 - && (n = TEMP_FAILURE_RETRY (read (fd, &pair.bytes[8 - len], - len))) != 0) - len -= n; - - if (len != 0) - /* Nothing to read. */ + if (!read_exactly (fd, ptrs, sizeof (ptrs), false)) break; printf ("this = %#018" PRIx64 ", caller = %#018" PRIx64 "\n", - must_swap ? bswap_64 (pair.ptrs[0]) : pair.ptrs[0], - must_swap ? bswap_64 (pair.ptrs[1]) : pair.ptrs[1]); + must_swap ? bswap_64 (ptrs[0]) : ptrs[0], + must_swap ? bswap_64 (ptrs[1]) : ptrs[1]); } } else diff --git a/debug/tst-fortify-syslog.c b/debug/tst-fortify-syslog.c index a7ddbf7c6b..2712acf689 100644 --- a/debug/tst-fortify-syslog.c +++ b/debug/tst-fortify-syslog.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -46,18 +45,13 @@ handler (int sig) _exit (127); } -#define FAIL() \ - do { \ - printf ("Failure on line %d\n", __LINE__); \ - support_record_failure (); \ - } while (0) #define CHK_FAIL_START \ chk_fail_ok = 1; \ if (! setjmp (chk_fail_buf)) \ { #define CHK_FAIL_END \ chk_fail_ok = 0; \ - FAIL (); \ + FAIL ("not supposed to reach here"); \ } static void diff --git a/debug/tst-longjmp_chk3.c b/debug/tst-longjmp_chk3.c index 9ff9977207..9b9db3b9e9 100644 --- a/debug/tst-longjmp_chk3.c +++ b/debug/tst-longjmp_chk3.c @@ -18,9 +18,13 @@ #include #include +#include #include +#include -static char buf[SIGSTKSZ * 4]; +#include + +static char *buf; static jmp_buf jb; static void @@ -49,8 +53,10 @@ do_test (void) set_fortify_handler (handler); /* Create a valid signal stack and enable it. */ + size_t bufsize = SIGSTKSZ * 4; + buf = xmalloc (bufsize); ss.ss_sp = buf; - ss.ss_size = sizeof (buf); + ss.ss_size = bufsize; ss.ss_flags = 0; if (sigaltstack (&ss, NULL) < 0) { @@ -65,8 +71,8 @@ do_test (void) /* Shrink the signal stack so the jmpbuf is now invalid. 
We adjust the start & end to handle stacks that grow up & down. */ - ss.ss_sp = buf + sizeof (buf) / 2; - ss.ss_size = sizeof (buf) / 4; + ss.ss_sp = buf + bufsize / 2; + ss.ss_size = bufsize / 4; if (sigaltstack (&ss, NULL) < 0) { printf ("second sigaltstack failed: %m\n"); diff --git a/elf/Makefile b/elf/Makefile index a3475f3fb5..0303e08557 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -34,7 +34,6 @@ routines = \ dl-addr \ dl-addr-obj \ dl-early_allocate \ - dl-find_object \ dl-iteratephdr \ dl-libc \ dl-origin \ @@ -58,9 +57,11 @@ dl-routines = \ dl-close \ dl-debug \ dl-debug-symbols \ + dl-debug_state \ dl-deps \ dl-exception \ dl-execstack \ + dl-find_object \ dl-fini \ dl-init \ dl-load \ @@ -266,6 +267,7 @@ tests-static-normal := \ tst-array1-static \ tst-array5-static \ tst-dl-iter-static \ + tst-dlopen-sgid \ tst-dst-static \ tst-env-setuid-static \ tst-getauxval-static \ @@ -378,6 +380,7 @@ tests += \ tst-align3 \ tst-audit-tlsdesc \ tst-audit-tlsdesc-dlopen \ + tst-audit-tlsdesc-dlopen2 \ tst-audit1 \ tst-audit2 \ tst-audit8 \ @@ -414,8 +417,16 @@ tests += \ tst-dlmopen1 \ tst-dlmopen3 \ tst-dlmopen4 \ + tst-dlmopen4-nonpic \ + tst-dlmopen4-pic \ + tst-dlopen-auditdup \ + tst-dlopen-constructor-null \ tst-dlopen-self \ tst-dlopen-tlsmodid \ + tst-dlopen-tlsreinit1 \ + tst-dlopen-tlsreinit2 \ + tst-dlopen-tlsreinit3 \ + tst-dlopen-tlsreinit4 \ tst-dlopenfail \ tst-dlopenfail-2 \ tst-dlopenrpath \ @@ -448,6 +459,9 @@ tests += \ tst-recursive-tls \ tst-relsort1 \ tst-ro-dynamic \ + tst-rtld-no-malloc \ + tst-rtld-no-malloc-audit \ + tst-rtld-no-malloc-preload \ tst-rtld-run-static \ tst-single_threaded \ tst-single_threaded-pthread \ @@ -472,6 +486,7 @@ tests += \ tst-tls19 \ tst-tls20 \ tst-tls21 \ + tst-tls23 \ tst-tlsalign \ tst-tlsalign-extern \ tst-tlsgap \ @@ -479,6 +494,7 @@ tests += \ tst-unique2 \ tst-unwind-ctor \ tst-unwind-main \ + tst-version-hash-zero \ unload3 \ unload4 \ unload5 \ @@ -509,6 +525,8 @@ tests-internal += \ 
tst-dl_find_object \ tst-dl_find_object-threads \ tst-dlmopen2 \ + tst-link-map-contiguous-ldso \ + tst-link-map-contiguous-libc \ tst-ptrguard1 \ tst-stackguard1 \ tst-tls-surplus \ @@ -520,6 +538,10 @@ tests-internal += \ unload2 \ # tests-internal +ifeq ($(build-hardcoded-path-in-tests),yes) +tests-internal += tst-link-map-contiguous-main +endif + tests-container += \ tst-dlopen-self-container \ tst-dlopen-tlsmodid-container \ @@ -813,6 +835,7 @@ modules-names += \ tst-auditmanymod8 \ tst-auditmanymod9 \ tst-auditmod-tlsdesc \ + tst-auditmod-tlsdesc2 \ tst-auditmod1 \ tst-auditmod11 \ tst-auditmod12 \ @@ -853,6 +876,14 @@ modules-names += \ tst-dlmopen-twice-mod1 \ tst-dlmopen-twice-mod2 \ tst-dlmopen1mod \ + tst-dlopen-auditdup-auditmod \ + tst-dlopen-auditdupmod \ + tst-dlopen-constructor-null-mod1 \ + tst-dlopen-constructor-null-mod2 \ + tst-dlopen-sgid-mod \ + tst-dlopen-tlsreinitmod1 \ + tst-dlopen-tlsreinitmod2 \ + tst-dlopen-tlsreinitmod3 \ tst-dlopenfaillinkmod \ tst-dlopenfailmod1 \ tst-dlopenfailmod2 \ @@ -934,6 +965,7 @@ modules-names += \ tst-tls19mod3 \ tst-tls20mod-bad \ tst-tls21mod \ + tst-tls23-mod \ tst-tlsalign-lib \ tst-tlsgap-mod0 \ tst-tlsgap-mod1 \ @@ -964,6 +996,9 @@ modules-names += \ tst-unique2mod1 \ tst-unique2mod2 \ tst-unwind-ctor-lib \ + tst-version-hash-zero-linkmod \ + tst-version-hash-zero-mod \ + tst-version-hash-zero-refmod \ unload2dep \ unload2mod \ unload3mod1 \ @@ -2050,6 +2085,13 @@ $(objpfx)tst-dlmopen3.out: $(objpfx)tst-dlmopen1mod.so $(objpfx)tst-dlmopen4.out: $(objpfx)tst-dlmopen1mod.so +CFLAGS-tst-dlmopen4-pic.c += -fPIC +$(objpfx)tst-dlmopen4-pic.out: $(objpfx)tst-dlmopen1mod.so + +CFLAGS-tst-dlmopen4-nonpic.c += -fno-pie +tst-dlmopen4-nonpic-no-pie = yes +$(objpfx)tst-dlmopen4-nonpic.out: $(objpfx)tst-dlmopen1mod.so + $(objpfx)tst-audit1.out: $(objpfx)tst-auditmod1.so tst-audit1-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so @@ -3033,6 +3075,9 @@ $(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so 
tst-audit-tlsdesc-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-auditmod-tlsdesc.so tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so +$(objpfx)tst-audit-tlsdesc-dlopen2.out: $(objpfx)tst-auditmod-tlsdesc2.so \ + $(patsubst %, $(objpfx)%.so, $(tlsmod17a-modules)) +tst-audit-tlsdesc-dlopen2-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc2.so $(objpfx)tst-dlmopen-twice.out: \ $(objpfx)tst-dlmopen-twice-mod1.so \ @@ -3118,3 +3163,85 @@ $(objpfx)tst-recursive-tls.out: \ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) $(objpfx)tst-recursive-tlsmod%.os: tst-recursive-tlsmodN.c $(compile-command.c) -DVAR=thread_$* -DFUNC=get_threadvar_$* + +# Order matters here. The test needs the constructor for +# tst-dlopen-tlsreinitmod2.so to be called first. +LDFLAGS-tst-dlopen-tlsreinitmod1.so = -Wl,--no-as-needed +$(objpfx)tst-dlopen-tlsreinitmod1.so: \ + $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so +LDFLAGS-tst-dlopen-tlsreinit2 = -Wl,--no-as-needed +$(objpfx)tst-dlopen-tlsreinit2: \ + $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so +LDFLAGS-tst-dlopen-tlsreinit4 = -Wl,--no-as-needed +$(objpfx)tst-dlopen-tlsreinit4: \ + $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so +# tst-dlopen-tlsreinitmod2.so is underlinked and refers to +# tst-dlopen-tlsreinitmod3.so. The dependency is provided via +# $(objpfx)tst-dlopen-tlsreinitmod1.so. +tst-dlopen-tlsreinitmod2.so-no-z-defs = yes +$(objpfx)tst-dlopen-tlsreinit1.out: $(objpfx)tst-dlopen-tlsreinitmod1.so \ + $(objpfx)tst-dlopen-tlsreinitmod2.so $(objpfx)tst-dlopen-tlsreinitmod3.so +# Reuse an audit module which provides ample debug logging. 
+$(objpfx)tst-dlopen-tlsreinit3.out: $(objpfx)tst-auditmod1.so +tst-dlopen-tlsreinit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so +$(objpfx)tst-dlopen-tlsreinit4.out: $(objpfx)tst-auditmod1.so +tst-dlopen-tlsreinit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + +$(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so + +tst-dlopen-auditdup-ENV = LD_AUDIT=$(objpfx)tst-dlopen-auditdup-auditmod.so +$(objpfx)tst-dlopen-auditdup.out: \ + $(objpfx)tst-dlopen-auditdupmod.so $(objpfx)tst-dlopen-auditdup-auditmod.so + +# Reuse an audit module which provides ample debug logging. +tst-rtld-no-malloc-audit-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + +# Any shared object should do. +tst-rtld-no-malloc-preload-ENV = LD_PRELOAD=$(objpfx)tst-auditmod1.so + +$(objpfx)tst-tls23: $(shared-thread-library) +$(objpfx)tst-tls23.out: $(objpfx)tst-tls23-mod.so + +ifneq (no,$(have-test-mtls-traditional)) +CFLAGS-tst-tls23-mod.c += -mtls-dialect=$(have-test-mtls-traditional) +endif + +$(objpfx)tst-version-hash-zero.out: \ + $(objpfx)tst-version-hash-zero-mod.so \ + $(objpfx)tst-version-hash-zero-refmod.so +LDFLAGS-tst-version-hash-zero-mod.so = \ + -Wl,--version-script=tst-version-hash-zero-mod.map +# The run-time test module tst-version-hash-zero-refmod.so is linked +# to a stub module, tst-version-hash-zero-linkmod.so, to produce an +# expected relocation error. +$(objpfx)tst-version-hash-zero-refmod.so: \ + $(objpfx)tst-version-hash-zero-linkmod.so +LDFLAGS-tst-version-hash-zero-linkmod.so = \ + -Wl,--version-script=tst-version-hash-zero-linkmod.map \ + -Wl,--soname=tst-version-hash-zero-mod.so +$(objpfx)tst-version-hash-zero-refmod.so: \ + $(objpfx)tst-version-hash-zero-linkmod.so +tst-version-hash-zero-refmod.so-no-z-defs = yes + +# These rules link and run the special elf/tst-nolink-libc-* tests if +# a port adds them to the tests variables. Neither test variant is +# linked against libc.so, but tst-nolink-libc-1 is linked against +# ld.so. 
The test is always run directly, not under the dynamic +# linker. +CFLAGS-tst-nolink-libc.c += $(no-stack-protector) +$(objpfx)tst-nolink-libc-1: $(objpfx)tst-nolink-libc.o $(objpfx)ld.so + $(LINK.o) -nostdlib -nostartfiles -o $@ $< \ + -Wl,--dynamic-linker=$(objpfx)ld.so,--no-as-needed $(objpfx)ld.so +$(objpfx)tst-nolink-libc-1.out: $(objpfx)tst-nolink-libc-1 $(objpfx)ld.so + $< > $@ 2>&1; $(evaluate-test) +$(objpfx)tst-nolink-libc-2: $(objpfx)tst-nolink-libc.o + $(LINK.o) -nostdlib -nostartfiles -o $@ $< \ + -Wl,--dynamic-linker=$(objpfx)ld.so +$(objpfx)tst-nolink-libc-2.out: $(objpfx)tst-nolink-libc-2 $(objpfx)ld.so + $< > $@ 2>&1; $(evaluate-test) + +$(objpfx)tst-dlopen-constructor-null: \ + $(objpfx)tst-dlopen-constructor-null-mod1.so \ + $(objpfx)tst-dlopen-constructor-null-mod2.so +$(objpfx)tst-dlopen-constructor-null-mod2.so: \ + $(objpfx)tst-dlopen-constructor-null-mod1.so diff --git a/elf/cache.c b/elf/cache.c index 8a618e11fa..62d681df42 100644 --- a/elf/cache.c +++ b/elf/cache.c @@ -820,7 +820,7 @@ struct aux_cache_entry struct aux_cache_entry *next; }; -#define AUX_CACHEMAGIC "glibc-ld.so.auxcache-1.0" +#define AUX_CACHEMAGIC "glibc-ld.so.auxcache-2.0" struct aux_cache_file_entry { diff --git a/elf/dl-close.c b/elf/dl-close.c index 88226245eb..fb27a1231c 100644 --- a/elf/dl-close.c +++ b/elf/dl-close.c @@ -264,6 +264,12 @@ _dl_close_worker (struct link_map *map, bool force) _dl_catch_exception (NULL, _dl_call_fini, imap); #ifdef SHARED + /* Auditing checkpoint: we will start deleting objects. + This is supposed to happen before la_objclose (see _dl_fini), + but only once per non-recursive dlclose call. */ + if (!unload_any) + _dl_audit_activity_nsid (nsid, LA_ACT_DELETE); + /* Auditing checkpoint: we remove an object. */ _dl_audit_objclose (imap); #endif @@ -424,15 +430,10 @@ _dl_close_worker (struct link_map *map, bool force) if (!unload_any) goto out; -#ifdef SHARED - /* Auditing checkpoint: we will start deleting objects. 
*/ - _dl_audit_activity_nsid (nsid, LA_ACT_DELETE); -#endif - - /* Notify the debugger we are about to remove some loaded objects. */ + /* Notify the debugger we are about to remove some loaded objects. + LA_ACT_DELETE has already been signalled above for !unload_any. */ struct r_debug *r = _dl_debug_update (nsid); - r->r_state = RT_DELETE; - _dl_debug_state (); + _dl_debug_change_state (r, RT_DELETE); LIBC_PROBE (unmap_start, 2, nsid, r); if (unload_global) @@ -723,6 +724,10 @@ _dl_close_worker (struct link_map *map, bool force) /* TLS is cleaned up for the unloaded modules. */ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + /* Notify the debugger those objects are finalized and gone. */ + _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (unmap_complete, 2, nsid, r); + #ifdef SHARED /* Auditing checkpoint: we have deleted all objects. Also, do not notify auditors of the cleanup of a failed audit module loading attempt. */ @@ -735,11 +740,6 @@ _dl_close_worker (struct link_map *map, bool force) --GL(dl_nns); while (GL(dl_ns)[GL(dl_nns) - 1]._ns_loaded == NULL); - /* Notify the debugger those objects are finalized and gone. */ - r->r_state = RT_CONSISTENT; - _dl_debug_state (); - LIBC_PROBE (unmap_complete, 2, nsid, r); - /* Recheck if we need to retry, release the lock. */ out: if (dl_close_state == rerun) diff --git a/elf/dl-debug-symbols.S b/elf/dl-debug-symbols.S index 4e35adef5d..33f0fc77de 100644 --- a/elf/dl-debug-symbols.S +++ b/elf/dl-debug-symbols.S @@ -38,3 +38,4 @@ _r_debug: _r_debug_extended: .zero R_DEBUG_EXTENDED_SIZE +rtld_hidden_def (_r_debug) diff --git a/elf/dl-debug.c b/elf/dl-debug.c index ef56de7a29..df36d61dcb 100644 --- a/elf/dl-debug.c +++ b/elf/dl-debug.c @@ -16,6 +16,7 @@ License along with the GNU C Library; if not, see . */ +#include #include @@ -30,23 +31,86 @@ extern const int verify_link_map_members[(VERIFY_MEMBER (l_addr) && VERIFY_MEMBER (l_prev)) ? 1 : -1]; +#ifdef SHARED +/* r_debug structs for secondary namespaces. 
The first namespace is + handled separately because its r_debug structure must overlap with + the public _r_debug symbol, so the first array element corresponds + to LM_ID_BASE + 1. See elf/dl-debug-symbols.S. */ +struct r_debug_extended _r_debug_array[DL_NNS - 1]; + +/* If not null, pointer to the _r_debug in the main executable. */ +static struct r_debug *_r_debug_main; + +void +_dl_debug_post_relocate (struct link_map *main_map) +{ + /* Perform a full symbol search in all objects, to maintain + compatibility with interposed _r_debug definitions. The lookup + cannot fail because there is a definition in ld.so, and this + function is only called if the ld.so search scope is not empty. */ + const ElfW(Sym) *sym = NULL; + lookup_t result =_dl_lookup_symbol_x ("_r_debug", main_map, &sym, + main_map->l_scope, NULL, 0, 0, NULL); + if (sym->st_size >= sizeof (struct r_debug)) + { + struct r_debug *main_r_debug = DL_SYMBOL_ADDRESS (result, sym); + if (main_r_debug != &_r_debug_extended.base) + { + /* The extended version of the struct is not available in + the main executable because a copy relocation has been + used. r_map etc. have already been copied as part of the + copy relocation processing. */ + main_r_debug->r_version = 1; + + /* Record that dual updates of the initial link map are + required. */ + _r_debug_main = main_r_debug; + } + } +} + +/* Return the r_debug object for the namespace NS. */ +static inline struct r_debug_extended * +get_rdebug (Lmid_t ns) +{ + if (ns == LM_ID_BASE) + return &_r_debug_extended; + else + return &_r_debug_array[ns - 1]; +} +#else /* !SHARED */ +static inline struct r_debug_extended * +get_rdebug (Lmid_t ns) +{ + return &_r_debug_extended; /* There is just one namespace. */ +} +#endif /* !SHARED */ + /* Update the `r_map' member and return the address of `struct r_debug' of the namespace NS. 
*/ struct r_debug * _dl_debug_update (Lmid_t ns) { - struct r_debug_extended *r; - if (ns == LM_ID_BASE) - r = &_r_debug_extended; - else - r = &GL(dl_ns)[ns]._ns_debug; + struct r_debug_extended *r = get_rdebug (ns); if (r->base.r_map == NULL) atomic_store_release (&r->base.r_map, (void *) GL(dl_ns)[ns]._ns_loaded); return &r->base; } +void +_dl_debug_change_state (struct r_debug *r, int state) +{ + atomic_store_release (&r->r_state, state); +#ifdef SHARED + if (r == &_r_debug_extended.base && _r_debug_main != NULL) + /* Update the copy-relocation of _r_debug. */ + atomic_store_release (&_r_debug_main->r_state, state); +#endif + _dl_debug_state (); +} + /* Initialize _r_debug_extended for the namespace NS. LDBASE is the run-time load address of the dynamic linker, to be put in _r_debug_extended.r_ldbase. Return the address of _r_debug. */ @@ -54,34 +118,7 @@ _dl_debug_update (Lmid_t ns) struct r_debug * _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) { - struct r_debug_extended *r, **pp = NULL; - - if (ns == LM_ID_BASE) - { - r = &_r_debug_extended; - /* Initialize r_version to 1. */ - if (_r_debug_extended.base.r_version == 0) - _r_debug_extended.base.r_version = 1; - } - else if (DL_NNS > 1) - { - r = &GL(dl_ns)[ns]._ns_debug; - if (r->base.r_brk == 0) - { - /* Add the new namespace to the linked list. After a namespace - is initialized, r_brk becomes non-zero. A namespace becomes - empty (r_map == NULL) when it is unused. But it is never - removed from the linked list. */ - struct r_debug_extended *p; - for (pp = &_r_debug_extended.r_next; - (p = *pp) != NULL; - pp = &p->r_next) - ; - - r->base.r_version = 2; - } - } - + struct r_debug_extended *r = get_rdebug (ns); if (r->base.r_brk == 0) { /* Tell the debugger where to find the map of loaded objects. @@ -89,30 +126,44 @@ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) only once. 
*/ r->base.r_ldbase = ldbase ?: _r_debug_extended.base.r_ldbase; r->base.r_brk = (ElfW(Addr)) &_dl_debug_state; - r->r_next = NULL; + +#ifdef SHARED + /* Add the new namespace to the linked list. This assumes that + namespaces are allocated in increasing order. After a + namespace is initialized, r_brk becomes non-zero. A + namespace becomes empty (r_map == NULL) when it is unused. + But it is never removed from the linked list. */ + + if (ns != LM_ID_BASE) + { + r->base.r_version = 2; + if (ns - 1 == LM_ID_BASE) + { + atomic_store_release (&_r_debug_extended.r_next, r); + /* Now there are multiple namespaces. Note that this + deliberately does not update the copy in the main + executable (if it exists). */ + atomic_store_release (&_r_debug_extended.base.r_version, 2); + } + else + /* Update r_debug_extended of the previous namespace. */ + atomic_store_release (&_r_debug_array[ns - 2].r_next, r); + } + else +#endif /* SHARED */ + r->base.r_version = 1; } if (r->base.r_map == NULL) - atomic_store_release (&r->base.r_map, - (void *) GL(dl_ns)[ns]._ns_loaded); - - if (pp != NULL) { - atomic_store_release (pp, r); - /* Bump r_version to 2 for the new namespace. */ - atomic_store_release (&_r_debug_extended.base.r_version, 2); + struct link_map_public *l = (void *) GL(dl_ns)[ns]._ns_loaded; + atomic_store_release (&r->base.r_map, l); +#ifdef SHARED + if (ns == LM_ID_BASE && _r_debug_main != NULL) + /* Update the copy-relocation of _r_debug. */ + atomic_store_release (&_r_debug_main->r_map, l); +#endif } return &r->base; } - - -/* This function exists solely to have a breakpoint set on it by the - debugger. The debugger is supposed to find this function's address by - examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks - for this particular symbol name in the PT_INTERP file. 
*/ -void -_dl_debug_state (void) -{ -} -rtld_hidden_def (_dl_debug_state) diff --git a/elf/dl-debug_state.c b/elf/dl-debug_state.c new file mode 100644 index 0000000000..40c134a49e --- /dev/null +++ b/elf/dl-debug_state.c @@ -0,0 +1,30 @@ +/* Debugger hook called after dynamic linker updates. + Copyright (C) 1996-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* This function exists solely to have a breakpoint set on it by the + debugger. The debugger is supposed to find this function's address by + examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks + for this particular symbol name in the PT_INTERP file. Therefore, + this function must not be inlined. */ +void +_dl_debug_state (void) +{ +} +rtld_hidden_def (_dl_debug_state) diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c index 449302eda3..f258665a71 100644 --- a/elf/dl-find_object.c +++ b/elf/dl-find_object.c @@ -356,7 +356,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size) } int -__dl_find_object (void *pc1, struct dl_find_object *result) +_dl_find_object (void *pc1, struct dl_find_object *result) { uintptr_t pc = (uintptr_t) pc1; @@ -463,8 +463,38 @@ __dl_find_object (void *pc1, struct dl_find_object *result) return -1; } /* Transaction retry loop. 
*/ } -hidden_def (__dl_find_object) -weak_alias (__dl_find_object, _dl_find_object) +rtld_hidden_def (_dl_find_object) + +/* Subroutine of _dlfo_process_initial to split out noncontiguous link + maps. NODELETE is the number of used _dlfo_nodelete_mappings + elements. It is incremented as needed, and the new NODELETE value + is returned. */ +static size_t +_dlfo_process_initial_noncontiguous_map (struct link_map *map, + size_t nodelete) +{ + struct dl_find_object_internal dlfo; + _dl_find_object_from_map (map, &dlfo); + + /* PT_LOAD segments for a non-contiguous link map are added to the + non-closeable mappings. */ + const ElfW(Phdr) *ph = map->l_phdr; + const ElfW(Phdr) *ph_end = map->l_phdr + map->l_phnum; + for (; ph < ph_end; ++ph) + if (ph->p_type == PT_LOAD) + { + if (_dlfo_nodelete_mappings != NULL) + { + /* Second pass only. */ + _dlfo_nodelete_mappings[nodelete] = dlfo; + ElfW(Addr) start = ph->p_vaddr + map->l_addr; + _dlfo_nodelete_mappings[nodelete].map_start = start; + _dlfo_nodelete_mappings[nodelete].map_end = start + ph->p_memsz; + } + ++nodelete; + } + return nodelete; +} /* _dlfo_process_initial is called twice. First to compute the array sizes from the initial loaded mappings. Second to fill in the @@ -477,29 +507,8 @@ _dlfo_process_initial (void) size_t nodelete = 0; if (!main_map->l_contiguous) - { - struct dl_find_object_internal dlfo; - _dl_find_object_from_map (main_map, &dlfo); - - /* PT_LOAD segments for a non-contiguous are added to the - non-closeable mappings. */ - for (const ElfW(Phdr) *ph = main_map->l_phdr, - *ph_end = main_map->l_phdr + main_map->l_phnum; - ph < ph_end; ++ph) - if (ph->p_type == PT_LOAD) - { - if (_dlfo_nodelete_mappings != NULL) - { - /* Second pass only. 
*/ - _dlfo_nodelete_mappings[nodelete] = dlfo; - _dlfo_nodelete_mappings[nodelete].map_start - = ph->p_vaddr + main_map->l_addr; - _dlfo_nodelete_mappings[nodelete].map_end - = _dlfo_nodelete_mappings[nodelete].map_start + ph->p_memsz; - } - ++nodelete; - } - } + /* Contiguous case already handled in _dl_find_object_init. */ + nodelete = _dlfo_process_initial_noncontiguous_map (main_map, nodelete); size_t loaded = 0; for (Lmid_t ns = 0; ns < GL(dl_nns); ++ns) @@ -511,11 +520,22 @@ _dlfo_process_initial (void) /* lt_library link maps are implicitly NODELETE. */ if (l->l_type == lt_library || l->l_nodelete_active) { - if (_dlfo_nodelete_mappings != NULL) - /* Second pass only. */ - _dl_find_object_from_map - (l, _dlfo_nodelete_mappings + nodelete); - ++nodelete; + /* The kernel may have loaded ld.so with gaps. */ + if (!l->l_contiguous +#ifdef SHARED + && l == &GL(dl_rtld_map) +#endif + ) + nodelete + = _dlfo_process_initial_noncontiguous_map (l, nodelete); + else + { + if (_dlfo_nodelete_mappings != NULL) + /* Second pass only. */ + _dl_find_object_from_map + (l, _dlfo_nodelete_mappings + nodelete); + ++nodelete; + } } else if (l->l_type == lt_loaded) { @@ -662,6 +682,14 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count) = _dlfo_loaded_mappings[!active_idx]; size_t remaining_to_add = current_used + count; + /* remaining_to_add can be 0 if (current_used + count) wraps, but in practice + this is not possible as it represents counts of link maps. Link maps have + sizes larger than 1 byte, so the sum of any two link map counts will + always fit within a size_t without wrapping around. This check ensures + that target_seg is not erroneously considered potentially NULL by GCC. */ + if (remaining_to_add == 0) + __builtin_unreachable (); + /* Ensure that the new segment chain has enough space. */ { size_t new_allocated @@ -757,7 +785,6 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count) /* Prefer newly loaded link map. 
*/ assert (loaded_index1 > 0); _dl_find_object_from_map (loaded[loaded_index1 - 1], dlfo); - loaded[loaded_index1 - 1]->l_find_object_processed = 1; --loaded_index1; } diff --git a/elf/dl-find_object.h b/elf/dl-find_object.h index 0915065be0..8894c6657c 100644 --- a/elf/dl-find_object.h +++ b/elf/dl-find_object.h @@ -87,7 +87,7 @@ _dl_find_object_to_external (struct dl_find_object_internal *internal, } /* Extract the object location data from a link map and writes it to - *RESULT using relaxed MO stores. */ + *RESULT using relaxed MO stores. Set L->l_find_object_processed. */ static void __attribute__ ((unused)) _dl_find_object_from_map (struct link_map *l, struct dl_find_object_internal *result) @@ -100,6 +100,8 @@ _dl_find_object_from_map (struct link_map *l, atomic_store_relaxed (&result->eh_dbase, (void *) l->l_info[DT_PLTGOT]); #endif + l->l_find_object_processed = 1; + for (const ElfW(Phdr) *ph = l->l_phdr, *ph_end = l->l_phdr + l->l_phnum; ph < ph_end; ++ph) if (ph->p_type == DLFO_EH_SEGMENT_TYPE) diff --git a/elf/dl-fini.c b/elf/dl-fini.c index db996270de..a1a4c25829 100644 --- a/elf/dl-fini.c +++ b/elf/dl-fini.c @@ -69,6 +69,7 @@ _dl_fini (void) unsigned int i; struct link_map *l; + struct link_map *proxy_link_map = NULL; assert (nloaded != 0 || GL(dl_ns)[ns]._ns_loaded == NULL); for (l = GL(dl_ns)[ns]._ns_loaded, i = 0; l != NULL; l = l->l_next) /* Do not handle ld.so in secondary namespaces. */ @@ -84,6 +85,11 @@ _dl_fini (void) are not dlclose()ed from underneath us. */ ++l->l_direct_opencount; } + else + /* Used below to call la_objclose for the ld.so proxy + link map. 
*/ + proxy_link_map = l; + assert (ns != LM_ID_BASE || i == nloaded); assert (ns == LM_ID_BASE || i == nloaded || i == nloaded - 1); unsigned int nmaps = i; @@ -122,6 +128,9 @@ _dl_fini (void) --l->l_direct_opencount; } + if (proxy_link_map != NULL) + _dl_audit_objclose (proxy_link_map); + #ifdef SHARED _dl_audit_activity_nsid (ns, LA_ACT_CONSISTENT); #endif diff --git a/elf/dl-load.c b/elf/dl-load.c index 8a89b71016..23b7e6a0c6 100644 --- a/elf/dl-load.c +++ b/elf/dl-load.c @@ -918,6 +918,36 @@ _dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph) } } +static void +_dl_notify_new_object (int mode, Lmid_t nsid, struct link_map *l) +{ + /* Signal that we are going to add new objects. */ + struct r_debug *r = _dl_debug_update (nsid); + if (r->r_state == RT_CONSISTENT) + { +#ifdef SHARED + /* Auditing checkpoint: we are going to add new objects. Since this + is called after _dl_add_to_namespace_list the namespace is guaranteed + to not be empty. */ + if ((mode & __RTLD_AUDIT) == 0) + _dl_audit_activity_nsid (nsid, LA_ACT_ADD); +#endif + + /* Notify the debugger we have added some objects. We need to + call _dl_debug_initialize in a static program in case dynamic + linking has not been used before. */ + _dl_debug_change_state (r, RT_ADD); + LIBC_PROBE (map_start, 2, nsid, r); + } + else + assert (r->r_state == RT_ADD); + +#ifdef SHARED + /* Auditing checkpoint: we have a new object. */ + if (!GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) + _dl_audit_objopen (l, nsid); +#endif +} /* Map in the shared object NAME, actually located in REALNAME, and already opened on FD. */ @@ -1018,6 +1048,8 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, /* Add the map for the mirrored object to the object list. 
*/ _dl_add_to_namespace_list (l, nsid); + _dl_notify_new_object (mode, nsid, l); + return l; } #endif @@ -1442,33 +1474,7 @@ cannot enable executable stack as shared object requires"); if (mode & __RTLD_SPROF) return l; - /* Signal that we are going to add new objects. */ - struct r_debug *r = _dl_debug_update (nsid); - if (r->r_state == RT_CONSISTENT) - { -#ifdef SHARED - /* Auditing checkpoint: we are going to add new objects. Since this - is called after _dl_add_to_namespace_list the namespace is guaranteed - to not be empty. */ - if ((mode & __RTLD_AUDIT) == 0) - _dl_audit_activity_nsid (nsid, LA_ACT_ADD); -#endif - - /* Notify the debugger we have added some objects. We need to - call _dl_debug_initialize in a static program in case dynamic - linking has not been used before. */ - r->r_state = RT_ADD; - _dl_debug_state (); - LIBC_PROBE (map_start, 2, nsid, r); - } - else - assert (r->r_state == RT_ADD); - -#ifdef SHARED - /* Auditing checkpoint: we have a new object. */ - if (!GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) - _dl_audit_objopen (l, nsid); -#endif + _dl_notify_new_object (mode, nsid, l); return l; } diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c index 19ad2a25c5..7a70f1df2d 100644 --- a/elf/dl-lookup.c +++ b/elf/dl-lookup.c @@ -113,12 +113,22 @@ check_match (const char *const undef_name, /* We can match the version information or use the default one if it is not hidden. */ ElfW(Half) ndx = verstab[symidx] & 0x7fff; - if ((map->l_versions[ndx].hash != version->hash - || strcmp (map->l_versions[ndx].name, version->name)) - && (version->hidden || map->l_versions[ndx].hash - || (verstab[symidx] & 0x8000))) - /* It's not the version we want. */ - return NULL; + if (map->l_versions[ndx].hash == version->hash + && strcmp (map->l_versions[ndx].name, version->name) == 0) + /* This is an exact version match. Return the symbol below. 
*/ + ; + else + { + if (!version->hidden + && map->l_versions[ndx].name[0] == '\0' + && (verstab[symidx] & 0x8000) == 0 + && (*num_versions)++ == 0) + /* This is the global default version. Store it as a + fallback match. */ + *versioned_sym = sym; + + return NULL; + } } } else diff --git a/elf/dl-open.c b/elf/dl-open.c index c378da16c0..6f6d3ddbf9 100644 --- a/elf/dl-open.c +++ b/elf/dl-open.c @@ -363,17 +363,8 @@ resize_tls_slotinfo (struct link_map *new) { bool any_tls = false; for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i) - { - struct link_map *imap = new->l_searchlist.r_list[i]; - - /* Only add TLS memory if this object is loaded now and - therefore is not yet initialized. */ - if (! imap->l_init_called && imap->l_tls_blocksize > 0) - { - _dl_add_to_slotinfo (imap, false); - any_tls = true; - } - } + if (_dl_add_to_slotinfo (new->l_searchlist.r_list[i], false)) + any_tls = true; return any_tls; } @@ -383,22 +374,8 @@ resize_tls_slotinfo (struct link_map *new) static void update_tls_slotinfo (struct link_map *new) { - unsigned int first_static_tls = new->l_searchlist.r_nlist; for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i) - { - struct link_map *imap = new->l_searchlist.r_list[i]; - - /* Only add TLS memory if this object is loaded now and - therefore is not yet initialized. */ - if (! imap->l_init_called && imap->l_tls_blocksize > 0) - { - _dl_add_to_slotinfo (imap, true); - - if (imap->l_need_tls_init - && first_static_tls == new->l_searchlist.r_nlist) - first_static_tls = i; - } - } + _dl_add_to_slotinfo (new->l_searchlist.r_list[i], true); size_t newgen = GL(dl_tls_generation) + 1; if (__glibc_unlikely (newgen == 0)) @@ -410,13 +387,11 @@ TLS generation counter wrapped! Please report this.")); /* We need a second pass for static tls data, because _dl_update_slotinfo must not be run while calls to _dl_add_to_slotinfo are still pending. 
*/ - for (unsigned int i = first_static_tls; i < new->l_searchlist.r_nlist; ++i) + for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i) { struct link_map *imap = new->l_searchlist.r_list[i]; - if (imap->l_need_tls_init - && ! imap->l_init_called - && imap->l_tls_blocksize > 0) + if (imap->l_need_tls_init && imap->l_tls_blocksize > 0) { /* For static TLS we have to allocate the memory here and now, but we can delay updating the DTV. */ @@ -601,6 +576,14 @@ dl_open_worker_begin (void *a) _dl_debug_printf ("opening file=%s [%lu]; direct_opencount=%u\n\n", new->l_name, new->l_ns, new->l_direct_opencount); +#ifdef SHARED + /* No relocation processing on this execution path. But + relocation has not been performed for static + position-dependent executables, so disable the assert for + static linking. */ + assert (new->l_relocated); +#endif + /* If the user requested the object to be in the global namespace but it is not so far, prepare to add it now. This can raise an exception to do a malloc failure. */ @@ -622,9 +605,15 @@ dl_open_worker_begin (void *a) if ((mode & RTLD_GLOBAL) && new->l_global == 0) add_to_global_update (new); - const int r_state __attribute__ ((unused)) - = _dl_debug_update (args->nsid)->r_state; - assert (r_state == RT_CONSISTENT); + /* It is not possible to run the ELF constructor for the new + link map if it has not executed yet: If this dlopen call came + from an ELF constructor that has not put that object into a + consistent state, completing initialization for the entire + scope will expose objects that have this partially + constructed object among its dependencies to this + inconsistent state. This could happen even with a benign + dlopen (NULL, RTLD_LAZY) call from a constructor of an + initially loaded shared object. */ return; } @@ -656,17 +645,6 @@ dl_open_worker_begin (void *a) #endif } -#ifdef SHARED - /* Auditing checkpoint: we have added all objects. 
*/ - _dl_audit_activity_nsid (new->l_ns, LA_ACT_CONSISTENT); -#endif - - /* Notify the debugger all new objects are now ready to go. */ - struct r_debug *r = _dl_debug_update (args->nsid); - r->r_state = RT_CONSISTENT; - _dl_debug_state (); - LIBC_PROBE (map_complete, 3, args->nsid, r, new); - _dl_open_check (new); /* Print scope information. */ @@ -713,6 +691,7 @@ dl_open_worker_begin (void *a) created dlmopen namespaces. Do not do this for static dlopen because libc has relocations against ld.so, which may not have been relocated at this point. */ + struct r_debug *r = _dl_debug_update (args->nsid); #ifdef SHARED if (GL(dl_ns)[args->nsid].libc_map != NULL) _dl_open_relocate_one_object (args, r, GL(dl_ns)[args->nsid].libc_map, @@ -804,6 +783,25 @@ dl_open_worker (void *a) __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + /* Auditing checkpoint and debugger signalling. Do this even on + error, so that dlopen exits with consistent state. */ + if (args->nsid >= 0 || args->map != NULL) + { + Lmid_t nsid = args->map != NULL ? args->map->l_ns : args->nsid; + struct r_debug *r = _dl_debug_update (nsid); +#ifdef SHARED + bool was_not_consistent = r->r_state != RT_CONSISTENT; +#endif + _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (map_complete, 3, nsid, r, args->map); + +#ifdef SHARED + if (was_not_consistent) + /* Avoid redundant/recursive signalling. */ + _dl_audit_activity_nsid (nsid, LA_ACT_CONSISTENT); +#endif + } + if (__glibc_unlikely (ex.errstring != NULL)) /* Reraise the error. */ _dl_signal_exception (err, &ex, NULL); @@ -872,7 +870,7 @@ no more namespaces available for dlmopen()")); } GL(dl_ns)[nsid].libc_map = NULL; - _dl_debug_update (nsid)->r_state = RT_CONSISTENT; + _dl_debug_change_state (_dl_debug_update (nsid), RT_CONSISTENT); } /* Never allow loading a DSO in a namespace which is empty. Such direct placements is only causing problems. 
Also don't allow diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c index 4bf7aec88b..76d14830dd 100644 --- a/elf/dl-reloc.c +++ b/elf/dl-reloc.c @@ -202,12 +202,9 @@ resolve_map (lookup_t l, struct r_scope_elem *scope[], const ElfW(Sym) **ref, #include "dynamic-link.h" void -_dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], - int reloc_mode, int consider_profiling) +_dl_relocate_object_no_relro (struct link_map *l, struct r_scope_elem *scope[], + int reloc_mode, int consider_profiling) { - if (l->l_relocated) - return; - struct textrels { caddr_t start; @@ -220,8 +217,8 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], int lazy = reloc_mode & RTLD_LAZY; int skip_ifunc = reloc_mode & __RTLD_NOIFUNC; -#ifdef SHARED bool consider_symbind = false; +#ifdef SHARED /* If we are auditing, install the same handlers we need for profiling. */ if ((reloc_mode & __RTLD_AUDIT) == 0) { @@ -240,9 +237,7 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], } #elif defined PROF /* Never use dynamic linker profiling for gprof profiling code. */ -# define consider_profiling 0 -#else -# define consider_symbind 0 + consider_profiling = 0; #endif /* If DT_BIND_NOW is set relocate all references in this object. We @@ -300,7 +295,6 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], ELF_DYNAMIC_RELOCATE (l, scope, lazy, consider_profiling, skip_ifunc); -#ifndef PROF if ((consider_profiling || consider_symbind) && l->l_info[DT_PLTRELSZ] != NULL) { @@ -321,7 +315,6 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], _dl_fatal_printf (errstring, RTLD_PROGNAME, l->l_name); } } -#endif } /* Mark the object so we know this work has been done. */ @@ -342,17 +335,24 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], textrels = textrels->next; } - - /* In case we can protect the data now that the relocations are - done, do it. 
*/ - if (l->l_relro_size != 0) - _dl_protect_relro (l); } +void +_dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + int reloc_mode, int consider_profiling) +{ + if (l->l_relocated) + return; + _dl_relocate_object_no_relro (l, scope, reloc_mode, consider_profiling); + _dl_protect_relro (l); +} void _dl_protect_relro (struct link_map *l) { + if (l->l_relro_size == 0) + return; + ElfW(Addr) start = ALIGN_DOWN((l->l_addr + l->l_relro_addr), GLRO(dl_pagesize)); diff --git a/elf/dl-support.c b/elf/dl-support.c index 451932dd03..ee590edf93 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -338,8 +338,7 @@ _dl_non_dynamic_init (void) call_function_static_weak (_dl_find_object_init); /* Setup relro on the binary itself. */ - if (_dl_main_map.l_relro_size != 0) - _dl_protect_relro (&_dl_main_map); + _dl_protect_relro (&_dl_main_map); } #ifdef DL_SYSINFO_IMPLEMENTATION diff --git a/elf/dl-tls.c b/elf/dl-tls.c index 3d221273f1..b13e752358 100644 --- a/elf/dl-tls.c +++ b/elf/dl-tls.c @@ -528,6 +528,13 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) if (newp == NULL) oom (); memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t)); +#ifdef SHARED + /* Auditors can trigger a DTV resize event while the full malloc + is not yet in use. Mark the new DTV allocation as the + initial allocation. */ + if (!__rtld_malloc_is_complete ()) + GL(dl_initial_dtv) = &newp[1]; +#endif } else { @@ -552,9 +559,14 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) /* Allocate initial TLS. RESULT should be a non-NULL pointer to storage for the TLS space. The DTV may be resized, and so this function may call malloc to allocate that space. The loader's GL(dl_load_tls_lock) - is taken when manipulating global TLS-related data in the loader. */ + is taken when manipulating global TLS-related data in the loader. + + If MAIN_THREAD, this is the first call during process + initialization. 
In this case, TLS initialization for secondary + (audit) namespaces is skipped because that has already been handled + by dlopen. */ void * -_dl_allocate_tls_init (void *result, bool init_tls) +_dl_allocate_tls_init (void *result, bool main_thread) { if (result == NULL) /* The memory allocation failed. */ @@ -627,17 +639,21 @@ _dl_allocate_tls_init (void *result, bool init_tls) some platforms use in static programs requires it. */ dtv[map->l_tls_modid].pointer.val = dest; - /* Copy the initialization image and clear the BSS part. For - audit modules or dependencies with initial-exec TLS, we can not - set the initial TLS image on default loader initialization - because it would already be set by the audit setup. However, - subsequent thread creation would need to follow the default - behaviour. */ - if (map->l_ns != LM_ID_BASE && !init_tls) + /* Copy the initialization image and clear the BSS part. + For audit modules or dependencies with initial-exec TLS, + we can not set the initial TLS image on default loader + initialization because it would already be set by the + audit setup, which uses the dlopen code and already + clears l_need_tls_init. Calls with !main_thread from + pthread_create need to initialize TLS for the current + thread regardless of namespace. */ + if (map->l_ns != LM_ID_BASE && main_thread) continue; memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size), '\0', map->l_tls_blocksize - map->l_tls_initimage_size); + if (main_thread) + map->l_need_tls_init = 0; } total += cnt; @@ -661,7 +677,7 @@ _dl_allocate_tls (void *mem) { return _dl_allocate_tls_init (mem == NULL ? _dl_allocate_tls_storage () - : allocate_dtv (mem), true); + : allocate_dtv (mem), false); } rtld_hidden_def (_dl_allocate_tls) @@ -1094,9 +1110,32 @@ _dl_tls_initial_modid_limit_setup (void) } -void +/* Add module to slot information data. If DO_ADD is false, only the + required memory is allocated. Must be called with + GL (dl_load_tls_lock) acquired. 
If the function has already been + called for the link map L with !DO_ADD, then this function will not + raise an exception, otherwise it is possible that it encounters a + memory allocation failure. + + Return false if L has already been added to the slotinfo data, or + if L has no TLS data. If the returned value is true, L has been + added with this call (DO_ADD), or has been added in a previous call + (!DO_ADD). + + The expected usage is as follows: Call _dl_add_to_slotinfo for + several link maps with DO_ADD set to false, and record if any calls + result in a true result. If there was a true result, call + _dl_add_to_slotinfo again, this time with DO_ADD set to true. (For + simplicity, it's possible to call the function for link maps where + the previous result was false.) The return value from the second + round of calls can be ignored. If there was a true result initially, + call _dl_update_slotinfo to update the TLS generation counter. */ +bool _dl_add_to_slotinfo (struct link_map *l, bool do_add) { + if (l->l_tls_blocksize == 0 || l->l_tls_in_slotinfo) + return false; + /* Now that we know the object is loaded successfully add modules containing TLS data to the dtv info table. We might have to increase its size. */ @@ -1152,7 +1191,10 @@ cannot create TLS data structures")); atomic_store_relaxed (&listp->slotinfo[idx].map, l); atomic_store_relaxed (&listp->slotinfo[idx].gen, GL(dl_tls_generation) + 1); + l->l_tls_in_slotinfo = true; } + + return true; } #if PTHREAD_IN_LIBC diff --git a/elf/dl-version.c b/elf/dl-version.c index 8966d612cc..708b1c94ea 100644 --- a/elf/dl-version.c +++ b/elf/dl-version.c @@ -357,6 +357,13 @@ _dl_check_map_versions (struct link_map *map, int verbose, int trace_mode) ent = (ElfW(Verdef) *) ((char *) ent + ent->vd_next); } } + + /* The empty string has ELF hash zero. This avoids a NULL check + before the version string comparison in check_match in + dl-lookup.c. 
*/ + for (unsigned int i = 0; i < map->l_nversions; ++i) + if (map->l_versions[i].name == NULL) + map->l_versions[i].name = ""; } /* When there is a DT_VERNEED entry with libc.so on DT_NEEDED, issue diff --git a/elf/endswith.h b/elf/endswith.h index c6430c48be..3954e57f8e 100644 --- a/elf/endswith.h +++ b/elf/endswith.h @@ -17,6 +17,7 @@ #ifndef _ENDSWITH_H #define _ENDSWITH_H +#include #include /* Return true if the N bytes at NAME end with with the characters in @@ -30,4 +31,11 @@ endswithn (const char *name, size_t n, const char *suffix) strlen (suffix)) == 0); } +/* Same as endswithn, but uses the entire SUBJECT for matching. */ +static inline bool +endswith (const char *subject, const char *suffix) +{ + return endswithn (subject, strlen (subject), suffix); +} + #endif /* _ENDSWITH_H */ diff --git a/elf/rtld.c b/elf/rtld.c index bfdf632e77..dc65b6e404 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -1284,6 +1284,60 @@ rtld_setup_main_map (struct link_map *main_map) return has_interp; } +/* Set up the program header information for the dynamic linker + itself. It can be accessed via _r_debug and dl_iterate_phdr + callbacks, and it is used by _dl_find_object. */ +static void +rtld_setup_phdr (void) +{ + /* Starting from binutils-2.23, the linker will define the magic + symbol __ehdr_start to point to our own ELF header if it is + visible in a segment that also includes the phdrs. */ + + const ElfW(Ehdr) *rtld_ehdr = &__ehdr_start; + assert (rtld_ehdr->e_ehsize == sizeof *rtld_ehdr); + assert (rtld_ehdr->e_phentsize == sizeof (ElfW(Phdr))); + + const ElfW(Phdr) *rtld_phdr = (const void *) rtld_ehdr + rtld_ehdr->e_phoff; + + GL(dl_rtld_map).l_phdr = rtld_phdr; + GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum; + + + GL(dl_rtld_map).l_contiguous = 1; + /* The linker may not have produced a contiguous object. The kernel + will load the object with actual gaps (unlike the glibc loader + for shared objects, which always produces a contiguous mapping). 
+ See similar logic in rtld_setup_main_map above. */ + { + ElfW(Addr) expected_load_address = 0; + for (const ElfW(Phdr) *ph = rtld_phdr; ph < &rtld_phdr[rtld_ehdr->e_phnum]; + ++ph) + if (ph->p_type == PT_LOAD) + { + ElfW(Addr) mapstart = ph->p_vaddr & ~(GLRO(dl_pagesize) - 1); + if (GL(dl_rtld_map).l_contiguous && expected_load_address != 0 + && expected_load_address != mapstart) + GL(dl_rtld_map).l_contiguous = 0; + ElfW(Addr) allocend = ph->p_vaddr + ph->p_memsz; + /* The next expected address is the page following this load + segment. */ + expected_load_address = ((allocend + GLRO(dl_pagesize) - 1) + & ~(GLRO(dl_pagesize) - 1)); + } + } + + /* PT_GNU_RELRO is usually the last phdr. */ + size_t cnt = rtld_ehdr->e_phnum; + while (cnt-- > 0) + if (rtld_phdr[cnt].p_type == PT_GNU_RELRO) + { + GL(dl_rtld_map).l_relro_addr = rtld_phdr[cnt].p_vaddr; + GL(dl_rtld_map).l_relro_size = rtld_phdr[cnt].p_memsz; + break; + } +} + /* Adjusts the contents of the stack and related globals for the user entry point. The ld.so processed skip_args arguments and bumped _dl_argv and _dl_argc accordingly. Those arguments are removed from @@ -1750,33 +1804,7 @@ dl_main (const ElfW(Phdr) *phdr, ++GL(dl_ns)[LM_ID_BASE]._ns_nloaded; ++GL(dl_load_adds); - /* Starting from binutils-2.23, the linker will define the magic symbol - __ehdr_start to point to our own ELF header if it is visible in a - segment that also includes the phdrs. If that's not available, we use - the old method that assumes the beginning of the file is part of the - lowest-addressed PT_LOAD segment. */ - - /* Set up the program header information for the dynamic linker - itself. It is needed in the dl_iterate_phdr callbacks. 
*/ - const ElfW(Ehdr) *rtld_ehdr = &__ehdr_start; - assert (rtld_ehdr->e_ehsize == sizeof *rtld_ehdr); - assert (rtld_ehdr->e_phentsize == sizeof (ElfW(Phdr))); - - const ElfW(Phdr) *rtld_phdr = (const void *) rtld_ehdr + rtld_ehdr->e_phoff; - - GL(dl_rtld_map).l_phdr = rtld_phdr; - GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum; - - - /* PT_GNU_RELRO is usually the last phdr. */ - size_t cnt = rtld_ehdr->e_phnum; - while (cnt-- > 0) - if (rtld_phdr[cnt].p_type == PT_GNU_RELRO) - { - GL(dl_rtld_map).l_relro_addr = rtld_phdr[cnt].p_vaddr; - GL(dl_rtld_map).l_relro_size = rtld_phdr[cnt].p_memsz; - break; - } + rtld_setup_phdr (); /* Add the dynamic linker to the TLS list if it also uses TLS. */ if (GL(dl_rtld_map).l_tls_blocksize != 0) @@ -1823,8 +1851,7 @@ dl_main (const ElfW(Phdr) *phdr, elf_setup_debug_entry (main_map, r); /* We start adding objects. */ - r->r_state = RT_ADD; - _dl_debug_state (); + _dl_debug_change_state (r, RT_ADD); LIBC_PROBE (init_start, 2, LM_ID_BASE, r); /* Auditing checkpoint: we are ready to signal that the initial map @@ -1983,43 +2010,37 @@ dl_main (const ElfW(Phdr) *phdr, if (main_map->l_searchlist.r_list[i] == &GL(dl_rtld_map)) break; - bool rtld_multiple_ref = false; - if (__glibc_likely (i < main_map->l_searchlist.r_nlist)) - { - /* Some DT_NEEDED entry referred to the interpreter object itself, so - put it back in the list of visible objects. We insert it into the - chain in symbol search order because gdb uses the chain's order as - its symbol search order. */ - rtld_multiple_ref = true; + /* Insert the link map for the dynamic loader into the chain in + symbol search order because gdb uses the chain's order as its + symbol search order. */ - GL(dl_rtld_map).l_prev = main_map->l_searchlist.r_list[i - 1]; - if (__glibc_likely (state.mode == rtld_mode_normal)) - { - GL(dl_rtld_map).l_next = (i + 1 < main_map->l_searchlist.r_nlist - ? 
main_map->l_searchlist.r_list[i + 1] - : NULL); + GL(dl_rtld_map).l_prev = main_map->l_searchlist.r_list[i - 1]; + if (__glibc_likely (state.mode == rtld_mode_normal)) + { + GL(dl_rtld_map).l_next = (i + 1 < main_map->l_searchlist.r_nlist + ? main_map->l_searchlist.r_list[i + 1] + : NULL); #ifdef NEED_DL_SYSINFO_DSO - if (GLRO(dl_sysinfo_map) != NULL - && GL(dl_rtld_map).l_prev->l_next == GLRO(dl_sysinfo_map) - && GL(dl_rtld_map).l_next != GLRO(dl_sysinfo_map)) - GL(dl_rtld_map).l_prev = GLRO(dl_sysinfo_map); + if (GLRO(dl_sysinfo_map) != NULL + && GL(dl_rtld_map).l_prev->l_next == GLRO(dl_sysinfo_map) + && GL(dl_rtld_map).l_next != GLRO(dl_sysinfo_map)) + GL(dl_rtld_map).l_prev = GLRO(dl_sysinfo_map); #endif - } - else - /* In trace mode there might be an invisible object (which we - could not find) after the previous one in the search list. - In this case it doesn't matter much where we put the - interpreter object, so we just initialize the list pointer so - that the assertion below holds. */ - GL(dl_rtld_map).l_next = GL(dl_rtld_map).l_prev->l_next; - - assert (GL(dl_rtld_map).l_prev->l_next == GL(dl_rtld_map).l_next); - GL(dl_rtld_map).l_prev->l_next = &GL(dl_rtld_map); - if (GL(dl_rtld_map).l_next != NULL) - { - assert (GL(dl_rtld_map).l_next->l_prev == GL(dl_rtld_map).l_prev); - GL(dl_rtld_map).l_next->l_prev = &GL(dl_rtld_map); - } + } + else + /* In trace mode there might be an invisible object (which we + could not find) after the previous one in the search list. + In this case it doesn't matter much where we put the + interpreter object, so we just initialize the list pointer so + that the assertion below holds. 
*/ + GL(dl_rtld_map).l_next = GL(dl_rtld_map).l_prev->l_next; + + assert (GL(dl_rtld_map).l_prev->l_next == GL(dl_rtld_map).l_next); + GL(dl_rtld_map).l_prev->l_next = &GL(dl_rtld_map); + if (GL(dl_rtld_map).l_next != NULL) + { + assert (GL(dl_rtld_map).l_next->l_prev == GL(dl_rtld_map).l_prev); + GL(dl_rtld_map).l_next->l_prev = &GL(dl_rtld_map); } /* Now let us see whether all libraries are available in the @@ -2269,25 +2290,25 @@ dl_main (const ElfW(Phdr) *phdr, _rtld_main_check (main_map, _dl_argv[0]); - /* Now we have all the objects loaded. Relocate them all except for - the dynamic linker itself. We do this in reverse order so that copy - relocs of earlier objects overwrite the data written by later - objects. We do not re-relocate the dynamic linker itself in this - loop because that could result in the GOT entries for functions we - call being changed, and that would break us. It is safe to relocate - the dynamic linker out of order because it has no copy relocations. - Likewise for libc, which is relocated early to ensure that IFUNC - resolvers in libc work. */ + /* Now we have all the objects loaded. */ int consider_profiling = GLRO(dl_profile) != NULL; /* If we are profiling we also must do lazy reloaction. */ GLRO(dl_lazy) |= consider_profiling; + /* If libc.so has been loaded, relocate it early, after the dynamic + loader itself. The initial self-relocation of ld.so should be + sufficient for IFUNC resolvers in libc.so. */ if (GL(dl_ns)[LM_ID_BASE].libc_map != NULL) - _dl_relocate_object (GL(dl_ns)[LM_ID_BASE].libc_map, - GL(dl_ns)[LM_ID_BASE].libc_map->l_scope, - GLRO(dl_lazy) ? RTLD_LAZY : 0, consider_profiling); + { + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); + _dl_relocate_object (GL(dl_ns)[LM_ID_BASE].libc_map, + GL(dl_ns)[LM_ID_BASE].libc_map->l_scope, + GLRO(dl_lazy) ? 
RTLD_LAZY : 0, consider_profiling); + rtld_timer_accum (&relocate_time, start); + } RTLD_TIMING_VAR (start); rtld_timer_start (&start); @@ -2310,9 +2331,8 @@ dl_main (const ElfW(Phdr) *phdr, /* Also allocated with the fake malloc(). */ l->l_free_initfini = 0; - if (l != &GL(dl_rtld_map)) - _dl_relocate_object (l, l->l_scope, GLRO(dl_lazy) ? RTLD_LAZY : 0, - consider_profiling); + _dl_relocate_object (l, l->l_scope, GLRO(dl_lazy) ? RTLD_LAZY : 0, + consider_profiling); /* Add object to slot information data if necessasy. */ if (l->l_tls_blocksize != 0 && __rtld_tls_init_tp_called) @@ -2338,7 +2358,7 @@ dl_main (const ElfW(Phdr) *phdr, into the main thread's TLS area, which we allocated above. Note: thread-local variables must only be accessed after completing the next step. */ - _dl_allocate_tls_init (tcbp, false); + _dl_allocate_tls_init (tcbp, true); /* And finally install it for the main thread. */ if (! __rtld_tls_init_tp_called) @@ -2347,36 +2367,29 @@ dl_main (const ElfW(Phdr) *phdr, /* Make sure no new search directories have been added. */ assert (GLRO(dl_init_all_dirs) == GL(dl_all_dirs)); - if (rtld_multiple_ref) - { - /* There was an explicit ref to the dynamic linker as a shared lib. - Re-relocate ourselves with user-controlled symbol definitions. - - We must do this after TLS initialization in case after this - re-relocation, we might call a user-supplied function - (e.g. calloc from _dl_relocate_object) that uses TLS data. */ - - /* Set up the object lookup structures. */ - _dl_find_object_init (); - - /* The malloc implementation has been relocated, so resolving - its symbols (and potentially calling IFUNC resolvers) is safe - at this point. */ - __rtld_malloc_init_real (main_map); - - /* Likewise for the locking implementation. */ - __rtld_mutex_init (); + /* Set up the object lookup structures. */ + _dl_find_object_init (); + /* If libc.so was loaded, relocate ld.so against it. 
Complete ld.so + initialization with mutex symbols from libc.so and malloc symbols + from the global scope. */ + if (GL(dl_ns)[LM_ID_BASE].libc_map != NULL) + { RTLD_TIMING_VAR (start); rtld_timer_start (&start); + _dl_relocate_object_no_relro (&GL(dl_rtld_map), main_map->l_scope, 0, 0); + rtld_timer_accum (&relocate_time, start); - /* Mark the link map as not yet relocated again. */ - GL(dl_rtld_map).l_relocated = 0; - _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0); + __rtld_mutex_init (); + __rtld_malloc_init_real (main_map); - rtld_timer_accum (&relocate_time, start); + /* Update copy-relocated _r_debug if necessary. */ + _dl_debug_post_relocate (main_map); } + /* All ld.so initialization is complete. Apply RELRO. */ + _dl_protect_relro (&GL(dl_rtld_map)); + /* Relocation is complete. Perform early libc initialization. This is the initial libc, even if audit modules have been loaded with other libcs. */ @@ -2389,16 +2402,15 @@ dl_main (const ElfW(Phdr) *phdr, _dl_relocate_object might need to call `mprotect' for DT_TEXTREL. */ _dl_sysdep_start_cleanup (); - /* Auditing checkpoint: we have added all objects. */ - _dl_audit_activity_nsid (LM_ID_BASE, LA_ACT_CONSISTENT); - /* Notify the debugger all new objects are now ready to go. We must re-get the address since by now the variable might be in another object. */ r = _dl_debug_update (LM_ID_BASE); - r->r_state = RT_CONSISTENT; - _dl_debug_state (); + _dl_debug_change_state (r, RT_CONSISTENT); LIBC_PROBE (init_complete, 2, LM_ID_BASE, r); + /* Auditing checkpoint: we have added all objects. */ + _dl_audit_activity_nsid (LM_ID_BASE, LA_ACT_CONSISTENT); + #if defined USE_LDCONFIG && !defined MAP_COPY /* We must munmap() the cache file. */ _dl_unload_cache (); diff --git a/elf/sprof.c b/elf/sprof.c index b19aca3292..1f5ab25ac3 100644 --- a/elf/sprof.c +++ b/elf/sprof.c @@ -38,6 +38,7 @@ #include #include #include +#include /* Get libc version number. 
*/ #include "../version.h" @@ -410,6 +411,7 @@ load_shobj (const char *name) int fd; ElfW(Shdr) *shdr; size_t pagesize = getpagesize (); + struct stat st; /* Since we use dlopen() we must be prepared to work around the sometimes strange lookup rules for the shared objects. If we have a file foo.so @@ -553,14 +555,39 @@ load_shobj (const char *name) error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"), map->l_name); + if (fstat (fd, &st) < 0) + error (EXIT_FAILURE, errno, _("stat(%s) failure"), map->l_name); + + /* We're depending on data that's being read from the file, so be a + bit paranoid here and make sure the requests are reasonable - + i.e. both size and offset are nonnegative and smaller than the + file size, as well as the offset of the end of the data. PREAD + would have failed anyway, but this is more robust and explains + what happened better. Note that SZ must be unsigned and OFF may + be signed or unsigned. */ +#define PCHECK(sz1,off1) { \ + size_t sz = sz1, end_off; \ + off_t off = off1; \ + if (sz > st.st_size \ + || off < 0 || off > st.st_size \ + || INT_ADD_WRAPV (sz, off, &end_off) \ + || end_off > st.st_size) \ + error (EXIT_FAILURE, ERANGE, \ + _("read outside of file extents %zu + %jd > %jd"), \ + sz, (intmax_t) off, (intmax_t) st.st_size); \ + } + /* Map the section header. */ size_t size = ehdr->e_shnum * sizeof (ElfW(Shdr)); shdr = (ElfW(Shdr) *) alloca (size); + PCHECK (size, ehdr->e_shoff); if (pread (fd, shdr, size, ehdr->e_shoff) != size) error (EXIT_FAILURE, errno, _("reading of section headers failed")); /* Get the section header string table. 
*/ char *shstrtab = (char *) alloca (shdr[ehdr->e_shstrndx].sh_size); + PCHECK (shdr[ehdr->e_shstrndx].sh_size, + shdr[ehdr->e_shstrndx].sh_offset); if (pread (fd, shstrtab, shdr[ehdr->e_shstrndx].sh_size, shdr[ehdr->e_shstrndx].sh_offset) != shdr[ehdr->e_shstrndx].sh_size) @@ -588,6 +615,7 @@ load_shobj (const char *name) size_t size = debuglink_entry->sh_size; char *debuginfo_fname = (char *) alloca (size + 1); debuginfo_fname[size] = '\0'; + PCHECK (size, debuglink_entry->sh_offset); if (pread (fd, debuginfo_fname, size, debuglink_entry->sh_offset) != size) { @@ -641,21 +669,32 @@ load_shobj (const char *name) if (fd2 != -1) { ElfW(Ehdr) ehdr2; + struct stat st; + + if (fstat (fd2, &st) < 0) + error (EXIT_FAILURE, errno, _("stat(%s) failure"), workbuf); /* Read the ELF header. */ + PCHECK (sizeof (ehdr2), 0); if (pread (fd2, &ehdr2, sizeof (ehdr2), 0) != sizeof (ehdr2)) error (EXIT_FAILURE, errno, _("reading of ELF header failed")); /* Map the section header. */ - size_t size = ehdr2.e_shnum * sizeof (ElfW(Shdr)); + size_t size; + if (INT_MULTIPLY_WRAPV (ehdr2.e_shnum, sizeof (ElfW(Shdr)), &size)) + error (EXIT_FAILURE, errno, _("too many section headers")); + ElfW(Shdr) *shdr2 = (ElfW(Shdr) *) alloca (size); + PCHECK (size, ehdr2.e_shoff); if (pread (fd2, shdr2, size, ehdr2.e_shoff) != size) error (EXIT_FAILURE, errno, _("reading of section headers failed")); /* Get the section header string table. */ shstrtab = (char *) alloca (shdr2[ehdr2.e_shstrndx].sh_size); + PCHECK (shdr2[ehdr2.e_shstrndx].sh_size, + shdr2[ehdr2.e_shstrndx].sh_offset); if (pread (fd2, shstrtab, shdr2[ehdr2.e_shstrndx].sh_size, shdr2[ehdr2.e_shstrndx].sh_offset) != shdr2[ehdr2.e_shstrndx].sh_size) diff --git a/elf/tst-audit-tlsdesc-dlopen2.c b/elf/tst-audit-tlsdesc-dlopen2.c new file mode 100644 index 0000000000..7ba2c4129a --- /dev/null +++ b/elf/tst-audit-tlsdesc-dlopen2.c @@ -0,0 +1,46 @@ +/* Loading TLS-using modules from auditors (bug 32412). Main program. 
+ Copyright (C) 2021-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +static int +do_test (void) +{ + puts ("info: start of main program"); + + /* Load TLS-using modules, to trigger DTV resizing. The dynamic + linker will load them again (requiring their own TLS) because the + dlopen calls from the auditor were in the auditing namespace. */ + for (int i = 1; i <= 19; ++i) + { + char dso[30]; + snprintf (dso, sizeof (dso), "tst-tlsmod17a%d.so", i); + char sym[30]; + snprintf (sym, sizeof(sym), "tlsmod17a%d", i); + + void *handle = xdlopen (dso, RTLD_LAZY); + int (*func) (void) = xdlsym (handle, sym); + /* Trigger TLS allocation. */ + func (); + } + + return 0; +} + +#include diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c index d2640fe8b2..7786a74e64 100644 --- a/elf/tst-audit23.c +++ b/elf/tst-audit23.c @@ -17,6 +17,7 @@ . 
*/ #include +#include #include #include #include @@ -30,16 +31,21 @@ #include #include #include +#include static int restart; +static int do_dlclose; #define CMDLINE_OPTIONS \ - { "restart", no_argument, &restart, 1 }, + { "restart", no_argument, &restart, 1 }, \ + { "dlclose", no_argument, &do_dlclose, 1 }, \ static int handle_restart (void) { xdlopen ("tst-audit23mod.so", RTLD_NOW); - xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); + void *handle = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); + if (do_dlclose) + xdlclose (handle); return 0; } @@ -59,8 +65,8 @@ is_vdso (const char *str) || startswith (str, "linux-vdso"); } -static int -do_test (int argc, char *argv[]) +static void +do_one_test (int argc, char *argv[], bool pass_dlclose_flag) { /* We must have either: - One or four parameters left if called initially: @@ -68,16 +74,15 @@ do_test (int argc, char *argv[]) + "--library-path" optional + the library path optional + the application name */ - if (restart) - return handle_restart (); - - char *spargv[9]; + char *spargv[10]; TEST_VERIFY_EXIT (((argc - 1) + 3) < array_length (spargv)); int i = 0; for (; i < argc - 1; i++) spargv[i] = argv[i + 1]; spargv[i++] = (char *) "--direct"; spargv[i++] = (char *) "--restart"; + if (pass_dlclose_flag) + spargv[i++] = (char *) "--dlclose"; spargv[i] = NULL; setenv ("LD_AUDIT", "tst-auditmod23.so", 0); @@ -85,14 +90,30 @@ do_test (int argc, char *argv[]) = support_capture_subprogram (spargv[0], spargv, NULL); support_capture_subprocess_check (&result, "tst-audit22", 0, sc_allow_stderr); + { + FILE *fp = fmemopen (result.err.buffer, result.err.length, "r"); + TEST_VERIFY (fp != NULL); + unsigned int line = 0; + char *buffer = NULL; + size_t buffer_length = 0; + puts ("info: *** audit log start ***"); + while (xgetline (&buffer, &buffer_length, fp)) + printf ("%6u\t%s", ++line, buffer); + puts ("info: *** audit log end ***"); + free (buffer); + xfclose (fp); + } + /* The expected la_objopen/la_objclose: 1. executable 2. loader 3. 
libc.so - 4. tst-audit23mod.so - 5. libc.so (LM_ID_NEWLM). - 6. vdso (optional and ignored). */ - enum { max_objs = 6 }; + 4. libgcc_s.so (on some architectures, for libsupport) + 5. tst-audit23mod.so + 6. libc.so (LM_ID_NEWLM). + 7. loader (proxy link map in new namespace) + vdso (optional and ignored). */ + enum { max_objs = 7 }; struct la_obj_t { char *lname; @@ -115,8 +136,10 @@ do_test (int argc, char *argv[]) TEST_VERIFY (out != NULL); char *buffer = NULL; size_t buffer_length = 0; + unsigned int line = 0; while (xgetline (&buffer, &buffer_length, out)) { + ++line; if (startswith (buffer, "la_activity: ")) { uintptr_t cookie; @@ -127,8 +150,14 @@ do_test (int argc, char *argv[]) /* The cookie identifies the object at the head of the link map, so we only add a new namespace if it changes from the previous - one. This works since dlmopen is the last in the test body. */ - if (cookie != last_act_cookie && last_act_cookie != -1) + one. This works since dlmopen is the last in the test body. + + Currently, this does not work as expected because there + is no head link map if a namespace is completely deleted. + No LA_ACT_CONSISTENT event is generated in that case. + See the comment in _dl_audit_activity_nsid and bug 32068.
*/ + if (cookie != last_act_cookie && last_act_cookie != -1 + && !pass_dlclose_flag) TEST_COMPARE (last_act, LA_ACT_CONSISTENT); if (this_act == LA_ACT_ADD && acts[nacts] != cookie) @@ -174,8 +203,8 @@ do_test (int argc, char *argv[]) if (is_vdso (lname)) continue; if (nobjs == max_objs) - FAIL_EXIT1 ("non expected la_objopen: %s %"PRIxPTR" %ld", - lname, laddr, lmid); + FAIL_EXIT1 ("(line %u) non expected la_objopen: %s %"PRIxPTR" %ld", + line, lname, laddr, lmid); objs[nobjs].lname = lname; objs[nobjs].laddr = laddr; objs[nobjs].lmid = lmid; @@ -217,11 +246,26 @@ do_test (int argc, char *argv[]) } } + Lmid_t lmid_other = LM_ID_NEWLM; + unsigned int other_namespace_count = 0; for (size_t i = 0; i < nobjs; i++) { + if (objs[i].lmid != LM_ID_BASE) + { + if (lmid_other == LM_ID_NEWLM) + lmid_other = objs[i].lmid; + TEST_COMPARE (objs[i].lmid, lmid_other); + ++other_namespace_count; + if (!(endswith (objs[i].lname, "/" LIBC_SO) + || endswith (objs[i].lname, "/" LD_SO))) + FAIL ("unexpected object in secondary namespace: %s", + objs[i].lname); + } TEST_COMPARE (objs[i].closed, true); free (objs[i].lname); } + /* Both libc.so and ld.so should be present. */ + TEST_COMPARE (other_namespace_count, 2); /* la_activity(LA_ACT_CONSISTENT) should be the last callback received. Since only one link map may be not-CONSISTENT at a time, this also @@ -231,7 +275,16 @@ do_test (int argc, char *argv[]) free (buffer); xfclose (out); +} + +static int +do_test (int argc, char *argv[]) +{ + if (restart) + return handle_restart (); + do_one_test (argc, argv, false); + do_one_test (argc, argv, true); return 0; } diff --git a/elf/tst-auditmod-tlsdesc2.c b/elf/tst-auditmod-tlsdesc2.c new file mode 100644 index 0000000000..50275cd34d --- /dev/null +++ b/elf/tst-auditmod-tlsdesc2.c @@ -0,0 +1,59 @@ +/* Loading TLS-using modules from auditors (bug 32412). Audit module. + Copyright (C) 2021-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +unsigned int +la_version (unsigned int version) +{ + /* Open some modules, to trigger DTV resizing before the switch to + the main malloc. */ + for (int i = 1; i <= 19; ++i) + { + char dso[30]; + snprintf (dso, sizeof (dso), "tst-tlsmod17a%d.so", i); + char sym[30]; + snprintf (sym, sizeof(sym), "tlsmod17a%d", i); + + void *handle = dlopen (dso, RTLD_LAZY); + if (handle == NULL) + { + printf ("error: dlopen from auditor: %s\n", dlerror ()); + fflush (stdout); + _exit (1); + } + int (*func) (void) = dlsym (handle, sym); + if (func == NULL) + { + printf ("error: dlsym from auditor: %s\n", dlerror ()); + fflush (stdout); + _exit (1); + } + /* Trigger TLS allocation.
*/ + func (); + } + + puts ("info: TLS-using modules loaded from auditor"); + fflush (stdout); + + return LAV_CURRENT; +} diff --git a/elf/tst-dlmopen4-nonpic.c b/elf/tst-dlmopen4-nonpic.c new file mode 100644 index 0000000000..ad4e409953 --- /dev/null +++ b/elf/tst-dlmopen4-nonpic.c @@ -0,0 +1,2 @@ +#define BUILD_FOR_NONPIC +#include "tst-dlmopen4.c" diff --git a/elf/tst-dlmopen4-pic.c b/elf/tst-dlmopen4-pic.c new file mode 100644 index 0000000000..919fa85c25 --- /dev/null +++ b/elf/tst-dlmopen4-pic.c @@ -0,0 +1,2 @@ +#define BUILD_FOR_PIC +#include "tst-dlmopen4.c" diff --git a/elf/tst-dlmopen4.c b/elf/tst-dlmopen4.c index b1c5502621..9e053fbc59 100644 --- a/elf/tst-dlmopen4.c +++ b/elf/tst-dlmopen4.c @@ -46,6 +46,15 @@ do_test (void) TEST_COMPARE (debug->base.r_version, 1); TEST_VERIFY_EXIT (debug->r_next == NULL); +#ifdef BUILD_FOR_PIC + /* In a PIC build, using _r_debug directly should give us the same + object. */ + TEST_VERIFY (&_r_debug == &debug->base); +#endif +#ifdef BUILD_FOR_NONPIC + TEST_COMPARE (_r_debug.r_version, 1); +#endif + void *h = xdlmopen (LM_ID_NEWLM, "$ORIGIN/tst-dlmopen1mod.so", RTLD_LAZY); @@ -57,6 +66,19 @@ do_test (void) const char *name = basename (debug->r_next->base.r_map->l_name); TEST_COMPARE_STRING (name, "tst-dlmopen1mod.so"); +#ifdef BUILD_FOR_NONPIC + /* If a copy relocation is used, it must be at version 1.
*/ + if (&_r_debug != &debug->base) + { + TEST_COMPARE (_r_debug.r_version, 1); + TEST_COMPARE ((uintptr_t) _r_debug.r_map, + (uintptr_t) debug->base.r_map); + TEST_COMPARE (_r_debug.r_brk, debug->base.r_brk); + TEST_COMPARE (_r_debug.r_state, debug->base.r_state); + TEST_COMPARE (_r_debug.r_ldbase, debug->base.r_ldbase); + } +#endif + xdlclose (h); return 0; diff --git a/elf/tst-dlopen-auditdup-auditmod.c b/elf/tst-dlopen-auditdup-auditmod.c new file mode 100644 index 0000000000..270a595ec4 --- /dev/null +++ b/elf/tst-dlopen-auditdup-auditmod.c @@ -0,0 +1,104 @@ +/* Auditor that opens again an object that just has been opened. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include +#include +#include + +unsigned int +la_version (unsigned int v) +{ + return LAV_CURRENT; +} + +static bool trigger_on_la_activity; + +unsigned int +la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) +{ + printf ("info: la_objopen: \"%s\"\n", map->l_name); + if (strstr (map->l_name, "/tst-dlopen-auditdupmod.so") != NULL) + trigger_on_la_activity = true; + return 0; +} + +void +la_activity (uintptr_t *cookie, unsigned int flag) +{ + static unsigned int calls; + ++calls; + printf ("info: la_activity: call %u (flag %u)\n", calls, flag); + fflush (stdout); + if (trigger_on_la_activity) + { + /* Avoid triggering on the dlmopen call below. */ + static bool recursion; + if (recursion) + return; + recursion = true; + + puts ("info: about to dlmopen tst-dlopen-auditdupmod.so"); + fflush (stdout); + void *handle = dlmopen (LM_ID_BASE, "tst-dlopen-auditdupmod.so", + RTLD_NOW); + if (handle == NULL) + { + printf ("error: dlmopen: %s\n", dlerror ()); + fflush (stdout); + _exit (1); + } + + /* Check that the constructor has not run. Running the + constructor would require constructing its dependencies, but + the constructor call that triggered this auditing activity + has not completed, and constructors among the dependencies + may not be able to deal with that. */ + int *status = dlsym (handle, "auditdupmod_status"); + if (status == NULL) + { + printf ("error: dlsym: %s\n", dlerror ()); + fflush (stdout); + _exit (1); + } + printf ("info: auditdupmod_status == %d\n", *status); + if (*status != 0) + { + puts ("error: auditdupmod_status == 0 expected"); + fflush (stdout); + _exit (1); + } + /* Checked in the destructor and the main program. */ + ++*status; + printf ("info: auditdupmod_status == %d\n", *status); + + /* Check that the module has been relocated. 
*/ + int **status_address = dlsym (handle, "auditdupmod_status_address"); + if (status_address == NULL || *status_address != status) + { + puts ("error: invalid auditdupmod_status address in" + " tst-dlopen-auditdupmod.so"); + fflush (stdout); + _exit (1); + } + + fflush (stdout); + } +} diff --git a/elf/tst-dlopen-auditdup.c b/elf/tst-dlopen-auditdup.c new file mode 100644 index 0000000000..d022c58ae3 --- /dev/null +++ b/elf/tst-dlopen-auditdup.c @@ -0,0 +1,36 @@ +/* Test that recursive dlopen from auditor works (bug 31986). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +static int +do_test (void) +{ + puts ("info: about to dlopen tst-dlopen-auditdupmod.so"); + fflush (stdout); + void *handle = xdlopen ("tst-dlopen-auditdupmod.so", RTLD_NOW); + int *status = xdlsym (handle, "auditdupmod_status"); + printf ("info: auditdupmod_status == %d (from main)\n", *status); + TEST_COMPARE (*status, 2); + xdlclose (handle); + return 0; +} + +#include diff --git a/elf/tst-dlopen-auditdupmod.c b/elf/tst-dlopen-auditdupmod.c new file mode 100644 index 0000000000..59b7e21daa --- /dev/null +++ b/elf/tst-dlopen-auditdupmod.c @@ -0,0 +1,48 @@ +/* Directly opened test module that gets reopened from the auditor. + Copyright (C) 2024 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +int auditdupmod_status; + +/* Used to check for successful relocation processing. */ +int *auditdupmod_status_address = &auditdupmod_status; + +static void __attribute__ ((constructor)) +init (void) +{ + ++auditdupmod_status; + printf ("info: tst-dlopen-auditdupmod.so constructor called (status %d)\n", + auditdupmod_status); +} + +static void __attribute__ ((destructor)) +fini (void) +{ + /* The tst-dlopen-auditdup-auditmod.so auditor incremented + auditdupmod_status. */ + printf ("info: tst-dlopen-auditdupmod.so destructor called (status %d)\n", + auditdupmod_status); + if (auditdupmod_status != 2) + { + puts ("error: auditdupmod_status == 2 expected"); + exit (1); + } +} diff --git a/elf/tst-dlopen-constructor-null-mod1.c b/elf/tst-dlopen-constructor-null-mod1.c new file mode 100644 index 0000000000..70a7a0ad46 --- /dev/null +++ b/elf/tst-dlopen-constructor-null-mod1.c @@ -0,0 +1,55 @@ +/* Module calling dlopen (NULL, RTLD_LAZY) to obtain the global scope. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +int mod1_status; + +static void __attribute__ ((constructor)) +init (void) +{ + puts ("info: tst-dlopen-constructor-null-mod1.so constructor"); + + void *handle = dlopen (NULL, RTLD_LAZY); + if (handle == NULL) + { + printf ("error: %s\n", dlerror ()); + exit (1); + } + puts ("info: dlopen returned"); + if (dlsym (handle, "malloc") != malloc) + { + puts ("error: dlsym did not produce expected result"); + exit (1); + } + dlclose (handle); + + /* Check that the second module's constructor has not executed. */ + if (getenv ("mod2_status") != NULL) + { + printf ("error: mod2_status environment variable set: %s\n", + getenv ("mod2_status")); + exit (1); + } + + /* Communicate to the second module that the constructor executed. */ + mod1_status = 1; +} diff --git a/elf/tst-dlopen-constructor-null-mod2.c b/elf/tst-dlopen-constructor-null-mod2.c new file mode 100644 index 0000000000..d6e945beae --- /dev/null +++ b/elf/tst-dlopen-constructor-null-mod2.c @@ -0,0 +1,37 @@ +/* Module whose constructor should not be invoked by dlopen (NULL, RTLD_LAZY). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +extern int mod1_status; +int mod2_status; + +static void __attribute__ ((constructor)) +init (void) +{ + printf ("info: tst-dlopen-constructor-null-mod2.so constructor" + " (mod1_status=%d)\n", mod1_status); + if (!(mod1_status == 1 && mod2_status == 0)) + { + puts ("error: mod1_status == 1 && mod2_status == 0 expected"); + exit (1); + } + setenv ("mod2_status", "constructed", 1); + mod2_status = 1; +} diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/elf/tst-dlopen-constructor-null.c similarity index 55% rename from sysdeps/x86_64/dl-trampoline-save.h rename to elf/tst-dlopen-constructor-null.c index 84eac4a8ac..db90643325 100644 --- a/sysdeps/x86_64/dl-trampoline-save.h +++ b/elf/tst-dlopen-constructor-null.c @@ -1,4 +1,4 @@ -/* x86-64 PLT trampoline register save macros. +/* Verify that dlopen (NULL, RTLD_LAZY) does not complete initialization. Copyright (C) 2024 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,19 +16,23 @@ License along with the GNU C Library; if not, see . */ -#ifndef DL_STACK_ALIGNMENT -/* Due to GCC bug: +/* This test mimics what the glvndSetupPthreads function in libglvnd + does. */ - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 +#include +#include - __tls_get_addr may be called with 8-byte stack alignment.
Although - this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume - that stack will be always aligned at 16 bytes. */ -# define DL_STACK_ALIGNMENT 8 -#endif +/* Defined and initialized in the shared objects. */ +extern int mod1_status; +extern int mod2_status; -/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align - stack to 16 bytes before calling _dl_fixup. */ -#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ - (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ - || 16 > DL_STACK_ALIGNMENT) +static int +do_test (void) +{ + TEST_COMPARE (mod1_status, 1); + TEST_COMPARE (mod2_status, 1); + TEST_COMPARE_STRING (getenv ("mod2_status"), "constructed"); + return 0; +} + +#include diff --git a/elf/tst-dlopen-sgid-mod.c b/elf/tst-dlopen-sgid-mod.c new file mode 100644 index 0000000000..5eb79eef48 --- /dev/null +++ b/elf/tst-dlopen-sgid-mod.c @@ -0,0 +1 @@ +/* Opening this object should not succeed. */ diff --git a/elf/tst-dlopen-sgid.c b/elf/tst-dlopen-sgid.c new file mode 100644 index 0000000000..8aec52e19f --- /dev/null +++ b/elf/tst-dlopen-sgid.c @@ -0,0 +1,106 @@ +/* Test case for ignored LD_LIBRARY_PATH in static startup (bug 32976). + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + .
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This is the name of our test object. Use a custom module for + testing, so that this object does not get picked up from the system + path. */ +static const char dso_name[] = "tst-dlopen-sgid-mod.so"; + +/* Used to mark the recursive invocation. */ +static const char magic_argument[] = "run-actual-test"; + +static int +do_test (void) +{ +/* Pathname of the directory that receives the shared objects this + test attempts to load. */ + char *libdir = support_create_temp_directory ("tst-dlopen-sgid-"); + + /* This is supposed to be ignored and stripped. */ + TEST_COMPARE (setenv ("LD_LIBRARY_PATH", libdir, 1), 0); + + /* Copy of libc.so.6. */ + { + char *from = xasprintf ("%s/%s", support_objdir_root, LIBC_SO); + char *to = xasprintf ("%s/%s", libdir, LIBC_SO); + add_temp_file (to); + support_copy_file (from, to); + free (to); + free (from); + } + + /* Copy of the test object. */ + { + char *from = xasprintf ("%s/elf/%s", support_objdir_root, dso_name); + char *to = xasprintf ("%s/%s", libdir, dso_name); + add_temp_file (to); + support_copy_file (from, to); + free (to); + free (from); + } + + free (libdir); + + support_capture_subprogram_self_sgid (magic_argument); + + return 0; +} + +static void +alternative_main (int argc, char **argv) +{ + if (argc == 2 && strcmp (argv[1], magic_argument) == 0) + { + if (getgid () == getegid ()) + /* This can happen if the file system is mounted nosuid. */ + FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", + (intmax_t) getgid ()); + + /* Should be removed due to SGID. 
*/ + TEST_COMPARE_STRING (getenv ("LD_LIBRARY_PATH"), NULL); + + TEST_VERIFY (dlopen (dso_name, RTLD_NOW) == NULL); + { + const char *message = dlerror (); + TEST_COMPARE_STRING (message, + "tst-dlopen-sgid-mod.so:" + " cannot open shared object file:" + " No such file or directory"); + } + + support_record_failure_barrier (); + exit (EXIT_SUCCESS); + } +} + +#define PREPARE alternative_main +#include diff --git a/elf/tst-dlopen-tlsreinit1.c b/elf/tst-dlopen-tlsreinit1.c new file mode 100644 index 0000000000..2016b9b0c6 --- /dev/null +++ b/elf/tst-dlopen-tlsreinit1.c @@ -0,0 +1,40 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +static int +do_test (void) +{ + void *handle = xdlopen ("tst-dlopen-tlsreinitmod1.so", RTLD_NOW); + + bool *tlsreinitmod3_tested = xdlsym (handle, "tlsreinitmod3_tested"); + TEST_VERIFY (*tlsreinitmod3_tested); + + xdlclose (handle); + + /* This crashes if the libc.so.6 TLS image has been reverted. 
*/ + TEST_VERIFY (!isupper ('@')); + + return 0; +} + +#include diff --git a/elf/tst-dlopen-tlsreinit2.c b/elf/tst-dlopen-tlsreinit2.c new file mode 100644 index 0000000000..90ad2c7713 --- /dev/null +++ b/elf/tst-dlopen-tlsreinit2.c @@ -0,0 +1,39 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717). + Variant with initially-linked modules. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + + +static int +do_test (void) +{ + /* Defined in tst-dlopen-tlsreinitmod3.so. */ + extern bool tlsreinitmod3_tested; + TEST_VERIFY (tlsreinitmod3_tested); + + /* This crashes if the libc.so.6 TLS image has been reverted. */ + TEST_VERIFY (!isupper ('@')); + + return 0; +} + +#include diff --git a/elf/tst-dlopen-tlsreinit3.c b/elf/tst-dlopen-tlsreinit3.c new file mode 100644 index 0000000000..79bd585aff --- /dev/null +++ b/elf/tst-dlopen-tlsreinit3.c @@ -0,0 +1,2 @@ +/* Same code, but run with LD_AUDIT=tst-auditmod1.so. */ +#include "tst-dlopen-tlsreinit1.c" diff --git a/elf/tst-dlopen-tlsreinit4.c b/elf/tst-dlopen-tlsreinit4.c new file mode 100644 index 0000000000..344c9211ab --- /dev/null +++ b/elf/tst-dlopen-tlsreinit4.c @@ -0,0 +1,2 @@ +/* Same code, but run with LD_AUDIT=tst-auditmod1.so. 
*/ +#include "tst-dlopen-tlsreinit2.c" diff --git a/elf/tst-dlopen-tlsreinitmod1.c b/elf/tst-dlopen-tlsreinitmod1.c new file mode 100644 index 0000000000..354cc3de51 --- /dev/null +++ b/elf/tst-dlopen-tlsreinitmod1.c @@ -0,0 +1,20 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717), module 1. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This module triggers loading of tst-dlopen-tlsreinitmod2.so and + tst-dlopen-tlsreinitmod3.so. */ diff --git a/elf/tst-dlopen-tlsreinitmod2.c b/elf/tst-dlopen-tlsreinitmod2.c new file mode 100644 index 0000000000..677e69bd35 --- /dev/null +++ b/elf/tst-dlopen-tlsreinitmod2.c @@ -0,0 +1,30 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717), module 2. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* Defined in tst-dlopen-tlsreinitmod3.so. This is an underlinked symbol + dependency. */ +extern void call_tlsreinitmod3 (void); + +static void __attribute__ ((constructor)) +tlsreinitmod2_init (void) +{ + puts ("info: constructor of tst-dlopen-tlsreinitmod2.so invoked"); + call_tlsreinitmod3 (); +} diff --git a/elf/tst-dlopen-tlsreinitmod3.c b/elf/tst-dlopen-tlsreinitmod3.c new file mode 100644 index 0000000000..ef769c5131 --- /dev/null +++ b/elf/tst-dlopen-tlsreinitmod3.c @@ -0,0 +1,102 @@ +/* Test that dlopen preserves already accessed TLS (bug 31717), module 3. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* Used to verify from the main program that the test ran. */ +bool tlsreinitmod3_tested; + +/* This TLS variable must not revert back to the initial state after + dlopen. */ +static __thread int tlsreinitmod3_state = 1; + +/* Set from the ELF constructor during dlopen. */ +static bool tlsreinitmod3_constructed; + +/* Second half of test, behind a compiler barrier.
The compiler + barrier is necessary to prevent carrying over TLS address + information from call_tlsreinitmod3 to call_tlsreinitmod3_tail. */ +void call_tlsreinitmod3_tail (void *self) __attribute__ ((weak)); + +/* Called from tst-dlopen-tlsreinitmod2.so. */ +void +call_tlsreinitmod3 (void) +{ + printf ("info: call_tlsreinitmod3 invoked (state=%d)\n", + tlsreinitmod3_state); + + if (tlsreinitmod3_constructed) + { + puts ("error: call_tlsreinitmod3 called after ELF constructor"); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. */ + _exit (1); + } + + tlsreinitmod3_state = 2; + + /* Self-dlopen. This will run the ELF constructor. */ + void *self = dlopen ("tst-dlopen-tlsreinitmod3.so", RTLD_NOW); + if (self == NULL) + { + printf ("error: dlopen: %s\n", dlerror ()); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. */ + _exit (1); + } + + call_tlsreinitmod3_tail (self); +} + +void +call_tlsreinitmod3_tail (void *self) +{ + printf ("info: dlopen returned in tlsreinitmod3 (state=%d)\n", + tlsreinitmod3_state); + + if (!tlsreinitmod3_constructed) + { + puts ("error: dlopen did not call tlsreinitmod3 ELF constructor"); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. */ + _exit (1); + } + + if (tlsreinitmod3_state != 2) + { + puts ("error: TLS state reverted in tlsreinitmod3"); + fflush (stdout); + /* Cannot rely on test harness due to dynamic linking. */ + _exit (1); + } + + dlclose (self); + + /* Signal test completion to the main program. 
*/ + tlsreinitmod3_tested = true; +} + +static void __attribute__ ((constructor)) +tlsreinitmod3_init (void) +{ + puts ("info: constructor of tst-dlopen-tlsreinitmod3.so invoked"); + tlsreinitmod3_constructed = true; +} diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index a47219047f..233eec7631 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -105,10 +105,7 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - - /* Special return code to make sure that the child executed all the way - through. */ - exit (42); + return 0; } else { @@ -127,18 +124,7 @@ do_test (int argc, char **argv) continue; } - int status = support_capture_subprogram_self_sgid (buf); - - /* Bail out early if unsupported. */ - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - return EXIT_UNSUPPORTED; - - if (WEXITSTATUS (status) != 42) - { - printf (" [%d] child failed with status %d\n", i, - WEXITSTATUS (status)); - support_record_failure (); - } + support_capture_subprogram_self_sgid (buf); } return 0; } diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c index 59f2ffeb88..ee3f058468 100644 --- a/elf/tst-env-setuid.c +++ b/elf/tst-env-setuid.c @@ -147,10 +147,7 @@ do_test (int argc, char **argv) if (ret != 0) exit (1); - - /* Special return code to make sure that the child executed all the way - through. 
*/ - exit (42); + return 0; } else { @@ -174,17 +171,7 @@ do_test (int argc, char **argv) free (profilepath); } - int status = support_capture_subprogram_self_sgid (SETGID_CHILD); - - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - exit (EXIT_UNSUPPORTED); - - if (WEXITSTATUS (status) != 42) - { - printf (" child failed with status %d\n", - WEXITSTATUS (status)); - support_record_failure (); - } + support_capture_subprogram_self_sgid (SETGID_CHILD); return 0; } diff --git a/elf/tst-link-map-contiguous-ldso.c b/elf/tst-link-map-contiguous-ldso.c new file mode 100644 index 0000000000..04de808bb2 --- /dev/null +++ b/elf/tst-link-map-contiguous-ldso.c @@ -0,0 +1,98 @@ +/* Check that _dl_find_object behavior matches up with gaps. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + struct link_map *l = xdlopen (LD_SO, RTLD_NOW); + if (!l->l_contiguous) + { + puts ("info: ld.so link map is not contiguous"); + + /* Try to find holes by probing with mmap. 
*/ + int pagesize = getpagesize (); + bool gap_found = false; + ElfW(Addr) addr = l->l_map_start; + TEST_COMPARE (addr % pagesize, 0); + while (addr < l->l_map_end) + { + void *expected = (void *) addr; + void *ptr = xmmap (expected, 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + struct dl_find_object dlfo; + int dlfo_ret = _dl_find_object (expected, &dlfo); + if (ptr == expected) + { + if (dlfo_ret < 0) + { + TEST_COMPARE (dlfo_ret, -1); + printf ("info: hole without mapping data found at %p\n", ptr); + } + else + FAIL ("object \"%s\" found in gap at %p", + dlfo.dlfo_link_map->l_name, ptr); + gap_found = true; + } + else if (dlfo_ret == 0) + { + if ((void *) dlfo.dlfo_link_map != (void *) l) + { + printf ("info: object \"%s\" found at %p\n", + dlfo.dlfo_link_map->l_name, ptr); + gap_found = true; + } + } + else + TEST_COMPARE (dlfo_ret, -1); + xmunmap (ptr, 1); + addr += pagesize; + } + if (!gap_found) + FAIL ("no ld.so gap found"); + } + else + { + puts ("info: ld.so link map is contiguous"); + + /* Assert that ld.so is truly contiguous in memory. */ + volatile long int *p = (volatile long int *) l->l_map_start; + volatile long int *end = (volatile long int *) l->l_map_end; + while (p < end) + { + *p; + ++p; + } + } + + xdlclose (l); + + return 0; +} + +#include diff --git a/elf/tst-link-map-contiguous-libc.c b/elf/tst-link-map-contiguous-libc.c new file mode 100644 index 0000000000..eb5728c765 --- /dev/null +++ b/elf/tst-link-map-contiguous-libc.c @@ -0,0 +1,57 @@ +/* Check that the entire libc.so program image is readable if contiguous. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + struct link_map *l = xdlopen (LIBC_SO, RTLD_NOW); + + /* The dynamic loader fills holes with PROT_NONE mappings. */ + if (!l->l_contiguous) + FAIL_EXIT1 ("libc.so link map is not contiguous"); + + /* Direct probing does not work because not everything is readable + due to PROT_NONE mappings. */ + int pagesize = getpagesize (); + ElfW(Addr) addr = l->l_map_start; + TEST_COMPARE (addr % pagesize, 0); + while (addr < l->l_map_end) + { + void *expected = (void *) addr; + void *ptr = xmmap (expected, 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + if (ptr == expected) + FAIL ("hole in libc.so memory image after %lu bytes", + (unsigned long int) (addr - l->l_map_start)); + xmunmap (ptr, 1); + addr += pagesize; + } + + xdlclose (l); + + return 0; +} +#include diff --git a/elf/tst-link-map-contiguous-main.c b/elf/tst-link-map-contiguous-main.c new file mode 100644 index 0000000000..2d1a054f0f --- /dev/null +++ b/elf/tst-link-map-contiguous-main.c @@ -0,0 +1,45 @@ +/* Check that the entire main program image is readable if contiguous. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +static int +do_test (void) +{ + struct link_map *l = xdlopen ("", RTLD_NOW); + if (!l->l_contiguous) + FAIL_UNSUPPORTED ("main link map is not contiguous"); + + /* This check only works if the kernel loaded the main program. The + dynamic loader replaces gaps with PROT_NONE mappings, resulting + in faults. */ + volatile long int *p = (volatile long int *) l->l_map_start; + volatile long int *end = (volatile long int *) l->l_map_end; + while (p < end) + { + *p; + ++p; + } + + xdlclose (l); + + return 0; +} +#include diff --git a/elf/tst-rtld-no-malloc-audit.c b/elf/tst-rtld-no-malloc-audit.c new file mode 100644 index 0000000000..a028377ad1 --- /dev/null +++ b/elf/tst-rtld-no-malloc-audit.c @@ -0,0 +1 @@ +#include "tst-rtld-no-malloc.c" diff --git a/elf/tst-rtld-no-malloc-preload.c b/elf/tst-rtld-no-malloc-preload.c new file mode 100644 index 0000000000..a028377ad1 --- /dev/null +++ b/elf/tst-rtld-no-malloc-preload.c @@ -0,0 +1 @@ +#include "tst-rtld-no-malloc.c" diff --git a/elf/tst-rtld-no-malloc.c b/elf/tst-rtld-no-malloc.c new file mode 100644 index 0000000000..5f24d4bd72 --- /dev/null +++ b/elf/tst-rtld-no-malloc.c @@ -0,0 +1,76 @@ +/* Test that program loading does not call malloc. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + + +#include <string.h> +#include <unistd.h> + +static void +print (const char *s) +{ + const char *end = s + strlen (s); + while (s < end) + { + ssize_t ret = write (STDOUT_FILENO, s, end - s); + if (ret <= 0) + _exit (2); + s += ret; + } +} + +static void __attribute__ ((noreturn)) +unexpected_call (const char *function) +{ + print ("error: unexpected call to "); + print (function); + print ("\n"); + _exit (1); +} + +/* These are the malloc functions implemented in elf/dl-minimal.c. */ + +void +free (void *ignored) +{ + unexpected_call ("free"); +} + +void * +calloc (size_t ignored1, size_t ignored2) +{ + unexpected_call ("calloc"); +} + +void * +malloc (size_t ignored) +{ + unexpected_call ("malloc"); +} + +void * +realloc (void *ignored1, size_t ignored2) +{ + unexpected_call ("realloc"); +} + +int +main (void) +{ + /* Do not use the test wrapper, to avoid spurious malloc calls from it. */ + return 0; +} diff --git a/elf/tst-tls23-mod.c b/elf/tst-tls23-mod.c new file mode 100644 index 0000000000..3ee4c70e40 --- /dev/null +++ b/elf/tst-tls23-mod.c @@ -0,0 +1,32 @@ +/* DSO used by tst-tls23. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +__thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); + +struct tls * +apply_tls (struct tls *p) +{ + INIT_TLS_CALL (); + BEFORE_TLS_CALL (); + tls_var0 = *p; + struct tls *ret = &tls_var0; + AFTER_TLS_CALL (); + return ret; +} diff --git a/elf/tst-tls23.c b/elf/tst-tls23.c new file mode 100644 index 0000000000..afe594c067 --- /dev/null +++ b/elf/tst-tls23.c @@ -0,0 +1,106 @@ +/* Test that __tls_get_addr preserves caller-saved registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef IS_SUPPORTED +# define IS_SUPPORTED() true +#endif + +/* An architecture can define it to clobber caller-saved registers in + malloc below to verify that __tls_get_addr won't change caller-saved + registers. 
*/ +#ifndef PREPARE_MALLOC +# define PREPARE_MALLOC() +#endif + +extern void * __libc_malloc (size_t); + +size_t malloc_counter = 0; + +void * +malloc (size_t n) +{ + PREPARE_MALLOC (); + malloc_counter++; + return __libc_malloc (n); +} + +static void *mod; +static const char *modname = "tst-tls23-mod.so"; + +static void +open_mod (void) +{ + mod = xdlopen (modname, RTLD_LAZY); + printf ("open %s\n", modname); +} + +static void +close_mod (void) +{ + xdlclose (mod); + mod = NULL; + printf ("close %s\n", modname); +} + +static void +access_mod (const char *sym) +{ + struct tls var = { -4, -4, -4, -4 }; + struct tls *(*f) (struct tls *) = xdlsym (mod, sym); + /* Check that our malloc is called. */ + malloc_counter = 0; + struct tls *p = f (&var); + TEST_VERIFY (malloc_counter != 0); + printf ("access %s: %s() = %p\n", modname, sym, p); + TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0); + ++(p->a); +} + +static void * +start (void *arg) +{ + access_mod ("apply_tls"); + return arg; +} + +static int +do_test (void) +{ + if (!IS_SUPPORTED ()) + return EXIT_UNSUPPORTED; + + open_mod (); + pthread_t t = xpthread_create (NULL, start, NULL); + xpthread_join (t); + close_mod (); + + return 0; +} + +#include diff --git a/elf/tst-tls23.h b/elf/tst-tls23.h new file mode 100644 index 0000000000..d0e734569c --- /dev/null +++ b/elf/tst-tls23.h @@ -0,0 +1,40 @@ +/* Test that __tls_get_addr preserves caller-saved registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +struct tls +{ + int64_t a, b, c, d; +}; + +extern struct tls *apply_tls (struct tls *); + +/* An architecture can define them to verify that caller-saved registers + aren't changed by __tls_get_addr. */ +#ifndef INIT_TLS_CALL +# define INIT_TLS_CALL() +#endif + +#ifndef BEFORE_TLS_CALL +# define BEFORE_TLS_CALL() +#endif + +#ifndef AFTER_TLS_CALL +# define AFTER_TLS_CALL() +#endif diff --git a/elf/tst-version-hash-zero-linkmod.c b/elf/tst-version-hash-zero-linkmod.c new file mode 100644 index 0000000000..15e2506d01 --- /dev/null +++ b/elf/tst-version-hash-zero-linkmod.c @@ -0,0 +1,22 @@ +/* Stub module for linking tst-version-hash-zero-refmod.so. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ + +/* The version script assigns a different symbol version for the stub + module. Loading the module with the incorrect version is expected + to fail. 
*/ +#include "tst-version-hash-zero-mod.c" diff --git a/elf/tst-version-hash-zero-linkmod.map b/elf/tst-version-hash-zero-linkmod.map new file mode 100644 index 0000000000..2dba7c22d7 --- /dev/null +++ b/elf/tst-version-hash-zero-linkmod.map @@ -0,0 +1,7 @@ +Base { + local: *; +}; + +OTHER_VERSION { + global: global_variable; +} Base; diff --git a/elf/tst-version-hash-zero-mod.c b/elf/tst-version-hash-zero-mod.c new file mode 100644 index 0000000000..ac6b0dc4a5 --- /dev/null +++ b/elf/tst-version-hash-zero-mod.c @@ -0,0 +1,20 @@ +/* Test module with a zero version symbol hash. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ + +/* The symbol version is assigned by version script. */ +int global_variable; diff --git a/elf/tst-version-hash-zero-mod.map b/elf/tst-version-hash-zero-mod.map new file mode 100644 index 0000000000..41eaff7914 --- /dev/null +++ b/elf/tst-version-hash-zero-mod.map @@ -0,0 +1,13 @@ +Base { + local: *; +}; + +/* Define the version so that tst-version-hash-zero-refmod.so passes + the initial symbol version check. */ +OTHER_VERSION { +} Base; + +/* This version string hashes to zero. 
*/ +PPPPPPPPPPPP { + global: global_variable; +} Base; diff --git a/elf/tst-version-hash-zero-refmod.c b/elf/tst-version-hash-zero-refmod.c new file mode 100644 index 0000000000..cd8b3dcef5 --- /dev/null +++ b/elf/tst-version-hash-zero-refmod.c @@ -0,0 +1,23 @@ +/* Test module that triggers a relocation failure in tst-version-hash-zero. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ + +/* This is bound to global_variable@@OTHER_VERSION via + tst-version-hash-zero-linkmod.so, but at run time, only + global_variable@PPPPPPPPPPPP exists. */ +extern int global_variable; +int *pointer_variable = &global_variable; diff --git a/elf/tst-version-hash-zero.c b/elf/tst-version-hash-zero.c new file mode 100644 index 0000000000..66a0db4f51 --- /dev/null +++ b/elf/tst-version-hash-zero.c @@ -0,0 +1,56 @@ +/* Symbols with version hash zero should not match any version (bug 29190). + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ + +#include +#include +#include +#include + +static int +do_test (void) +{ + void *handle = xdlopen ("tst-version-hash-zero-mod.so", RTLD_NOW); + + /* This used to crash because some struct r_found_version entries + with hash zero did not have valid version strings. */ + TEST_VERIFY (xdlvsym (handle, "global_variable", "PPPPPPPPPPPP") != NULL); + + /* Consistency check. */ + TEST_VERIFY (xdlsym (handle, "global_variable") + == xdlvsym (handle, "global_variable", "PPPPPPPPPPPP")); + + /* This symbol version is supposed to be missing. */ + TEST_VERIFY (dlvsym (handle, "global_variable", "OTHER_VERSION") == NULL); + + /* tst-version-hash-zero-refmod.so references + global_variable@@OTHER_VERSION and is expected to fail to load. + dlvsym sets the hidden flag during lookup. Relocation does not, + so this exercises a different failure case. */ + TEST_VERIFY_EXIT (dlopen ("tst-version-hash-zero-refmod.so", RTLD_NOW) + == NULL); + const char *message = dlerror (); + if (strstr (message, + ": undefined symbol: global_variable, version OTHER_VERSION") + == NULL) + FAIL_EXIT1 ("unexpected dlopen failure: %s", message); + + xdlclose (handle); + return 0; +} + +#include diff --git a/include/ctype.h b/include/ctype.h index 493a6f80ce..a15e5b6678 100644 --- a/include/ctype.h +++ b/include/ctype.h @@ -24,33 +24,35 @@ libc_hidden_proto (toupper) NL_CURRENT_INDIRECT. */ # include "../locale/localeinfo.h" -# include # ifndef CTYPE_EXTERN_INLINE /* Used by ctype/ctype-info.c, which see. 
*/ # define CTYPE_EXTERN_INLINE extern inline # endif -__libc_tsd_define (extern, const uint16_t *, CTYPE_B) -__libc_tsd_define (extern, const int32_t *, CTYPE_TOUPPER) -__libc_tsd_define (extern, const int32_t *, CTYPE_TOLOWER) +extern __thread const uint16_t * __libc_tsd_CTYPE_B + attribute_hidden attribute_tls_model_ie; +extern __thread const int32_t * __libc_tsd_CTYPE_TOUPPER + attribute_hidden attribute_tls_model_ie; +extern __thread const int32_t * __libc_tsd_CTYPE_TOLOWER + attribute_hidden attribute_tls_model_ie; CTYPE_EXTERN_INLINE const uint16_t ** __attribute__ ((const)) __ctype_b_loc (void) { - return __libc_tsd_address (const uint16_t *, CTYPE_B); + return &__libc_tsd_CTYPE_B; } CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const)) __ctype_toupper_loc (void) { - return __libc_tsd_address (const int32_t *, CTYPE_TOUPPER); + return &__libc_tsd_CTYPE_TOUPPER; } CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const)) __ctype_tolower_loc (void) { - return __libc_tsd_address (const int32_t *, CTYPE_TOLOWER); + return &__libc_tsd_CTYPE_TOLOWER; } # ifndef __NO_CTYPE @@ -64,6 +66,11 @@ __ctype_tolower_loc (void) # define __isdigit_l(c, l) ({ int __c = (c); __c >= '0' && __c <= '9'; }) # endif /* Not __NO_CTYPE. */ +/* For use in initializers. */ +extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; +extern const uint32_t _nl_C_LC_CTYPE_toupper[] attribute_hidden; +extern const uint32_t _nl_C_LC_CTYPE_tolower[] attribute_hidden; + # endif /* IS_IN (libc). */ #endif /* Not _ISOMAC. */ diff --git a/include/dlfcn.h b/include/dlfcn.h index f49ee1b0c9..a44420fa37 100644 --- a/include/dlfcn.h +++ b/include/dlfcn.h @@ -4,8 +4,7 @@ #include /* For ElfW. */ #include -extern __typeof (_dl_find_object) __dl_find_object; -hidden_proto (__dl_find_object) +rtld_hidden_proto (_dl_find_object) /* Internally used flag. 
*/ #define __RTLD_DLOPEN 0x80000000 diff --git a/include/libc-internal.h b/include/libc-internal.h index 87ac591835..1ef43ffe67 100644 --- a/include/libc-internal.h +++ b/include/libc-internal.h @@ -53,6 +53,9 @@ extern __typeof (__profile_frequency) __profile_frequency attribute_hidden; is not for an audit module, not loaded via dlmopen, and not loaded via static dlopen either). */ extern _Bool __libc_initial attribute_hidden; +#else +/* The static libc is always the initial namespace. */ +# define __libc_initial ((_Bool) 1) #endif #endif /* _LIBC_INTERNAL */ diff --git a/include/link.h b/include/link.h index cb0d7d8e2f..7ca305f780 100644 --- a/include/link.h +++ b/include/link.h @@ -212,6 +212,7 @@ struct link_map unsigned int l_find_object_processed:1; /* Zero if _dl_find_object_update needs to process this lt_library map. */ + unsigned int l_tls_in_slotinfo:1; /* TLS slotinfo updated in dlopen. */ /* NODELETE status of the map. Only valid for maps of type lt_loaded. Lazy binding sets l_nodelete_active directly, @@ -364,6 +365,8 @@ struct auditstate dynamic linker. */ extern struct r_debug_extended _r_debug_extended attribute_hidden; +rtld_hidden_proto (_r_debug) + #if __ELF_NATIVE_CLASS == 32 # define symbind symbind32 # define LA_SYMBIND "la_symbind32" diff --git a/include/math.h b/include/math.h index fa11a710a6..035fd160ff 100644 --- a/include/math.h +++ b/include/math.h @@ -130,7 +130,10 @@ fabsf128 (_Float128 x) } # endif -# if !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0) + +/* NB: Internal tests don't have access to internal symbols. */ +# if !IS_IN (testsuite_internal) \ + && !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0) # ifndef NO_MATH_REDIRECT /* Declare some functions for use within GLIBC. Compilers typically inline those functions as a single instruction. 
Use an asm to diff --git a/include/rpc/rpc.h b/include/rpc/rpc.h index f5cee6caef..ba967833ad 100644 --- a/include/rpc/rpc.h +++ b/include/rpc/rpc.h @@ -3,8 +3,6 @@ # ifndef _ISOMAC -#include - /* Now define the internal interfaces. */ extern unsigned long _create_xid (void); @@ -47,7 +45,8 @@ extern void __rpc_thread_key_cleanup (void) attribute_hidden; extern void __rpc_thread_destroy (void) attribute_hidden; -__libc_tsd_define (extern, struct rpc_thread_variables *, RPC_VARS) +extern __thread struct rpc_thread_variables *__libc_tsd_RPC_VARS + attribute_hidden attribute_tls_model_ie; #define RPC_THREAD_VARIABLE(x) (__rpc_thread_variables()->x) diff --git a/libio/Makefile b/libio/Makefile index 6a507b67ea..f2e98f96eb 100644 --- a/libio/Makefile +++ b/libio/Makefile @@ -117,6 +117,7 @@ tests = \ tst-mmap-offend \ tst-mmap-setvbuf \ tst-mmap2-eofsync \ + tst-popen-fork \ tst-popen1 \ tst-setvbuf1 \ tst-sprintf-chk-ub \ @@ -141,6 +142,8 @@ tests = \ tst_wscanf \ # tests +$(objpfx)tst-popen-fork: $(shared-thread-library) + tests-internal = tst-vtables tst-vtables-interposed ifeq (yes,$(build-shared)) @@ -286,11 +289,18 @@ endif ifeq ($(build-shared),yes) aux += oldfileops oldstdfiles tests += \ + tst-fopen-compat \ tst-stderr-compat \ # tests tests-2.0 += \ + tst-fopen-compat \ tst-stderr-compat \ # tests-2.0 + +tst-fopen-compat-ARGS = tst-fopen-compat.c +# Disable PIE to trigger copy relocation. 
+CFLAGS-tst-fopen-compat.c += -fno-pie +tst-fopen-compat-no-pie = yes endif shared-only-routines = oldiofopen oldiofdopen oldiofclose oldfileops \ diff --git a/libio/bits/stdio2.h b/libio/bits/stdio2.h index 91a80dd7c6..4b8bc35bdf 100644 --- a/libio/bits/stdio2.h +++ b/libio/bits/stdio2.h @@ -308,14 +308,14 @@ fgets (__fortify_clang_overload_arg (char *, __restrict, __s), int __n, "fgets called with bigger size than length of " "destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __fgets_alias (__s, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __fgets_chk_warn (__s, sz, __n, __stream); + if (__glibc_unsafe_len (__n, sizeof (char), __sz)) + return __fgets_chk_warn (__s, __sz, __n, __stream); #endif - return __fgets_chk (__s, sz, __n, __stream); + return __fgets_chk (__s, __sz, __n, __stream); } __fortify_function __wur __nonnull ((4)) __attribute_overloadable__ size_t @@ -326,14 +326,14 @@ fread (__fortify_clang_overload_arg (void *, __restrict, __ptr), "fread called with bigger size * n than length " "of destination buffer") { - size_t sz = __glibc_objsize0 (__ptr); - if (__glibc_safe_or_unknown_len (__n, __size, sz)) + size_t __sz = __glibc_objsize0 (__ptr); + if (__glibc_safe_or_unknown_len (__n, __size, __sz)) return __fread_alias (__ptr, __size, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, __size, sz)) - return __fread_chk_warn (__ptr, sz, __size, __n, __stream); + if (__glibc_unsafe_len (__n, __size, __sz)) + return __fread_chk_warn (__ptr, __sz, __size, __n, __stream); #endif - return __fread_chk (__ptr, sz, __size, __n, __stream); + return __fread_chk (__ptr, __sz, __size, __n, __stream); } #ifdef __USE_GNU @@ -345,14 +345,14 @@ fgets_unlocked (__fortify_clang_overload_arg (char *, __restrict, __s), 
"fgets called with bigger size than length of " "destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __fgets_unlocked_alias (__s, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __fgets_unlocked_chk_warn (__s, sz, __n, __stream); + if (__glibc_unsafe_len (__n, sizeof (char), __sz)) + return __fgets_unlocked_chk_warn (__s, __sz, __n, __stream); #endif - return __fgets_unlocked_chk (__s, sz, __n, __stream); + return __fgets_unlocked_chk (__s, __sz, __n, __stream); } #endif @@ -366,8 +366,8 @@ fread_unlocked (__fortify_clang_overload_arg0 (void *, __restrict, __ptr), "fread_unlocked called with bigger size * n than " "length of destination buffer") { - size_t sz = __glibc_objsize0 (__ptr); - if (__glibc_safe_or_unknown_len (__n, __size, sz)) + size_t __sz = __glibc_objsize0 (__ptr); + if (__glibc_safe_or_unknown_len (__n, __size, __sz)) { # ifdef __USE_EXTERN_INLINES if (__builtin_constant_p (__size) @@ -393,10 +393,10 @@ fread_unlocked (__fortify_clang_overload_arg0 (void *, __restrict, __ptr), return __fread_unlocked_alias (__ptr, __size, __n, __stream); } # if !__fortify_use_clang - if (__glibc_unsafe_len (__n, __size, sz)) - return __fread_unlocked_chk_warn (__ptr, sz, __size, __n, __stream); + if (__glibc_unsafe_len (__n, __size, __sz)) + return __fread_unlocked_chk_warn (__ptr, __sz, __size, __n, __stream); # endif - return __fread_unlocked_chk (__ptr, sz, __size, __n, __stream); + return __fread_unlocked_chk (__ptr, __sz, __size, __n, __stream); } #endif diff --git a/libio/genops.c b/libio/genops.c index 99f5e80f20..6f20d49669 100644 --- a/libio/genops.c +++ b/libio/genops.c @@ -662,7 +662,7 @@ _IO_sputbackc (FILE *fp, int c) { int result; - if (fp->_IO_read_ptr > fp->_IO_read_base + if (fp->_IO_read_ptr > fp->_IO_read_base && 
!_IO_in_backup (fp) && (unsigned char)fp->_IO_read_ptr[-1] == (unsigned char)c) { fp->_IO_read_ptr--; @@ -816,6 +816,12 @@ _IO_unbuffer_all (void) legacy = 1; #endif + /* Free up the backup area if it was ever allocated. */ + if (_IO_have_backup (fp)) + _IO_free_backup_area (fp); + if (!legacy && fp->_mode > 0 && _IO_have_wbackup (fp)) + _IO_free_wbackup_area (fp); + if (! (fp->_flags & _IO_UNBUFFERED) /* Iff stream is un-orientated, it wasn't used. */ && (legacy || fp->_mode != 0)) diff --git a/libio/iopopen.c b/libio/iopopen.c index d01cb0648e..352513a291 100644 --- a/libio/iopopen.c +++ b/libio/iopopen.c @@ -57,6 +57,26 @@ unlock (void *not_used) } #endif +/* These lock/unlock/resetlock functions are used during fork. */ + +void +_IO_proc_file_chain_lock (void) +{ + _IO_lock_lock (proc_file_chain_lock); +} + +void +_IO_proc_file_chain_unlock (void) +{ + _IO_lock_unlock (proc_file_chain_lock); +} + +void +_IO_proc_file_chain_resetlock (void) +{ + _IO_lock_init (proc_file_chain_lock); +} + /* POSIX states popen shall ensure that any streams from previous popen() calls that remain open in the parent process should be closed in the new child process. diff --git a/libio/libioP.h b/libio/libioP.h index 1af287b19f..a83a411fdf 100644 --- a/libio/libioP.h +++ b/libio/libioP.h @@ -429,6 +429,12 @@ libc_hidden_proto (_IO_list_resetlock) extern void _IO_enable_locks (void) __THROW; libc_hidden_proto (_IO_enable_locks) +/* Functions for operating popen's proc_file_chain_lock during fork. */ + +extern void _IO_proc_file_chain_lock (void) __THROW attribute_hidden; +extern void _IO_proc_file_chain_unlock (void) __THROW attribute_hidden; +extern void _IO_proc_file_chain_resetlock (void) __THROW attribute_hidden; + /* Default jumptable functions. 
*/ extern int _IO_default_underflow (FILE *) __THROW; @@ -577,8 +583,8 @@ extern void _IO_old_init (FILE *fp, int flags) __THROW; ((__fp)->_wide_data->_IO_write_base \ = (__fp)->_wide_data->_IO_write_ptr = __p, \ (__fp)->_wide_data->_IO_write_end = (__ep)) -#define _IO_have_backup(fp) ((fp)->_IO_save_base != NULL) -#define _IO_have_wbackup(fp) ((fp)->_wide_data->_IO_save_base != NULL) +#define _IO_have_backup(fp) ((fp)->_IO_backup_base != NULL) +#define _IO_have_wbackup(fp) ((fp)->_wide_data->_IO_backup_base != NULL) #define _IO_in_backup(fp) ((fp)->_flags & _IO_IN_BACKUP) #define _IO_have_markers(fp) ((fp)->_markers != NULL) #define _IO_blen(fp) ((fp)->_IO_buf_end - (fp)->_IO_buf_base) diff --git a/libio/oldfileops.c b/libio/oldfileops.c index 97148dba9b..8f775c9094 100644 --- a/libio/oldfileops.c +++ b/libio/oldfileops.c @@ -103,9 +103,11 @@ _IO_old_file_init_internal (struct _IO_FILE_plus *fp) fp->file._old_offset = _IO_pos_BAD; fp->file._flags |= CLOSED_FILEBUF_FLAGS; - _IO_link_in (fp); + /* NB: _vtable_offset must be set before calling _IO_link_in since + _IO_vtable_offset is used to detect the old binaries. */ fp->file._vtable_offset = ((int) sizeof (struct _IO_FILE) - (int) sizeof (struct _IO_FILE_complete)); + _IO_link_in (fp); fp->file._fileno = -1; if (&_IO_stdin_used != NULL || !_IO_legacy_file ((FILE *) fp)) diff --git a/libio/tst-fopen-compat.c b/libio/tst-fopen-compat.c new file mode 100644 index 0000000000..f241b61043 --- /dev/null +++ b/libio/tst-fopen-compat.c @@ -0,0 +1,85 @@ +/* Verify that fopen works with copy relocation on _IO_stderr_ in binaries + linked with glibc 2.0. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#if TEST_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) +# define _LIBC +# define _IO_USE_OLD_IO_FILE +# include +# include +# include +# include +# include +# include + +struct _IO_jump_t; + +struct _IO_FILE_plus +{ + FILE file; + const struct _IO_jump_t *vtable; +}; + +extern struct _IO_FILE_plus _IO_stderr_; +compat_symbol_reference (libc, _IO_stderr_, _IO_stderr_, GLIBC_2_0); +compat_symbol_reference (libc, fopen, fopen, GLIBC_2_0); +compat_symbol_reference (libc, fclose, fclose, GLIBC_2_0); + +static int +do_test (int argc, char *argv[]) +{ + static char filename[PATH_MAX + 1]; + struct stat st; + char *name = NULL; + int i; + + /* Try to trigger copy relocation. */ + TEST_VERIFY_EXIT (_IO_stderr_.file._fileno == STDERR_FILENO); + + for (i = 1; i < argc; i++) + { + name = argv[i]; + if (stat (name, &st) == 0) + { + TEST_VERIFY_EXIT (strlen (name) <= PATH_MAX); + break; + } + } + TEST_VERIFY_EXIT (name != NULL); + + strcpy (filename, name); + FILE *fp = fopen (filename, "r"); + TEST_VERIFY_EXIT (strcmp (filename, name) == 0); + TEST_VERIFY_EXIT (fp != NULL); + TEST_VERIFY_EXIT (fclose (fp) == 0); + return 0; +} +#else +# include + +static int +do_test (int argc, char *argv[]) +{ + return EXIT_UNSUPPORTED; +} +#endif + +#define TEST_FUNCTION_ARGV do_test +#include diff --git a/libio/tst-popen-fork.c b/libio/tst-popen-fork.c new file mode 100644 index 0000000000..1df30fc6c0 --- /dev/null +++ b/libio/tst-popen-fork.c @@ -0,0 +1,80 @@ +/* Test concurrent popen and fork. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +static void +popen_and_pclose (void) +{ + FILE *f = popen ("true", "r"); + TEST_VERIFY_EXIT (f != NULL); + pclose (f); + return; +} + +static atomic_bool done = ATOMIC_VAR_INIT (0); + +static void * +popen_and_pclose_forever (__attribute__ ((unused)) + void *arg) +{ + while (!atomic_load_explicit (&done, memory_order_acquire)) + popen_and_pclose (); + return NULL; +} + +static int +do_test (void) +{ + + /* Repeatedly call popen in a loop during the entire test. */ + pthread_t t = xpthread_create (NULL, popen_and_pclose_forever, NULL); + + /* Repeatedly fork off and reap child processes one-by-one. + Each child calls popen once, then exits, leading to the possibility + that a child forks *during* our own popen call, thus inheriting any + intermediate popen state, possibly including lock state(s). */ + for (int i = 0; i < 100; i++) + { + int cpid = xfork (); + + if (cpid == 0) + { + popen_and_pclose (); + _exit (0); + } + else + xwaitpid (cpid, NULL, 0); + } + + /* Stop calling popen. 
*/ + atomic_store_explicit (&done, 1, memory_order_release); + xpthread_join (t); + + return 0; +} + +#include diff --git a/locale/lc-ctype.c b/locale/lc-ctype.c index c77ec51cb8..70556acaf0 100644 --- a/locale/lc-ctype.c +++ b/locale/lc-ctype.c @@ -64,12 +64,9 @@ _nl_postload_ctype (void) in fact using the global locale. */ if (_NL_CURRENT_LOCALE == &_nl_global_locale) { - __libc_tsd_set (const uint16_t *, CTYPE_B, - (void *) _nl_global_locale.__ctype_b); - __libc_tsd_set (const int32_t *, CTYPE_TOUPPER, - (void *) _nl_global_locale.__ctype_toupper); - __libc_tsd_set (const int32_t *, CTYPE_TOLOWER, - (void *) _nl_global_locale.__ctype_tolower); + __libc_tsd_CTYPE_B = _nl_global_locale.__ctype_b; + __libc_tsd_CTYPE_TOUPPER = _nl_global_locale.__ctype_toupper; + __libc_tsd_CTYPE_TOLOWER = _nl_global_locale.__ctype_tolower; } #include diff --git a/locale/localeinfo.h b/locale/localeinfo.h index ed698faef1..c3249d3715 100644 --- a/locale/localeinfo.h +++ b/locale/localeinfo.h @@ -236,10 +236,9 @@ extern struct __locale_struct _nl_global_locale attribute_hidden; /* This fetches the thread-local locale_t pointer, either one set with uselocale or &_nl_global_locale. */ -#define _NL_CURRENT_LOCALE (__libc_tsd_get (locale_t, LOCALE)) -#include -__libc_tsd_define (extern, locale_t, LOCALE) - +#define _NL_CURRENT_LOCALE __libc_tsd_LOCALE +extern __thread locale_t __libc_tsd_LOCALE + attribute_hidden attribute_tls_model_ie; /* For static linking it is desireable to avoid always linking in the code and data for every category when we can tell at link time that they are diff --git a/locale/uselocale.c b/locale/uselocale.c index 8136caf61b..0b247a77d5 100644 --- a/locale/uselocale.c +++ b/locale/uselocale.c @@ -34,7 +34,7 @@ __uselocale (locale_t newloc) { const locale_t locobj = newloc == LC_GLOBAL_LOCALE ? 
&_nl_global_locale : newloc; - __libc_tsd_set (locale_t, LOCALE, locobj); + __libc_tsd_LOCALE = locobj; #ifdef NL_CURRENT_INDIRECT /* Now we must update all the per-category thread-local variables to @@ -62,11 +62,9 @@ __uselocale (locale_t newloc) #endif /* Update the special tsd cache of some locale data. */ - __libc_tsd_set (const uint16_t *, CTYPE_B, (void *) locobj->__ctype_b); - __libc_tsd_set (const int32_t *, CTYPE_TOLOWER, - (void *) locobj->__ctype_tolower); - __libc_tsd_set (const int32_t *, CTYPE_TOUPPER, - (void *) locobj->__ctype_toupper); + __libc_tsd_CTYPE_B = locobj->__ctype_b; + __libc_tsd_CTYPE_TOLOWER = locobj->__ctype_tolower; + __libc_tsd_CTYPE_TOUPPER = locobj->__ctype_toupper; } return oldloc == &_nl_global_locale ? LC_GLOBAL_LOCALE : oldloc; diff --git a/locale/xlocale.c b/locale/xlocale.c index f2b9d03303..d11c1cbf8c 100644 --- a/locale/xlocale.c +++ b/locale/xlocale.c @@ -18,18 +18,13 @@ #include #include "localeinfo.h" +#include #define DEFINE_CATEGORY(category, category_name, items, a) \ extern struct __locale_data _nl_C_##category; #include "categories.def" #undef DEFINE_CATEGORY -/* Defined in locale/C-ctype.c. */ -extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; -extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden; -extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden; - - const struct __locale_struct _nl_C_locobj attribute_hidden = { .__locales = diff --git a/localedata/tst-ctype.c b/localedata/tst-ctype.c index 9de979a2d7..a23689719c 100644 --- a/localedata/tst-ctype.c +++ b/localedata/tst-ctype.c @@ -21,6 +21,8 @@ #include #include +#include + static const char lower[] = "abcdefghijklmnopqrstuvwxyz"; static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; @@ -53,19 +55,11 @@ static struct classes #define nclasses (sizeof (classes) / sizeof (classes[0])) -#define FAIL(str, args...) 
\ - { \ - printf (" " str "\n", ##args); \ - ++errors; \ - } - - static int do_test (void) { const char *cp; const char *cp2; - int errors = 0; char *inpline = NULL; size_t inplinelen = 0; char *resline = NULL; @@ -394,11 +388,8 @@ punct = %04x alnum = %04x\n", { if (((__ctype_b[(unsigned int) *inp] & classes[n].mask) != 0) != (*resp != '0')) - { - printf (" is%s('%c' = '\\x%02x') %s true\n", inpline, - *inp, *inp, *resp == '1' ? "not" : "is"); - ++errors; - } + FAIL (" is%s('%c' = '\\x%02x') %s true\n", inpline, + *inp, *inp, *resp == '1' ? "not" : "is"); ++inp; ++resp; } @@ -408,11 +399,8 @@ punct = %04x alnum = %04x\n", while (*inp != '\0') { if (tolower (*inp) != *resp) - { - printf (" tolower('%c' = '\\x%02x') != '%c'\n", - *inp, *inp, *resp); - ++errors; - } + FAIL (" tolower('%c' = '\\x%02x') != '%c'\n", + *inp, *inp, *resp); ++inp; ++resp; } @@ -422,11 +410,8 @@ punct = %04x alnum = %04x\n", while (*inp != '\0') { if (toupper (*inp) != *resp) - { - printf (" toupper('%c' = '\\x%02x') != '%c'\n", - *inp, *inp, *resp); - ++errors; - } + FAIL (" toupper('%c' = '\\x%02x') != '%c'\n", + *inp, *inp, *resp); ++inp; ++resp; } @@ -436,14 +421,7 @@ punct = %04x alnum = %04x\n", } - if (errors != 0) - { - printf (" %d error%s for `%s' locale\n\n\n", errors, - errors == 1 ? 
"" : "s", setlocale (LC_ALL, NULL)); - return 1; - } - - printf (" No errors for `%s' locale\n\n\n", setlocale (LC_ALL, NULL)); + printf ("Completed testing for `%s' locale\n\n\n", setlocale (LC_ALL, NULL)); return 0; } diff --git a/malloc/Makefile b/malloc/Makefile index 02aff1bd1d..98d507a6eb 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -28,6 +28,8 @@ tests := \ mallocbug \ tst-aligned-alloc \ tst-aligned-alloc-random \ + tst-aligned-alloc-random-thread \ + tst-aligned-alloc-random-thread-cross \ tst-alloc_buffer \ tst-calloc \ tst-free-errno \ @@ -151,6 +153,8 @@ ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) # the tests expect specific internal behavior that is changed due to linking to # libmcheck.a. tests-exclude-mcheck = \ + tst-aligned-alloc-random-thread \ + tst-aligned-alloc-random-thread-cross \ tst-compathooks-off \ tst-compathooks-on \ tst-malloc-backtrace \ @@ -415,7 +419,11 @@ $(objpfx)tst-mallocstate: $(objpfx)libc_malloc_debug.so $(objpfx)tst-mallocstate-malloc-check: $(objpfx)libc_malloc_debug.so $(objpfx)tst-aligned-alloc-random.out: $(objpfx)tst-aligned_alloc-lib.so +$(objpfx)tst-aligned-alloc-random-thread.out: $(objpfx)tst-aligned_alloc-lib.so +$(objpfx)tst-aligned-alloc-random-thread-cross.out: $(objpfx)tst-aligned_alloc-lib.so $(objpfx)tst-malloc-random.out: $(objpfx)tst-aligned_alloc-lib.so tst-aligned-alloc-random-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so +tst-aligned-alloc-random-thread-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so +tst-aligned-alloc-random-thread-cross-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so tst-malloc-random-ENV = LD_PRELOAD=$(objpfx)tst-aligned_alloc-lib.so diff --git a/malloc/tst-aligned-alloc-random-thread-cross.c b/malloc/tst-aligned-alloc-random-thread-cross.c new file mode 100644 index 0000000000..360ecc56ee --- /dev/null +++ b/malloc/tst-aligned-alloc-random-thread-cross.c @@ -0,0 +1,19 @@ +/* multi-threaded memory allocation and cross-thread deallocation test. 
+ Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ +#define CROSS_THREAD_DEALLOC +#include "tst-aligned-alloc-random-thread.c" diff --git a/malloc/tst-aligned-alloc-random-thread.c b/malloc/tst-aligned-alloc-random-thread.c new file mode 100644 index 0000000000..e95f79250a --- /dev/null +++ b/malloc/tst-aligned-alloc-random-thread.c @@ -0,0 +1,145 @@ +/* multi-threaded memory allocation/deallocation test. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef ITERATIONS +# define ITERATIONS 16 +#endif + +#ifndef NUM_THREADS +# define NUM_THREADS 8 +#endif + +#ifndef NUM_ALLOCATIONS +# define NUM_ALLOCATIONS 2048 +#endif + +static pthread_barrier_t barrier; + +__thread unsigned int seed; + +typedef struct +{ + int id; + pthread_t thread; +} thread; + +thread threads[NUM_THREADS]; + +void *allocations[NUM_THREADS][NUM_ALLOCATIONS]; + +void +run_thread_dealloc (int id) +{ + for (int i = 0; i < NUM_ALLOCATIONS; i++) + { + free (allocations[id][i]); + allocations[id][i] = NULL; + } +} + +void +run_thread_alloc (int id) +{ + size_t msb, size; + for (int i = 0; i < NUM_ALLOCATIONS; i++) + { + msb = 1 << rand_r (&seed) % 16; + size = msb + rand_r (&seed) % msb; + allocations[id][i] = malloc (size); + TEST_VERIFY_EXIT (allocations[id][i] != NULL); + } +} + +void * +run_allocations (void *arg) +{ + int id = *((int *) arg); + seed = time (NULL) + id; + + /* Stage 1: First half of the threads allocating memory and the second + * half waiting for them to finish + */ + if (id < NUM_THREADS / 2) + run_thread_alloc (id); + + xpthread_barrier_wait (&barrier); + + /* Stage 2: Half of the threads allocating memory and the other + * half deallocating: + * - In the non cross-thread dealloc scenario the first half will be + * deallocating the memory allocated by themselves in stage 1 and the + * second half will be allocating memory. + * - In the cross-thread dealloc scenario the first half will continue + * to allocate memory and the second half will deallocate the memory + * allocated by the first half in stage 1. 
+ */ + if (id < NUM_THREADS / 2) +#ifndef CROSS_THREAD_DEALLOC + run_thread_dealloc (id); +#else + run_thread_alloc (id + NUM_THREADS / 2); +#endif + else +#ifndef CROSS_THREAD_DEALLOC + run_thread_alloc (id); +#else + run_thread_dealloc (id - NUM_THREADS / 2); +#endif + + xpthread_barrier_wait (&barrier); + + // Stage 3: Second half of the threads deallocating and the first half + // waiting for them to finish. + if (id >= NUM_THREADS / 2) + run_thread_dealloc (id); + + return NULL; +} + +static int +do_test (void) +{ + xpthread_barrier_init (&barrier, NULL, NUM_THREADS); + + for (int i = 0; i < ITERATIONS; i++) + { + for (int t = 0; t < NUM_THREADS; t++) + { + threads[t].id = t; + threads[t].thread + = xpthread_create (NULL, run_allocations, &threads[t].id); + } + + for (int t = 0; t < NUM_THREADS; t++) + xpthread_join (threads[t].thread); + } + + return 0; +} + +#include diff --git a/malloc/tst-aligned-alloc.c b/malloc/tst-aligned-alloc.c index 91167d1392..b0f05a8fec 100644 --- a/malloc/tst-aligned-alloc.c +++ b/malloc/tst-aligned-alloc.c @@ -25,6 +25,8 @@ #include #include +#include "tst-malloc-aux.h" + static int do_test (void) { diff --git a/malloc/tst-aligned_alloc-lib.c b/malloc/tst-aligned_alloc-lib.c index 0205df5acf..9ef1f839c1 100644 --- a/malloc/tst-aligned_alloc-lib.c +++ b/malloc/tst-aligned_alloc-lib.c @@ -17,37 +17,38 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, see . */ -#include #include #include +#include extern void *__libc_malloc (size_t size); extern void *__libc_calloc (size_t n, size_t size); +__thread unsigned int seed = 0; + int aligned_alloc_count = 0; int libc_malloc_count = 0; int libc_calloc_count = 0; -/* Get a random alignment value. Biased towards the smaller values. Must be - a power of 2. 
*/ -static size_t get_random_alignment (void) -{ - size_t aligns[] = { - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384 - }; - - return aligns[random () % array_length (aligns)]; -} - -static void *get_random_alloc (size_t size) +static void * +get_random_alloc (size_t size) { void *retval; size_t align; + struct timespec tp; + + if (seed == 0) + { + clock_gettime (CLOCK_REALTIME, &tp); + seed = tp.tv_nsec; + } - switch (random() % 3) - { + switch (rand_r (&seed) % 3) + { case 1: - align = get_random_alignment (); + /* Get a random alignment value. Biased towards the smaller + * values up to 16384. Must be a power of 2. */ + align = 1 << rand_r (&seed) % 15; retval = aligned_alloc (align, size); aligned_alloc_count++; break; @@ -59,13 +60,13 @@ static void *get_random_alloc (size_t size) retval = __libc_malloc (size); libc_malloc_count++; break; - } + } return retval; } - -void * __random_malloc (size_t size) +void * +__random_malloc (size_t size) { return get_random_alloc (size); } diff --git a/malloc/tst-calloc.c b/malloc/tst-calloc.c index 01f17f9e65..5a8c7ab121 100644 --- a/malloc/tst-calloc.c +++ b/malloc/tst-calloc.c @@ -23,6 +23,7 @@ #include #include +#include "tst-malloc-aux.h" /* Number of samples per size. */ #define N 50000 @@ -94,16 +95,19 @@ random_test (void) static void null_test (void) { + /* Obscure allocation size from the compiler. */ + volatile size_t max_size = UINT_MAX; + volatile size_t zero_size = 0; /* If the size is 0 the result is implementation defined. Just make sure the program doesn't crash. The result of calloc is deliberately ignored, so do not warn about that. 
*/ DIAG_PUSH_NEEDS_COMMENT; DIAG_IGNORE_NEEDS_COMMENT (10, "-Wunused-result"); calloc (0, 0); - calloc (0, UINT_MAX); - calloc (UINT_MAX, 0); - calloc (0, ~((size_t) 0)); - calloc (~((size_t) 0), 0); + calloc (0, max_size); + calloc (max_size, 0); + calloc (0, ~((size_t) zero_size)); + calloc (~((size_t) zero_size), 0); DIAG_POP_NEEDS_COMMENT; } diff --git a/malloc/tst-compathooks-off.c b/malloc/tst-compathooks-off.c index d0106f3fb7..4cce6e5a80 100644 --- a/malloc/tst-compathooks-off.c +++ b/malloc/tst-compathooks-off.c @@ -25,6 +25,8 @@ #include #include +#include "tst-malloc-aux.h" + extern void (*volatile __free_hook) (void *, const void *); extern void *(*volatile __malloc_hook)(size_t, const void *); extern void *(*volatile __realloc_hook)(void *, size_t, const void *); diff --git a/malloc/tst-mallinfo2.c b/malloc/tst-mallinfo2.c index 2c02f5f700..f072b9f24b 100644 --- a/malloc/tst-mallinfo2.c +++ b/malloc/tst-mallinfo2.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + /* This is not specifically needed for the test, but (1) does something to the data so gcc doesn't optimize it away, and (2) may help when developing future tests. */ diff --git a/malloc/tst-malloc-aux.h b/malloc/tst-malloc-aux.h new file mode 100644 index 0000000000..3e1b61ce34 --- /dev/null +++ b/malloc/tst-malloc-aux.h @@ -0,0 +1,56 @@ +/* Wrappers for malloc-like functions to allow testing the implementation + without optimization. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ + +#ifndef TST_MALLOC_AUX_H +#define TST_MALLOC_AUX_H + +#include +#include +#include + +static __typeof (aligned_alloc) * volatile aligned_alloc_indirect + = aligned_alloc; +static __typeof (calloc) * volatile calloc_indirect = calloc; +static __typeof (malloc) * volatile malloc_indirect = malloc; +static __typeof (memalign) * volatile memalign_indirect = memalign; +static __typeof (posix_memalign) * volatile posix_memalign_indirect + = posix_memalign; +static __typeof (pvalloc) * volatile pvalloc_indirect = pvalloc; +static __typeof (realloc) * volatile realloc_indirect = realloc; +static __typeof (valloc) * volatile valloc_indirect = valloc; + +#undef aligned_alloc +#undef calloc +#undef malloc +#undef memalign +#undef posix_memalign +#undef pvalloc +#undef realloc +#undef valloc + +#define aligned_alloc aligned_alloc_indirect +#define calloc calloc_indirect +#define malloc malloc_indirect +#define memalign memalign_indirect +#define posix_memalign posix_memalign_indirect +#define pvalloc pvalloc_indirect +#define realloc realloc_indirect +#define valloc valloc_indirect + +#endif /* TST_MALLOC_AUX_H */ diff --git a/malloc/tst-malloc-backtrace.c b/malloc/tst-malloc-backtrace.c index c7b1d65e5c..65fa91f6fd 100644 --- a/malloc/tst-malloc-backtrace.c +++ b/malloc/tst-malloc-backtrace.c @@ -22,6 +22,8 @@ #include #include +#include "tst-malloc-aux.h" + #define SIZE 4096 /* Wrap free with a function to prevent gcc from optimizing it out. 
*/ diff --git a/malloc/tst-malloc-check.c b/malloc/tst-malloc-check.c index fde8863ad7..cc88bff3b3 100644 --- a/malloc/tst-malloc-check.c +++ b/malloc/tst-malloc-check.c @@ -20,6 +20,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c index 8e9e0d5fa2..2b91377e54 100644 --- a/malloc/tst-malloc-too-large.c +++ b/malloc/tst-malloc-too-large.c @@ -43,6 +43,7 @@ #include #include +#include "tst-malloc-aux.h" /* This function prepares for each 'too-large memory allocation' test by performing a small successful malloc/free and resetting errno prior to diff --git a/malloc/tst-malloc.c b/malloc/tst-malloc.c index f7a6e4654c..68af399022 100644 --- a/malloc/tst-malloc.c +++ b/malloc/tst-malloc.c @@ -22,6 +22,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void diff --git a/malloc/tst-memalign.c b/malloc/tst-memalign.c index 563f6413d2..ac9770d3f9 100644 --- a/malloc/tst-memalign.c +++ b/malloc/tst-memalign.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void diff --git a/malloc/tst-realloc.c b/malloc/tst-realloc.c index f50499ecb1..74a28fb45e 100644 --- a/malloc/tst-realloc.c +++ b/malloc/tst-realloc.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + static int do_test (void) { diff --git a/malloc/tst-safe-linking.c b/malloc/tst-safe-linking.c index 01dd07004d..63a7e2bc8e 100644 --- a/malloc/tst-safe-linking.c +++ b/malloc/tst-safe-linking.c @@ -26,6 +26,8 @@ #include #include +#include "tst-malloc-aux.h" + /* Run CALLBACK and check that the data on standard error equals EXPECTED. 
*/ static void diff --git a/malloc/tst-valloc.c b/malloc/tst-valloc.c index 9bab8c6470..0243d3dfd4 100644 --- a/malloc/tst-valloc.c +++ b/malloc/tst-valloc.c @@ -23,6 +23,8 @@ #include #include +#include "tst-malloc-aux.h" + static int errors = 0; static void diff --git a/manual/dynlink.texi b/manual/dynlink.texi index 03565d4fb0..1500a53de6 100644 --- a/manual/dynlink.texi +++ b/manual/dynlink.texi @@ -993,21 +993,21 @@ The dynamic segment should also mention @code{BIND_NOW} on the enough). @item -For shared objects (not main programs), if the program header has a -@code{PT_TLS} segment, the dynamic segment (as shown by @samp{readelf --dW}) should contain the @code{STATIC_TLS} flag on the @code{FLAGS} -line. - -If @code{STATIC_TLS} is missing in shared objects, ensure that the -appropriate relocations for GNU2 TLS descriptors are used (for example, +Ensure that only static TLS relocations (thread-pointer relative offset +locations) are used, for example @code{R_AARCH64_TLS_TPREL} and +@code{X86_64_TPOFF64}. As the second-best option, and only if +compatibility with non-hardened applications using @code{dlopen} is +needed, GNU2 TLS descriptor relocations can be used (for example, @code{R_AARCH64_TLSDESC} or @code{R_X86_64_TLSDESC}). @item -There should not be a reference to the symbols @code{__tls_get_addr}, -@code{__tls_get_offset}, @code{__tls_get_addr_opt} in the dynamic symbol -table (in the @samp{readelf -sDW} output). Thread-local storage must be -accessed using the initial-exec (static) model, or using GNU2 TLS -descriptors. +There should not be references to the traditional TLS function symbols +@code{__tls_get_addr}, @code{__tls_get_offset}, +@code{__tls_get_addr_opt} in the dynamic symbol table (in the +@samp{readelf -sDW} output). Supporting global dynamic TLS relocations +(such as @code{R_AARCH64_TLS_DTPMOD}, @code{R_AARCH64_TLS_DTPREL}, +@code{R_X86_64_DTPMOD64}, @code{R_X86_64_DTPOFF64}) should not be used, +either. 
@item Likewise, the functions @code{dlopen}, @code{dlmopen}, @code{dlclose} diff --git a/manual/llio.texi b/manual/llio.texi index 6f0a48609b..ea84196abd 100644 --- a/manual/llio.texi +++ b/manual/llio.texi @@ -1892,7 +1892,7 @@ There is no existing mapping in at least part of the given region. @end deftypefun -@deftypefun {void *} mremap (void *@var{address}, size_t @var{length}, size_t @var{new_length}, int @var{flag}) +@deftypefun {void *} mremap (void *@var{address}, size_t @var{length}, size_t @var{new_length}, int @var{flags}, ... /* void *@var{new_address} */) @standards{GNU, sys/mman.h} @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} @@ -1901,12 +1901,40 @@ area. @var{address} and @var{length} must cover a region entirely mapped in the same @code{mmap} statement. A new mapping with the same characteristics will be returned with the length @var{new_length}. -One option is possible, @code{MREMAP_MAYMOVE}. If it is given in -@var{flags}, the system may remove the existing mapping and create a new -one of the desired length in another location. +Possible flags are -The address of the resulting mapping is returned, or @math{-1}. Possible -error codes include: +@table @code + +@item MREMAP_MAYMOVE +If it is given in @var{flags}, the system may remove the existing mapping +and create a new one of the desired length in another location. + +@item MREMAP_FIXED +If it is given in @var{flags}, @code{mremap} accepts a fifth argument, +@code{void *new_address}, which specifies a page-aligned address to +which the mapping must be moved. Any previous mapping at the address +range specified by @var{new_address} and @var{new_length} is unmapped. + +@code{MREMAP_FIXED} must be used together with @code{MREMAP_MAYMOVE}. + +@item MREMAP_DONTUNMAP +If it is given in @var{flags}, @code{mremap} accepts a fifth argument, +@code{void *new_address}, which specifies a page-aligned address.
Any +previous mapping at the address range specified by @var{new_address} and +@var{new_length} is unmapped. If @var{new_address} is @code{NULL}, the +kernel chooses the page-aligned address at which to create the mapping. +Otherwise, the kernel takes it as a hint about where to place the mapping. +The mapping at the address range specified by @var{address} and +@var{length} isn't unmapped. + +@code{MREMAP_DONTUNMAP} must be used together with @code{MREMAP_MAYMOVE}. +@var{length} must be the same as @var{new_length}. This flag bit is +Linux-specific. + +@end table + +The address of the resulting mapping is returned, or @code{MAP_FAILED}. +Possible error codes include: @table @code @@ -1915,7 +1943,7 @@ There is no existing mapping in at least part of the original region, or the region covers two or more distinct mappings. @item EINVAL -The address given is misaligned or inappropriate. +Any arguments are inappropriate, including unknown @var{flags} values. @item EAGAIN The region has pages locked, and if extended it would exceed the diff --git a/manual/resource.texi b/manual/resource.texi index c9b21dedeb..25966bcb64 100644 --- a/manual/resource.texi +++ b/manual/resource.texi @@ -192,8 +192,8 @@ If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a @standards{BSD, sys/resource.h} @safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} @c Direct syscall on most systems; lock-taking critical section on HURD. -Store the current and maximum limits for the resource @var{resource} -in @code{*@var{rlp}}. +Change the current and maximum limits of the process for the resource +@var{resource} to the values provided in @code{*@var{rlp}}. The return value is @code{0} on success and @code{-1} on failure. The following @code{errno} error condition is possible: diff --git a/manual/stdio.texi b/manual/stdio.texi index f5e289d58a..92614775fa 100644 --- a/manual/stdio.texi +++ b/manual/stdio.texi @@ -903,21 +903,17 @@ This function is a GNU extension.
@deftypefun int putc (int @var{c}, FILE *@var{stream}) @standards{ISO, stdio.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} -This is just like @code{fputc}, except that most systems implement it as -a macro, making it faster. One consequence is that it may evaluate the -@var{stream} argument more than once, which is an exception to the -general rule for macros. @code{putc} is usually the best function to -use for writing a single character. +This is just like @code{fputc}, except that it may be implemented as +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. @end deftypefun @deftypefun wint_t putwc (wchar_t @var{wc}, FILE *@var{stream}) @standards{ISO, wchar.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} -This is just like @code{fputwc}, except that it can be implement as -a macro, making it faster. One consequence is that it may evaluate the -@var{stream} argument more than once, which is an exception to the -general rule for macros. @code{putwc} is usually the best function to -use for writing a single wide character. +This is just like @code{fputwc}, except that it may be implemented as +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. @end deftypefun @deftypefun int putc_unlocked (int @var{c}, FILE *@var{stream}) @@ -1110,20 +1106,17 @@ This function is a GNU extension. @deftypefun int getc (FILE *@var{stream}) @standards{ISO, stdio.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} -This is just like @code{fgetc}, except that it is permissible (and -typical) for it to be implemented as a macro that evaluates the -@var{stream} argument more than once. 
@code{getc} is often highly -optimized, so it is usually the best function to use to read a single -character. +This is just like @code{fgetc}, except that it may be implemented as +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. @end deftypefun @deftypefun wint_t getwc (FILE *@var{stream}) @standards{ISO, wchar.h} @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} -This is just like @code{fgetwc}, except that it is permissible for it to -be implemented as a macro that evaluates the @var{stream} argument more -than once. @code{getwc} can be highly optimized, so it is usually the -best function to use to read a single wide character. +This is just like @code{fgetwc}, except that it may be implemented as +a macro and may evaluate the @var{stream} argument more than once. +Therefore, @var{stream} should never be an expression with side-effects. @end deftypefun @deftypefun int getc_unlocked (FILE *@var{stream}) @@ -1474,11 +1467,9 @@ program; usually @code{ungetc} is used only to unread a character that was just read from the same stream. @Theglibc{} supports this even on files opened in binary mode, but other systems might not. -@Theglibc{} only supports one character of pushback---in other -words, it does not work to call @code{ungetc} twice without doing input -in between. Other systems might let you push back multiple characters; -then reading from the stream retrieves the characters in the reverse -order that they were pushed. +@Theglibc{} supports pushing back multiple characters; subsequently +reading from the stream retrieves the characters in the reverse order +that they were pushed. Pushing back characters doesn't alter the file; only the internal buffering for the stream is affected. 
If a file positioning function diff --git a/math/Makefile b/math/Makefile index f06d370383..b64c3eedd5 100644 --- a/math/Makefile +++ b/math/Makefile @@ -1077,6 +1077,7 @@ CFLAGS-test-flt-eval-method.c += -fexcess-precision=standard CFLAGS-test-fe-snans-always-signal.c += $(config-cflags-signaling-nans) CFLAGS-test-nan-const.c += -fno-builtin +CFLAGS-test-nan-payload.c += -fno-builtin CFLAGS-test-ceil-except-2.c += -fno-builtin CFLAGS-test-floor-except-2.c += -fno-builtin diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in index 8f0c3d87bb..7100171346 100644 --- a/math/auto-libm-test-in +++ b/math/auto-libm-test-in @@ -5555,7 +5555,7 @@ exp2m1 -0x1p-1021 exp2m1 -0x1p-16381 expm1 0 -expm1 -0 no-mathvec +expm1 -0 expm1 1 expm1 0.75 expm1 2 @@ -5620,7 +5620,7 @@ expm1 -0x1p-100 expm1 0x1p-600 expm1 -0x1p-600 expm1 0x1p-10000 -expm1 -0x1p-10000 no-mathvec +expm1 -0x1p-10000 expm1 0xe.4152ac57cd1ea7ap-60 expm1 0x6.660247486aed8p-4 expm1 0x6.289a78p-4 @@ -6835,7 +6835,7 @@ log10p1 -0x6.3fef3067427e43dfcde9e48f74bcp-4 log10p1 0x6.af53d00fd2845d4772260ef5adc4p-4 log1p 0 -log1p -0 no-mathvec +log1p -0 log1p e-1 log1p -0.25 log1p -0.875 @@ -7627,7 +7627,7 @@ pow 0x1.7ac7cp+5 23 pow -0x1.7ac7cp+5 23 sin 0 -sin -0 no-mathvec +sin -0 sin pi/6 sin -pi/6 sin pi/2 @@ -7964,7 +7964,7 @@ sqrt min sqrt min_subnorm tan 0 -tan -0 no-mathvec +tan -0 tan pi/4 tan pi/2 tan -pi/2 @@ -8056,7 +8056,7 @@ tan min_subnorm tan -min_subnorm tanh 0 -tanh -0 no-mathvec +tanh -0 tanh 0.75 tanh -0.75 tanh 1.0 diff --git a/math/auto-libm-test-out-expm1 b/math/auto-libm-test-out-expm1 index 91da41b7f6..8483455801 100644 --- a/math/auto-libm-test-out-expm1 +++ b/math/auto-libm-test-out-expm1 @@ -23,31 +23,31 @@ expm1 0 = expm1 tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok = expm1 towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok = expm1 upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok -expm1 -0 no-mathvec -= expm1 downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest binary32 
-0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +expm1 -0 += expm1 downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward binary64 
-0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok expm1 1 = expm1 downward binary32 0x1p+0 : 0x1.b7e15p+0 : inexact-ok = expm1 tonearest binary32 0x1p+0 : 0x1.b7e152p+0 : inexact-ok @@ -1880,87 +1880,87 @@ expm1 0x1p-10000 = expm1 tonearest binary128 0x1p-10000 : 0x1p-10000 : inexact-ok = expm1 towardzero binary128 0x1p-10000 : 0x1p-10000 : inexact-ok = expm1 upward binary128 0x1p-10000 : 0x1.0000000000000000000000000001p-10000 : inexact-ok -expm1 -0x1p-10000 no-mathvec -= expm1 downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= 
expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= expm1 downward binary32 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 tonearest binary32 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 towardzero binary32 -0x8p-152 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 upward binary32 -0x8p-152 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 downward binary64 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 tonearest binary64 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 towardzero binary64 -0x8p-152 : -0x7.ffffffffffffcp-152 : no-mathvec inexact-ok -= expm1 upward binary64 -0x8p-152 : -0x7.ffffffffffffcp-152 : no-mathvec inexact-ok -= expm1 downward intel96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 tonearest intel96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 towardzero intel96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok -= expm1 upward intel96 
-0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok -= expm1 downward m68k96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 tonearest m68k96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 towardzero m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok -= expm1 upward m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok -= expm1 downward binary128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 tonearest binary128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 towardzero binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : no-mathvec inexact-ok -= expm1 upward binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : no-mathvec inexact-ok -= expm1 downward ibm128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 tonearest ibm128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok -= expm1 towardzero ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : no-mathvec inexact-ok -= expm1 upward ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : no-mathvec inexact-ok -= expm1 downward binary64 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 tonearest binary64 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 towardzero binary64 -0x4p-1076 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 upward binary64 -0x4p-1076 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 downward intel96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok -= expm1 tonearest intel96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok -= expm1 towardzero intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok -= expm1 upward intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok -= expm1 downward m68k96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok -= expm1 tonearest m68k96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok -= expm1 towardzero m68k96 -0x4p-1076 : 
-0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok -= expm1 upward m68k96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok -= expm1 downward binary128 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok -= expm1 tonearest binary128 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok -= expm1 towardzero binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : no-mathvec inexact-ok -= expm1 upward binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : no-mathvec inexact-ok -= expm1 downward ibm128 -0x4p-1076 : -0x4p-1076 : no-mathvec xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok -= expm1 tonearest ibm128 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok underflow errno-erange-ok -= expm1 towardzero ibm128 -0x4p-1076 : -0x0p+0 : no-mathvec xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok -= expm1 upward ibm128 -0x4p-1076 : -0x0p+0 : no-mathvec xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok -= expm1 downward intel96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok -= expm1 tonearest intel96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok -= expm1 towardzero intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok -= expm1 upward intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok -= expm1 downward m68k96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok -= expm1 tonearest m68k96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok -= expm1 towardzero m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok -= expm1 upward m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok -= expm1 downward binary128 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok -= expm1 tonearest binary128 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok -= expm1 towardzero binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : no-mathvec inexact-ok -= expm1 upward binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : no-mathvec inexact-ok 
+expm1 -0x1p-10000 += expm1 downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += expm1 downward binary32 -0x8p-152 : -0x8p-152 : inexact-ok underflow errno-erange-ok += expm1 tonearest binary32 -0x8p-152 : -0x8p-152 : inexact-ok underflow errno-erange-ok += expm1 towardzero binary32 -0x8p-152 : -0x0p+0 : inexact-ok underflow errno-erange-ok += expm1 upward binary32 -0x8p-152 : -0x0p+0 : inexact-ok underflow errno-erange-ok += expm1 downward binary64 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 tonearest binary64 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 towardzero binary64 -0x8p-152 : -0x7.ffffffffffffcp-152 : inexact-ok += expm1 upward binary64 -0x8p-152 : 
-0x7.ffffffffffffcp-152 : inexact-ok += expm1 downward intel96 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 tonearest intel96 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 towardzero intel96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok += expm1 upward intel96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok += expm1 downward m68k96 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 tonearest m68k96 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 towardzero m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok += expm1 upward m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok += expm1 downward binary128 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 tonearest binary128 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 towardzero binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : inexact-ok += expm1 upward binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : inexact-ok += expm1 downward ibm128 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 tonearest ibm128 -0x8p-152 : -0x8p-152 : inexact-ok += expm1 towardzero ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : inexact-ok += expm1 upward ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : inexact-ok += expm1 downward binary64 -0x4p-1076 : -0x4p-1076 : inexact-ok underflow errno-erange-ok += expm1 tonearest binary64 -0x4p-1076 : -0x4p-1076 : inexact-ok underflow errno-erange-ok += expm1 towardzero binary64 -0x4p-1076 : -0x0p+0 : inexact-ok underflow errno-erange-ok += expm1 upward binary64 -0x4p-1076 : -0x0p+0 : inexact-ok underflow errno-erange-ok += expm1 downward intel96 -0x4p-1076 : -0x4p-1076 : inexact-ok += expm1 tonearest intel96 -0x4p-1076 : -0x4p-1076 : inexact-ok += expm1 towardzero intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok += expm1 upward intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok += expm1 downward m68k96 -0x4p-1076 : -0x4p-1076 : inexact-ok += expm1 tonearest m68k96 -0x4p-1076 : -0x4p-1076 : inexact-ok += expm1 towardzero m68k96 
-0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok += expm1 upward m68k96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok += expm1 downward binary128 -0x4p-1076 : -0x4p-1076 : inexact-ok += expm1 tonearest binary128 -0x4p-1076 : -0x4p-1076 : inexact-ok += expm1 towardzero binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : inexact-ok += expm1 upward binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : inexact-ok += expm1 downward ibm128 -0x4p-1076 : -0x4p-1076 : xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok += expm1 tonearest ibm128 -0x4p-1076 : -0x4p-1076 : inexact-ok underflow errno-erange-ok += expm1 towardzero ibm128 -0x4p-1076 : -0x0p+0 : xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok += expm1 upward ibm128 -0x4p-1076 : -0x0p+0 : xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok += expm1 downward intel96 -0x1p-10000 : -0x1p-10000 : inexact-ok += expm1 tonearest intel96 -0x1p-10000 : -0x1p-10000 : inexact-ok += expm1 towardzero intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok += expm1 upward intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok += expm1 downward m68k96 -0x1p-10000 : -0x1p-10000 : inexact-ok += expm1 tonearest m68k96 -0x1p-10000 : -0x1p-10000 : inexact-ok += expm1 towardzero m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok += expm1 upward m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok += expm1 downward binary128 -0x1p-10000 : -0x1p-10000 : inexact-ok += expm1 tonearest binary128 -0x1p-10000 : -0x1p-10000 : inexact-ok += expm1 towardzero binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : inexact-ok += expm1 upward binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : inexact-ok expm1 0xe.4152ac57cd1ea7ap-60 = expm1 downward binary32 0xe.4152bp-60 : 0xe.4152bp-60 : inexact-ok = expm1 tonearest binary32 0xe.4152bp-60 : 0xe.4152bp-60 : inexact-ok diff --git a/math/auto-libm-test-out-log1p 
b/math/auto-libm-test-out-log1p index f83241f51a..f7d3b35e6d 100644 --- a/math/auto-libm-test-out-log1p +++ b/math/auto-libm-test-out-log1p @@ -23,31 +23,31 @@ log1p 0 = log1p tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok = log1p towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok = log1p upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok -log1p -0 no-mathvec -= log1p downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= log1p upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok 
+log1p -0 += log1p downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += log1p tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok += log1p towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok += log1p upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += log1p downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += log1p tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok += log1p towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok += log1p upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += log1p downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += log1p downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += log1p tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok += log1p towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok += log1p upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += log1p downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += log1p tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += log1p towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += log1p upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok log1p e-1 = log1p downward binary32 0x1.b7e152p+0 : 0x1p+0 : inexact-ok = log1p tonearest binary32 0x1.b7e152p+0 : 0x1p+0 : inexact-ok diff --git a/math/auto-libm-test-out-sin b/math/auto-libm-test-out-sin index e1f6845283..f1d21b179c 100644 --- a/math/auto-libm-test-out-sin +++ b/math/auto-libm-test-out-sin @@ -23,31 +23,31 @@ sin 0 = sin tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok = sin towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok = sin upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok -sin -0 no-mathvec -= sin downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin tonearest binary32 
-0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= sin upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +sin -0 += sin downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += sin tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok += sin towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok += sin upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += sin downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += sin tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok += sin towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok += sin upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += sin downward intel96 -0x0p+0 : 
-0x0p+0 : inexact-ok += sin tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok += sin towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok += sin upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += sin downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += sin tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += sin towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += sin upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += sin downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += sin tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok += sin towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok += sin upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += sin downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += sin tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += sin towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += sin upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok sin pi/6 = sin downward binary32 0x8.60a92p-4 : 0x8p-4 : inexact-ok = sin tonearest binary32 0x8.60a92p-4 : 0x8p-4 : inexact-ok diff --git a/math/auto-libm-test-out-tan b/math/auto-libm-test-out-tan index f46fdc7ec6..7d00d03e1d 100644 --- a/math/auto-libm-test-out-tan +++ b/math/auto-libm-test-out-tan @@ -23,31 +23,31 @@ tan 0 = tan tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok = tan towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok = tan upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok -tan -0 no-mathvec -= tan downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan tonearest intel96 -0x0p+0 : -0x0p+0 
: no-mathvec inexact-ok -= tan towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tan upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +tan -0 += tan downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tan tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tan towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tan upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tan downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tan tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tan towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tan upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tan downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tan tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tan towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tan upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tan downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tan tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tan towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tan upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tan downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += tan tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok += tan towardzero binary128 -0x0p+0 : 
-0x0p+0 : inexact-ok += tan upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += tan downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += tan tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += tan towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += tan upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok tan pi/4 = tan downward binary32 0xc.90fdbp-4 : 0x1p+0 : inexact-ok = tan tonearest binary32 0xc.90fdbp-4 : 0x1p+0 : inexact-ok diff --git a/math/auto-libm-test-out-tanh b/math/auto-libm-test-out-tanh index 19ce2e7b93..8b9427c917 100644 --- a/math/auto-libm-test-out-tanh +++ b/math/auto-libm-test-out-tanh @@ -23,31 +23,31 @@ tanh 0 = tanh tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok = tanh towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok = tanh upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok -tanh -0 no-mathvec -= tanh downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh tonearest binary128 -0x0p+0 : -0x0p+0 
: no-mathvec inexact-ok -= tanh towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok -= tanh upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +tanh -0 += tanh downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tanh tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tanh towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tanh upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok += tanh downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tanh tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tanh towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tanh upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok += tanh downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok += tanh downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += tanh tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok += tanh towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok += tanh upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok += tanh downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += tanh tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += tanh towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok += tanh upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok tanh 0.75 = tanh downward binary32 0xcp-4 : 0xa.2991fp-4 : inexact-ok = tanh tonearest binary32 0xcp-4 : 0xa.2991fp-4 : inexact-ok diff --git a/math/gen-auto-libm-tests.c b/math/gen-auto-libm-tests.c index 
0b1e307fae..ff8a22b90f 100644 --- a/math/gen-auto-libm-tests.c +++ b/math/gen-auto-libm-tests.c @@ -96,8 +96,7 @@ zero and infinite results should be ignored; "xfail" indicates the test is disabled as expected to produce incorrect results, "xfail-rounding" indicates the test is disabled only in rounding - modes other than round-to-nearest; "no-mathvec" indicates the test - is disabled in vector math libraries. Otherwise, test flags are of + modes other than round-to-nearest. Otherwise, test flags are of the form "spurious-" and "missing-", for any exception ("overflow", "underflow", "inexact", "invalid", "divbyzero"), "spurious-errno" and "missing-errno", to indicate @@ -353,7 +352,6 @@ typedef enum flag_missing_overflow, flag_missing_underflow, flag_missing_errno, - flag_no_mathvec, num_input_flag_types, flag_first_flag = 0, flag_spurious_first = flag_spurious_divbyzero, @@ -379,7 +377,6 @@ static const char *const input_flags[num_input_flag_types] = "missing-overflow", "missing-underflow", "missing-errno", - "no-mathvec", }; /* An input flag, possibly conditional. */ @@ -2056,7 +2053,6 @@ output_for_one_input_case (FILE *fp, const char *filename, test_function *tf, { case flag_ignore_zero_inf_sign: case flag_xfail: - case flag_no_mathvec: if (fprintf (fp, " %s%s", input_flags[it->flags[i].type], (it->flags[i].cond diff --git a/math/gen-libm-test.py b/math/gen-libm-test.py index 397dbd3259..6e8bb56437 100755 --- a/math/gen-libm-test.py +++ b/math/gen-libm-test.py @@ -93,8 +93,7 @@ BEAUTIFY_MAP = {'minus_zero': '-0', # Flags in auto-libm-test-out that map directly to C flags. FLAGS_SIMPLE = {'ignore-zero-inf-sign': 'IGNORE_ZERO_INF_SIGN', - 'xfail': 'XFAIL_TEST', - 'no-mathvec': 'NO_TEST_MATHVEC'} + 'xfail': 'XFAIL_TEST'} # Exceptions in auto-libm-test-out, and their corresponding C flags # for being required, OK or required to be absent. 
diff --git a/math/libm-test-support.c b/math/libm-test-support.c index 0796f9d495..3fecd87064 100644 --- a/math/libm-test-support.c +++ b/math/libm-test-support.c @@ -776,7 +776,7 @@ check_float_internal (const char *test_name, FLOAT computed, FLOAT expected, ulps = ULPDIFF (computed, expected); set_max_error (ulps, curr_max_error); print_diff = 1; - if ((exceptions & IGNORE_ZERO_INF_SIGN) == 0 + if (((exceptions & IGNORE_ZERO_INF_SIGN) == 0) && !flag_test_mathvec && computed == 0.0 && expected == 0.0 && signbit(computed) != signbit (expected)) ok = 0; diff --git a/math/test-nan-payload.c b/math/test-nan-payload.c index 4a81dc348b..413791e09f 100644 --- a/math/test-nan-payload.c +++ b/math/test-nan-payload.c @@ -16,6 +16,9 @@ License along with the GNU C Library; if not, see . */ +#define _LIBC_TEST 1 +#define __STDC_WANT_IEC_60559_TYPES_EXT__ +#include #include #include #include @@ -31,7 +34,7 @@ #define CHECK_IS_NAN(TYPE, A) \ do \ { \ - if (isnan (A)) \ + if (isnan (A) && !issignaling (A)) \ puts ("PASS: " #TYPE " " #A); \ else \ { \ @@ -41,6 +44,19 @@ } \ while (0) +#define CHECK_PAYLOAD(TYPE, FUNC, A, P) \ + do \ + { \ + if (FUNC (&(A)) == (P)) \ + puts ("PASS: " #TYPE " payload " #A); \ + else \ + { \ + puts ("FAIL: " #TYPE " payload " #A); \ + result = 1; \ + } \ + } \ + while (0) + #define CHECK_SAME_NAN(TYPE, A, B) \ do \ { \ @@ -67,33 +83,97 @@ } \ while (0) +#define CLEAR_ERRNO \ + do \ + { \ + errno = 12345; \ + } \ + while (0) + +#define CHECK_ERRNO(TYPE, A) \ + do \ + { \ + if (errno == 12345) \ + puts ("PASS: " #TYPE " " #A " errno"); \ + else \ + { \ + puts ("FAIL: " #TYPE " " #A " errno"); \ + result = 1; \ + } \ + } \ + while (0) + /* Cannot test payloads by memcmp for formats where NaNs have padding bits. 
*/ #define CAN_TEST_EQ(MANT_DIG) ((MANT_DIG) != 64 && (MANT_DIG) != 106) -#define RUN_TESTS(TYPE, SFUNC, FUNC, MANT_DIG) \ +#define RUN_TESTS(TYPE, SFUNC, FUNC, PLFUNC, MANT_DIG) \ do \ { \ + CLEAR_ERRNO; \ TYPE n123 = WRAP_NAN (FUNC, "123"); \ + CHECK_ERRNO (TYPE, n123); \ CHECK_IS_NAN (TYPE, n123); \ + CLEAR_ERRNO; \ TYPE s123 = WRAP_STRTO (SFUNC, "NAN(123)"); \ + CHECK_ERRNO (TYPE, s123); \ CHECK_IS_NAN (TYPE, s123); \ + CLEAR_ERRNO; \ TYPE n456 = WRAP_NAN (FUNC, "456"); \ + CHECK_ERRNO (TYPE, n456); \ CHECK_IS_NAN (TYPE, n456); \ + CLEAR_ERRNO; \ TYPE s456 = WRAP_STRTO (SFUNC, "NAN(456)"); \ + CHECK_ERRNO (TYPE, s456); \ CHECK_IS_NAN (TYPE, s456); \ + CLEAR_ERRNO; \ + TYPE nh123 = WRAP_NAN (FUNC, "0x123"); \ + CHECK_ERRNO (TYPE, nh123); \ + CHECK_IS_NAN (TYPE, nh123); \ + CLEAR_ERRNO; \ + TYPE sh123 = WRAP_STRTO (SFUNC, "NAN(0x123)"); \ + CHECK_ERRNO (TYPE, sh123); \ + CHECK_IS_NAN (TYPE, sh123); \ + CLEAR_ERRNO; \ TYPE n123x = WRAP_NAN (FUNC, "123)"); \ + CHECK_ERRNO (TYPE, n123x); \ CHECK_IS_NAN (TYPE, n123x); \ + CLEAR_ERRNO; \ TYPE nemp = WRAP_NAN (FUNC, ""); \ + CHECK_ERRNO (TYPE, nemp); \ CHECK_IS_NAN (TYPE, nemp); \ + CLEAR_ERRNO; \ TYPE semp = WRAP_STRTO (SFUNC, "NAN()"); \ + CHECK_ERRNO (TYPE, semp); \ CHECK_IS_NAN (TYPE, semp); \ + CLEAR_ERRNO; \ TYPE sx = WRAP_STRTO (SFUNC, "NAN"); \ + CHECK_ERRNO (TYPE, sx); \ CHECK_IS_NAN (TYPE, sx); \ + CLEAR_ERRNO; \ + TYPE novf = WRAP_NAN (FUNC, "9999999999" \ + "99999999999999999999" \ + "9999999999"); \ + CHECK_ERRNO (TYPE, novf); \ + CHECK_IS_NAN (TYPE, novf); \ + CLEAR_ERRNO; \ + TYPE sovf = WRAP_STRTO (SFUNC, "NAN(9999999999" \ + "99999999999999999999" \ + "9999999999)"); \ + CHECK_ERRNO (TYPE, sovf); \ + CHECK_IS_NAN (TYPE, sovf); \ if (CAN_TEST_EQ (MANT_DIG)) \ CHECK_SAME_NAN (TYPE, n123, s123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, n123, 123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, s123, 123); \ if (CAN_TEST_EQ (MANT_DIG)) \ CHECK_SAME_NAN (TYPE, n456, s456); \ + CHECK_PAYLOAD (TYPE, PLFUNC, n456, 456); \ + 
CHECK_PAYLOAD (TYPE, PLFUNC, s456, 456); \ + if (CAN_TEST_EQ (MANT_DIG)) \ + CHECK_SAME_NAN (TYPE, nh123, sh123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, nh123, 0x123); \ + CHECK_PAYLOAD (TYPE, PLFUNC, sh123, 0x123); \ if (CAN_TEST_EQ (MANT_DIG)) \ CHECK_SAME_NAN (TYPE, nemp, semp); \ if (CAN_TEST_EQ (MANT_DIG)) \ @@ -110,9 +190,31 @@ static int do_test (void) { int result = 0; - RUN_TESTS (float, strtof, nanf, FLT_MANT_DIG); - RUN_TESTS (double, strtod, nan, DBL_MANT_DIG); - RUN_TESTS (long double, strtold, nanl, LDBL_MANT_DIG); + RUN_TESTS (float, strtof, nanf, getpayloadf, FLT_MANT_DIG); + RUN_TESTS (double, strtod, nan, getpayload, DBL_MANT_DIG); + RUN_TESTS (long double, strtold, nanl, getpayloadl, LDBL_MANT_DIG); +#if __HAVE_FLOAT16 + RUN_TESTS (_Float16, strtof16, nanf16, getpayloadf16, FLT16_MANT_DIG); +#endif +#if __HAVE_FLOAT32 + RUN_TESTS (_Float32, strtof32, nanf32, getpayloadf32, FLT32_MANT_DIG); +#endif +#if __HAVE_FLOAT64 + RUN_TESTS (_Float64, strtof64, nanf64, getpayloadf64, FLT64_MANT_DIG); +#endif +#if __HAVE_FLOAT128 + RUN_TESTS (_Float128, strtof128, nanf128, getpayloadf128, FLT128_MANT_DIG); +#endif +#if __HAVE_FLOAT32X + RUN_TESTS (_Float32x, strtof32x, nanf32x, getpayloadf32x, FLT32X_MANT_DIG); +#endif +#if __HAVE_FLOAT64X + RUN_TESTS (_Float64x, strtof64x, nanf64x, getpayloadf64x, FLT64X_MANT_DIG); +#endif +#if __HAVE_FLOAT128X + RUN_TESTS (_Float128x, strtof128x, nanf128x, getpayloadf128x, + FLT128X_MANT_DIG); +#endif return result; } diff --git a/math/test-tgmath2.c b/math/test-tgmath2.c index 37afa8a08a..4aeb877b8e 100644 --- a/math/test-tgmath2.c +++ b/math/test-tgmath2.c @@ -24,6 +24,8 @@ #include #include +#include + //#define DEBUG typedef complex float cfloat; @@ -87,13 +89,6 @@ enum int count; int counts[Tlast][C_last]; -#define FAIL(str) \ - do \ - { \ - printf ("%s failure on line %d\n", (str), __LINE__); \ - result = 1; \ - } \ - while (0) #define TEST_TYPE_ONLY(expr, rettype) \ do \ { \ @@ -133,8 +128,6 @@ int counts[Tlast][C_last]; 
int test_cos (const int Vint4, const long long int Vllong4) { - int result = 0; - TEST (cos (vfloat1), float, cos); TEST (cos (vdouble1), double, cos); TEST (cos (vldouble1), ldouble, cos); @@ -152,7 +145,7 @@ test_cos (const int Vint4, const long long int Vllong4) TEST (cos (Vcdouble1), cdouble, cos); TEST (cos (Vcldouble1), cldouble, cos); - return result; + return 0; } int diff --git a/misc/Makefile b/misc/Makefile index 5d17c562fe..7b7f8351bf 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -257,6 +257,8 @@ tests := \ tst-mntent-blank-passno \ tst-mntent-escape \ tst-mntent2 \ + tst-mremap1 \ + tst-mremap2 \ tst-preadvwritev \ tst-preadvwritev2 \ tst-preadvwritev64 \ diff --git a/misc/tst-mremap1.c b/misc/tst-mremap1.c new file mode 100644 index 0000000000..0469991a6c --- /dev/null +++ b/misc/tst-mremap1.c @@ -0,0 +1,46 @@ +/* Test mremap with MREMAP_MAYMOVE. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + size_t old_size = getpagesize (); + char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + old_addr[0] = 1; + old_addr[old_size - 1] = 2; + + /* Test MREMAP_MAYMOVE. 
*/ + size_t new_size = old_size + old_size; + char *new_addr = mremap (old_addr, old_size, new_size, MREMAP_MAYMOVE); + TEST_VERIFY_EXIT (new_addr != MAP_FAILED); + new_addr[0] = 1; + new_addr[new_size - 1] = 2; + xmunmap (new_addr, new_size); + + return 0; +} + +#include diff --git a/misc/tst-mremap2.c b/misc/tst-mremap2.c new file mode 100644 index 0000000000..45be7f0369 --- /dev/null +++ b/misc/tst-mremap2.c @@ -0,0 +1,54 @@ +/* Test mremap with MREMAP_FIXED. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + size_t old_size = getpagesize (); + size_t new_size = old_size + old_size; + char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + old_addr[0] = 1; + old_addr[old_size - 1] = 2; + + char *fixed_addr = xmmap (NULL, new_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + fixed_addr[0] = 1; + fixed_addr[new_size - 1] = 2; + + /* Test MREMAP_FIXED. 
*/ + char *new_addr = mremap (old_addr, old_size, new_size, + MREMAP_FIXED | MREMAP_MAYMOVE, + fixed_addr); + if (new_addr == MAP_FAILED) + return mremap_failure_exit (errno); + new_addr[0] = 1; + new_addr[new_size - 1] = 2; + xmunmap (new_addr, new_size); + + return 0; +} + +#include diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index f35a8369bd..2cb562f8ea 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -139,7 +139,7 @@ get_cached_stack (size_t *sizep, void **memp) memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t)); /* Re-initialize the TLS. */ - _dl_allocate_tls_init (TLS_TPADJ (result), true); + _dl_allocate_tls_init (TLS_TPADJ (result), false); return result; } diff --git a/nptl/descr.h b/nptl/descr.h index 8cef95810c..c4bdd7757a 100644 --- a/nptl/descr.h +++ b/nptl/descr.h @@ -414,6 +414,8 @@ struct pthread { uint32_t cpu_id_start; uint32_t cpu_id; + uint64_t rseq_cs; + uint32_t flags; }; char pad[32]; /* Original rseq area size. */ } rseq_area __attribute__ ((aligned (32))); diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c index aada91639a..51afa62adf 100644 --- a/nptl/pthread_cond_broadcast.c +++ b/nptl/pthread_cond_broadcast.c @@ -57,10 +57,10 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) { /* Add as many signals as the remaining size of the group. */ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, - cond->__data.__g_size[g1] << 1); + cond->__data.__g_size[g1]); cond->__data.__g_size[g1] = 0; - /* We need to wake G1 waiters before we quiesce G1 below. */ + /* We need to wake G1 waiters before we switch G1 below. */ /* TODO Only set it if there are indeed futex waiters. We could also try to move this out of the critical section in cases when G2 is empty (and we don't need to quiesce). */ @@ -69,11 +69,11 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) /* G1 is complete. Step (2) is next unless there are no waiters in G2, in which case we can stop. 
*/ - if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) + if (__condvar_switch_g1 (cond, wseq, &g1, private)) { /* Step (3): Send signals to all waiters in the old G2 / new G1. */ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, - cond->__data.__g_size[g1] << 1); + cond->__data.__g_size[g1]); cond->__data.__g_size[g1] = 0; /* TODO Only set it if there are indeed futex waiters. */ do_futex_wake = true; diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c index 3487557bb8..389402913c 100644 --- a/nptl/pthread_cond_common.c +++ b/nptl/pthread_cond_common.c @@ -189,19 +189,17 @@ __condvar_get_private (int flags) return FUTEX_SHARED; } -/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to - leave G1, converts G1 into a fresh G2, and then switches group roles so that - the former G2 becomes the new G1 ending at the current __wseq value when we - eventually make the switch (WSEQ is just an observation of __wseq by the - signaler). +/* This closes G1 (whose index is in G1INDEX), converts G1 into a fresh G2, + and then switches group roles so that the former G2 becomes the new G1 + ending at the current __wseq value when we eventually make the switch + (WSEQ is just an observation of __wseq by the signaler). If G2 is empty, it will not switch groups because then it would create an empty G1 which would require switching groups again on the next signal. Returns false iff groups were not switched because G2 was empty. */ static bool __attribute__ ((unused)) -__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, +__condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, unsigned int *g1index, int private) { - const unsigned int maxspin = 0; unsigned int g1 = *g1index; /* If there is no waiter in G2, we don't do anything. The expression may @@ -210,96 +208,23 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, behavior. 
Note that this works correctly for a zero-initialized condvar too. */ unsigned int old_orig_size = __condvar_get_orig_size (cond); - uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; - if (((unsigned) (wseq - old_g1_start - old_orig_size) - + cond->__data.__g_size[g1 ^ 1]) == 0) + uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond); + uint64_t new_g1_start = old_g1_start + old_orig_size; + if (((unsigned) (wseq - new_g1_start) + cond->__data.__g_size[g1 ^ 1]) == 0) return false; - /* Now try to close and quiesce G1. We have to consider the following kinds - of waiters: + /* We have to consider the following kinds of waiters: * Waiters from less recent groups than G1 are not affected because nothing will change for them apart from __g1_start getting larger. * New waiters arriving concurrently with the group switching will all go into G2 until we atomically make the switch. Waiters existing in G2 are not affected. - * Waiters in G1 will be closed out immediately by setting a flag in - __g_signals, which will prevent waiters from blocking using a futex on - __g_signals and also notifies them that the group is closed. As a - result, they will eventually remove their group reference, allowing us - to close switch group roles. */ + * Waiters in G1 have already received a signal and been woken. */ - /* First, set the closed flag on __g_signals. This tells waiters that are - about to wait that they shouldn't do that anymore. This basically - serves as an advance notification of the upcoming change to __g1_start; - waiters interpret it as if __g1_start was larger than their waiter - sequence position. 
This allows us to change __g1_start after waiting - for all existing waiters with group references to leave, which in turn - makes recovery after stealing a signal simpler because it then can be - skipped if __g1_start indicates that the group is closed (otherwise, - we would have to recover always because waiters don't know how big their - groups are). Relaxed MO is fine. */ - atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1); - - /* Wait until there are no group references anymore. The fetch-or operation - injects us into the modification order of __g_refs; release MO ensures - that waiters incrementing __g_refs after our fetch-or see the previous - changes to __g_signals and to __g1_start that had to happen before we can - switch this G1 and alias with an older group (we have two groups, so - aliasing requires switching group roles twice). Note that nobody else - can have set the wake-request flag, so we do not have to act upon it. - - Also note that it is harmless if older waiters or waiters from this G1 - get a group reference after we have quiesced the group because it will - remain closed for them either because of the closed flag in __g_signals - or the later update to __g1_start. New waiters will never arrive here - but instead continue to go into the still current G2. */ - unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0); - while ((r >> 1) > 0) - { - for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--) - { - /* TODO Back off. */ - r = atomic_load_relaxed (cond->__data.__g_refs + g1); - } - if ((r >> 1) > 0) - { - /* There is still a waiter after spinning. Set the wake-request - flag and block. Relaxed MO is fine because this is just about - this futex word. - - Update r to include the set wake-request flag so that the upcoming - futex_wait only blocks if the flag is still set (otherwise, we'd - violate the basic client-side futex protocol). 
*/ - r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1) | 1; - - if ((r >> 1) > 0) - futex_wait_simple (cond->__data.__g_refs + g1, r, private); - /* Reload here so we eventually see the most recent value even if we - do not spin. */ - r = atomic_load_relaxed (cond->__data.__g_refs + g1); - } - } - /* Acquire MO so that we synchronize with the release operation that waiters - use to decrement __g_refs and thus happen after the waiters we waited - for. */ - atomic_thread_fence_acquire (); - - /* Update __g1_start, which finishes closing this group. The value we add - will never be negative because old_orig_size can only be zero when we - switch groups the first time after a condvar was initialized, in which - case G1 will be at index 1 and we will add a value of 1. See above for - why this takes place after waiting for quiescence of the group. - Relaxed MO is fine because the change comes with no additional - constraints that others would have to observe. */ - __condvar_add_g1_start_relaxed (cond, - (old_orig_size << 1) + (g1 == 1 ? 1 : - 1)); - - /* Now reopen the group, thus enabling waiters to again block using the - futex controlled by __g_signals. Release MO so that observers that see - no signals (and thus can block) also see the write __g1_start and thus - that this is now a new group (see __pthread_cond_wait_common for the - matching acquire MO loads). */ - atomic_store_release (cond->__data.__g_signals + g1, 0); + /* Update __g1_start, which closes this group. Relaxed MO is fine because + the change comes with no additional constraints that others would have + to observe. */ + __condvar_add_g1_start_relaxed (cond, old_orig_size); /* At this point, the old G1 is now a valid new G2 (but not in use yet). 
No old waiter can neither grab a signal nor acquire a reference without @@ -311,9 +236,13 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, g1 ^= 1; *g1index ^= 1; + /* Now advance the new G1 g_signals to the new g1_start, giving it + an effective signal count of 0 to start. */ + atomic_store_release (cond->__data.__g_signals + g1, (unsigned)new_g1_start); + /* These values are just observed by signalers, and thus protected by the lock. */ - unsigned int orig_size = wseq - (old_g1_start + old_orig_size); + unsigned int orig_size = wseq - new_g1_start; __condvar_set_orig_size (cond, orig_size); /* Use and addition to not loose track of cancellations in what was previously G2. */ diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c index 43d6286ecd..fa3a5c3d8f 100644 --- a/nptl/pthread_cond_signal.c +++ b/nptl/pthread_cond_signal.c @@ -69,19 +69,18 @@ ___pthread_cond_signal (pthread_cond_t *cond) bool do_futex_wake = false; /* If G1 is still receiving signals, we put the signal there. If not, we - check if G2 has waiters, and if so, quiesce and switch G1 to the former - G2; if this results in a new G1 with waiters (G2 might have cancellations - already, see __condvar_quiesce_and_switch_g1), we put the signal in the - new G1. */ + check if G2 has waiters, and if so, switch G1 to the former G2; if this + results in a new G1 with waiters (G2 might have cancellations already, + see __condvar_switch_g1), we put the signal in the new G1. */ if ((cond->__data.__g_size[g1] != 0) - || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) + || __condvar_switch_g1 (cond, wseq, &g1, private)) { /* Add a signal. Relaxed MO is fine because signaling does not need to - establish a happens-before relation (see above). We do not mask the - release-MO store when initializing a group in - __condvar_quiesce_and_switch_g1 because we use an atomic - read-modify-write and thus extend that store's release sequence. 
*/ - atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2); + establish a happens-before relation (see above). We do not mask the + release-MO store when initializing a group in __condvar_switch_g1 + because we use an atomic read-modify-write and thus extend that + store's release sequence. */ + atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 1); cond->__data.__g_size[g1]--; /* TODO Only set it if there are indeed futex waiters. */ do_futex_wake = true; diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c index 66786c7b90..0f1dfcb595 100644 --- a/nptl/pthread_cond_wait.c +++ b/nptl/pthread_cond_wait.c @@ -84,7 +84,7 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, not hold a reference on the group. */ __condvar_acquire_lock (cond, private); - uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); if (g1_start > seq) { /* Our group is closed, so someone provided enough signals for it. @@ -143,23 +143,6 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, } } -/* Wake up any signalers that might be waiting. */ -static void -__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private) -{ - /* Release MO to synchronize-with the acquire load in - __condvar_quiesce_and_switch_g1. */ - if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3) - { - /* Clear the wake-up request flag before waking up. We do not need more - than relaxed MO and it doesn't matter if we apply this for an aliased - group because we wake all futex waiters right after clearing the - flag. */ - atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1); - futex_wake (cond->__data.__g_refs + g, INT_MAX, private); - } -} - /* Clean-up for cancellation of waiters waiting for normal signals. We cancel our registration as a waiter, confirm we have woken up, and re-acquire the mutex. 
*/ @@ -171,8 +154,6 @@ __condvar_cleanup_waiting (void *arg) pthread_cond_t *cond = cbuffer->cond; unsigned g = cbuffer->wseq & 1; - __condvar_dec_grefs (cond, g, cbuffer->private); - __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private); /* FIXME With the current cancellation implementation, it is possible that a thread is cancelled after it has returned from a syscall. This could @@ -238,9 +219,7 @@ __condvar_cleanup_waiting (void *arg) signaled), and a reference count. The group reference count is used to maintain the number of waiters that - are using the group's futex. Before a group can change its role, the - reference count must show that no waiters are using the futex anymore; this - prevents ABA issues on the futex word. + are using the group's futex. To represent which intervals in the waiter sequence the groups cover (and thus also which group slot contains G1 or G2), we use a 64b counter to @@ -251,7 +230,7 @@ __condvar_cleanup_waiting (void *arg) figure out whether they are in a group that has already been completely signaled (i.e., if the current G1 starts at a later position that the waiter's position). Waiters cannot determine whether they are currently - in G2 or G1 -- but they do not have too because all they are interested in + in G2 or G1 -- but they do not have to because all they are interested in is whether there are available signals, and they always start in G2 (whose group slot they know because of the bit in the waiter sequence. Signalers will simply fill the right group until it is completely signaled and can @@ -280,7 +259,6 @@ __condvar_cleanup_waiting (void *arg) * Waiters fetch-add while having acquire the mutex associated with the condvar. Signalers load it and fetch-xor it concurrently. __g1_start: Starting position of G1 (inclusive) - * LSB is index of current G2. * Modified by signalers while having acquired the condvar-internal lock and observed concurrently by waiters. 
__g1_orig_size: Initial size of G1 @@ -300,11 +278,10 @@ __condvar_cleanup_waiting (void *arg) last reference. * Reference count used by waiters concurrently with signalers that have acquired the condvar-internal lock. - __g_signals: The number of signals that can still be consumed. + __g_signals: The number of signals that can still be consumed, relative to + the current g1_start. (i.e. g1_start with the signal count added) * Used as a futex word by waiters. Used concurrently by waiters and signalers. - * LSB is true iff this group has been completely signaled (i.e., it is - closed). __g_size: Waiters remaining in this group (i.e., which have not been signaled yet. * Accessed by signalers and waiters that cancel waiting (both do so only @@ -328,27 +305,6 @@ __condvar_cleanup_waiting (void *arg) sufficient because if a waiter can see a sufficiently large value, it could have also consume a signal in the waiters group. - Waiters try to grab a signal from __g_signals without holding a reference - count, which can lead to stealing a signal from a more recent group after - their own group was already closed. They cannot always detect whether they - in fact did because they do not know when they stole, but they can - conservatively add a signal back to the group they stole from; if they - did so unnecessarily, all that happens is a spurious wake-up. To make this - even less likely, __g1_start contains the index of the current g2 too, - which allows waiters to check if there aliasing on the group slots; if - there wasn't, they didn't steal from the current G1, which means that the - G1 they stole from must have been already closed and they do not need to - fix anything. 
- - It is essential that the last field in pthread_cond_t is __g_signals[1]: - The previous condvar used a pointer-sized field in pthread_cond_t, so a - PTHREAD_COND_INITIALIZER from that condvar implementation might only - initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes - in total instead of the 48 we need). __g_signals[1] is not accessed before - the first group switch (G2 starts at index 0), which will set its value to - zero after a harmless fetch-or whose return value is ignored. This - effectively completes initialization. - Limitations: * This condvar isn't designed to allow for more than @@ -379,7 +335,6 @@ static __always_inline int __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, clockid_t clockid, const struct __timespec64 *abstime) { - const int maxspin = 0; int err; int result = 0; @@ -396,8 +351,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, because we do not need to establish any happens-before relation with signalers (see __pthread_cond_signal); modification order alone establishes a total order of waiters/signals. We do need acquire MO - to synchronize with group reinitialization in - __condvar_quiesce_and_switch_g1. */ + to synchronize with group reinitialization in __condvar_switch_g1. */ uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2); /* Find our group's index. We always go into what was G2 when we acquired our position. */ @@ -424,178 +378,64 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, return err; } - /* Now wait until a signal is available in our group or it is closed. - Acquire MO so that if we observe a value of zero written after group - switching in __condvar_quiesce_and_switch_g1, we synchronize with that - store and will see the prior update of __g1_start done while switching - groups too. 
*/ - unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); - do + while (1) { - while (1) - { - /* Spin-wait first. - Note that spinning first without checking whether a timeout - passed might lead to what looks like a spurious wake-up even - though we should return ETIMEDOUT (e.g., if the caller provides - an absolute timeout that is clearly in the past). However, - (1) spurious wake-ups are allowed, (2) it seems unlikely that a - user will (ab)use pthread_cond_wait as a check for whether a - point in time is in the past, and (3) spinning first without - having to compare against the current time seems to be the right - choice from a performance perspective for most use cases. */ - unsigned int spin = maxspin; - while (signals == 0 && spin > 0) - { - /* Check that we are not spinning on a group that's already - closed. */ - if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) - goto done; - - /* TODO Back off. */ - - /* Reload signals. See above for MO. */ - signals = atomic_load_acquire (cond->__data.__g_signals + g); - spin--; - } - - /* If our group will be closed as indicated by the flag on signals, - don't bother grabbing a signal. */ - if (signals & 1) - goto done; - - /* If there is an available signal, don't block. */ - if (signals != 0) - break; - - /* No signals available after spinning, so prepare to block. - We first acquire a group reference and use acquire MO for that so - that we synchronize with the dummy read-modify-write in - __condvar_quiesce_and_switch_g1 if we read from that. In turn, - in this case this will make us see the closed flag on __g_signals - that designates a concurrent attempt to reuse the group's slot. - We use acquire MO for the __g_signals check to make the - __g1_start check work (see spinning above). 
- Note that the group reference acquisition will not mask the - release MO when decrementing the reference count because we use - an atomic read-modify-write operation and thus extend the release - sequence. */ - atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); - if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0) - || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))) - { - /* Our group is closed. Wake up any signalers that might be - waiting. */ - __condvar_dec_grefs (cond, g, private); - goto done; - } - - // Now block. - struct _pthread_cleanup_buffer buffer; - struct _condvar_cleanup_buffer cbuffer; - cbuffer.wseq = wseq; - cbuffer.cond = cond; - cbuffer.mutex = mutex; - cbuffer.private = private; - __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); - - err = __futex_abstimed_wait_cancelable64 ( - cond->__data.__g_signals + g, 0, clockid, abstime, private); - - __pthread_cleanup_pop (&buffer, 0); - - if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) - { - __condvar_dec_grefs (cond, g, private); - /* If we timed out, we effectively cancel waiting. Note that - we have decremented __g_refs before cancellation, so that a - deadlock between waiting for quiescence of our group in - __condvar_quiesce_and_switch_g1 and us trying to acquire - the lock during cancellation is not possible. */ - __condvar_cancel_waiting (cond, seq, g, private); - result = err; - goto done; - } - else - __condvar_dec_grefs (cond, g, private); - - /* Reload signals. See above for MO. */ - signals = atomic_load_acquire (cond->__data.__g_signals + g); + /* Now wait until a signal is available in our group or it is closed. + Acquire MO so that if we observe (signals == lowseq) after group + switching in __condvar_switch_g1, we synchronize with that store and + will see the prior update of __g1_start done while switching groups + too. 
*/ + unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + + if (seq < g1_start) + { + /* If the group is closed already, + then this waiter originally had enough extra signals to + consume, up until the time its group was closed. */ + break; + } + + /* If there is an available signal, don't block. + If __g1_start has advanced at all, then we must be in G1 + by now, perhaps in the process of switching back to an older + G2, but in either case we're allowed to consume the available + signal and should not block anymore. */ + if ((int)(signals - (unsigned int)g1_start) > 0) + { + /* Try to grab a signal. See above for MO. (if we do another loop + iteration we need to see the correct value of g1_start) */ + if (atomic_compare_exchange_weak_acquire ( + cond->__data.__g_signals + g, + &signals, signals - 1)) + break; + else + continue; } + // Now block. + struct _pthread_cleanup_buffer buffer; + struct _condvar_cleanup_buffer cbuffer; + cbuffer.wseq = wseq; + cbuffer.cond = cond; + cbuffer.mutex = mutex; + cbuffer.private = private; + __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); + + err = __futex_abstimed_wait_cancelable64 ( + cond->__data.__g_signals + g, signals, clockid, abstime, private); + + __pthread_cleanup_pop (&buffer, 0); + + if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) + { + /* If we timed out, we effectively cancel waiting. */ + __condvar_cancel_waiting (cond, seq, g, private); + result = err; + break; + } } - /* Try to grab a signal. Use acquire MO so that we see an up-to-date value - of __g1_start below (see spinning above for a similar case). In - particular, if we steal from a more recent group, we will also see a - more recent __g1_start below. 
*/ - while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, - &signals, signals - 2)); - - /* We consumed a signal but we could have consumed from a more recent group - that aliased with ours due to being in the same group slot. If this - might be the case our group must be closed as visible through - __g1_start. */ - uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); - if (seq < (g1_start >> 1)) - { - /* We potentially stole a signal from a more recent group but we do not - know which group we really consumed from. - We do not care about groups older than current G1 because they are - closed; we could have stolen from these, but then we just add a - spurious wake-up for the current groups. - We will never steal a signal from current G2 that was really intended - for G2 because G2 never receives signals (until it becomes G1). We - could have stolen a signal from G2 that was conservatively added by a - previous waiter that also thought it stole a signal -- but given that - that signal was added unnecessarily, it's not a problem if we steal - it. - Thus, the remaining case is that we could have stolen from the current - G1, where "current" means the __g1_start value we observed. However, - if the current G1 does not have the same slot index as we do, we did - not steal from it and do not need to undo that. This is the reason - for putting a bit with G2's index into__g1_start as well. */ - if (((g1_start & 1) ^ 1) == g) - { - /* We have to conservatively undo our potential mistake of stealing - a signal. We can stop trying to do that when the current G1 - changes because other spinning waiters will notice this too and - __condvar_quiesce_and_switch_g1 has checked that there are no - futex waiters anymore before switching G1. - Relaxed MO is fine for the __g1_start load because we need to - merely be able to observe this fact and not have to observe - something else as well. - ??? 
Would it help to spin for a little while to see whether the - current G1 gets closed? This might be worthwhile if the group is - small or close to being closed. */ - unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g); - while (__condvar_load_g1_start_relaxed (cond) == g1_start) - { - /* Try to add a signal. We don't need to acquire the lock - because at worst we can cause a spurious wake-up. If the - group is in the process of being closed (LSB is true), this - has an effect similar to us adding a signal. */ - if (((s & 1) != 0) - || atomic_compare_exchange_weak_relaxed - (cond->__data.__g_signals + g, &s, s + 2)) - { - /* If we added a signal, we also need to add a wake-up on - the futex. We also need to do that if we skipped adding - a signal because the group is being closed because - while __condvar_quiesce_and_switch_g1 could have closed - the group, it might still be waiting for futex waiters to - leave (and one of those waiters might be the one we stole - the signal from, which cause it to block using the - futex). */ - futex_wake (cond->__data.__g_signals + g, 1, private); - break; - } - /* TODO Back off. */ - } - } - } - - done: /* Confirm that we have been woken. We do that before acquiring the mutex to allow for execution of pthread_cond_destroy while having acquired the diff --git a/nptl/pthread_getattr_np.c b/nptl/pthread_getattr_np.c index 1e91874767..3ce34437bc 100644 --- a/nptl/pthread_getattr_np.c +++ b/nptl/pthread_getattr_np.c @@ -145,9 +145,9 @@ __pthread_getattr_np (pthread_t thread_id, pthread_attr_t *attr) > (size_t) iattr->stackaddr - last_to) iattr->stacksize = (size_t) iattr->stackaddr - last_to; #else - /* The limit might be too high. */ + /* The limit might be too low. */ if ((size_t) iattr->stacksize - > to - (size_t) iattr->stackaddr) + < to - (size_t) iattr->stackaddr) iattr->stacksize = to - (size_t) iattr->stackaddr; #endif /* We succeed and no need to look further. 
*/ diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c index 1336e9c79d..bdcb45c536 100644 --- a/nptl/tst-cond22.c +++ b/nptl/tst-cond22.c @@ -106,13 +106,13 @@ do_test (void) status = 1; } - printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n", c.__data.__wseq.__value32.__high, c.__data.__wseq.__value32.__low, c.__data.__g1_start.__value32.__high, c.__data.__g1_start.__value32.__low, - c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], - c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], + c.__data.__g_signals[0], c.__data.__g_size[0], + c.__data.__g_signals[1], c.__data.__g_size[1], c.__data.__g1_orig_size, c.__data.__wrefs); if (pthread_create (&th, NULL, tf, (void *) 1l) != 0) @@ -152,13 +152,13 @@ do_test (void) status = 1; } - printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n", c.__data.__wseq.__value32.__high, c.__data.__wseq.__value32.__low, c.__data.__g1_start.__value32.__high, c.__data.__g1_start.__value32.__low, - c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], - c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], + c.__data.__g_signals[0], c.__data.__g_size[0], + c.__data.__g_signals[1], c.__data.__g_size[1], c.__data.__g1_orig_size, c.__data.__wrefs); return status; diff --git a/nss/getXXbyYY_r.c b/nss/getXXbyYY_r.c index fe7d5b7d0e..3a15b1a4ae 100644 --- a/nss/getXXbyYY_r.c +++ b/nss/getXXbyYY_r.c @@ -157,19 +157,15 @@ __merge_einval (LOOKUP_TYPE *a, #define CHECK_MERGE(err, status) \ ({ \ - do \ + if (err) \ { \ - if (err) \ - { \ - __set_errno (err); \ - if (err == ERANGE) \ - status = NSS_STATUS_TRYAGAIN; \ - else \ - status = NSS_STATUS_UNAVAIL; \ - break; \ - } \ + __set_errno (err); \ + if (err == ERANGE) \ + status = NSS_STATUS_TRYAGAIN; \ + else \ + status = NSS_STATUS_UNAVAIL; \ + break; \ } \ - while (0); \ }) 
/* Type of the lookup function we need here. */ diff --git a/nss/getaddrinfo.c b/nss/getaddrinfo.c index 3ccd3905fa..542b362af3 100644 --- a/nss/getaddrinfo.c +++ b/nss/getaddrinfo.c @@ -1865,6 +1865,22 @@ scopecmp (const void *p1, const void *p2) return 1; } +/* Return true if PTR points to a valid decimal value string and + store the value in *VALUE_P. Otherwise, return false. */ + +static bool +valid_decimal_value (const char *str, unsigned long int *value_p) +{ + char *endp; + unsigned long int value = strtoul (str, &endp, 10); + if (str == endp + || *endp != '\0' + || (value == ULONG_MAX && errno == ERANGE)) + return false; + *value_p = value; + return true; +} + static bool add_prefixlist (struct prefixlist **listp, size_t *lenp, bool *nullbitsp, char *val1, char *val2, char **pos) @@ -1872,7 +1888,6 @@ add_prefixlist (struct prefixlist **listp, size_t *lenp, bool *nullbitsp, struct in6_addr prefix; unsigned long int bits; unsigned long int val; - char *endp; bits = 128; __set_errno (0); @@ -1881,14 +1896,9 @@ add_prefixlist (struct prefixlist **listp, size_t *lenp, bool *nullbitsp, *cp++ = '\0'; *pos = cp; if (inet_pton (AF_INET6, val1, &prefix) - && (cp == NULL - || (bits = strtoul (cp, &endp, 10)) != ULONG_MAX - || errno != ERANGE) - && *endp == '\0' + && (cp == NULL || valid_decimal_value (cp, &bits)) && bits <= 128 - && ((val = strtoul (val2, &endp, 10)) != ULONG_MAX - || errno != ERANGE) - && *endp == '\0' + && valid_decimal_value (val2, &val) && val <= INT_MAX) { struct prefixlist *newp = malloc (sizeof (*newp)); @@ -2031,7 +2041,6 @@ gaiconf_init (void) struct in6_addr prefix; unsigned long int bits; unsigned long int val; - char *endp; bits = 32; __set_errno (0); @@ -2042,15 +2051,10 @@ gaiconf_init (void) { bits = 128; if (IN6_IS_ADDR_V4MAPPED (&prefix) - && (cp == NULL - || (bits = strtoul (cp, &endp, 10)) != ULONG_MAX - || errno != ERANGE) - && *endp == '\0' + && (cp == NULL || valid_decimal_value (cp, &bits)) && bits >= 96 && bits <= 128 - && 
((val = strtoul (val2, &endp, 10)) != ULONG_MAX - || errno != ERANGE) - && *endp == '\0' + && valid_decimal_value (val2, &val) && val <= INT_MAX) { if (!add_scopelist (&scopelist, &nscopelist, @@ -2064,14 +2068,9 @@ gaiconf_init (void) } } else if (inet_pton (AF_INET, val1, &prefix.s6_addr32[3]) - && (cp == NULL - || (bits = strtoul (cp, &endp, 10)) != ULONG_MAX - || errno != ERANGE) - && *endp == '\0' + && (cp == NULL || valid_decimal_value (cp, &bits)) && bits <= 32 - && ((val = strtoul (val2, &endp, 10)) != ULONG_MAX - || errno != ERANGE) - && *endp == '\0' + && valid_decimal_value (val2, &val) && val <= INT_MAX) { if (!add_scopelist (&scopelist, &nscopelist, diff --git a/posix/Makefile b/posix/Makefile index 2c598cd20a..830278a423 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -303,6 +303,7 @@ tests := \ tst-posix_spawn-setsid \ tst-preadwrite \ tst-preadwrite64 \ + tst-regcomp-bracket-free \ tst-regcomp-truncated \ tst-regex \ tst-regex2 \ diff --git a/posix/fork.c b/posix/fork.c index 298765a1ff..cf9b80e7c0 100644 --- a/posix/fork.c +++ b/posix/fork.c @@ -62,6 +62,7 @@ __libc_fork (void) call_function_static_weak (__nss_database_fork_prepare_parent, &nss_database_data); + _IO_proc_file_chain_lock (); _IO_list_lock (); /* Acquire malloc locks. This needs to come last because fork @@ -92,6 +93,7 @@ __libc_fork (void) /* Reset locks in the I/O code. */ _IO_list_resetlock (); + _IO_proc_file_chain_resetlock (); call_function_static_weak (__nss_database_fork_subprocess, &nss_database_data); @@ -121,6 +123,7 @@ __libc_fork (void) /* We execute this even if the 'fork' call failed. */ _IO_list_unlock (); + _IO_proc_file_chain_unlock (); } /* Run the handlers registered for the parent. 
*/ diff --git a/posix/regcomp.c b/posix/regcomp.c index 5380d3c7b9..6595bb3c0d 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -3384,6 +3384,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, { #ifdef RE_ENABLE_I18N free_charset (mbcset); + mbcset = NULL; #endif /* Build a tree for simple bracket. */ br_token.type = SIMPLE_BRACKET; @@ -3399,7 +3400,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, parse_bracket_exp_free_return: re_free (sbcset); #ifdef RE_ENABLE_I18N - free_charset (mbcset); + if (__glibc_likely (mbcset != NULL)) + free_charset (mbcset); #endif /* RE_ENABLE_I18N */ return NULL; } diff --git a/posix/tst-regcomp-bracket-free.c b/posix/tst-regcomp-bracket-free.c new file mode 100644 index 0000000000..3c091d8c44 --- /dev/null +++ b/posix/tst-regcomp-bracket-free.c @@ -0,0 +1,176 @@ +/* Test regcomp bracket parsing with injected allocation failures (bug 33185). + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This test invokes regcomp multiple times, failing one memory + allocation in each call. The function call should fail with + REG_ESPACE (or succeed if it can recover from the allocation + failure). Previously, there was double-free bug. 
*/ + +#include +#include +#include +#include +#include +#include +#include + +/* Data structure allocated via MAP_SHARED, so that writes from the + subprocess are visible. */ +struct shared_data +{ + /* Number of tracked allocations performed so far. */ + volatile unsigned int allocation_count; + + /* If this number is reached, one allocation fails. */ + volatile unsigned int failing_allocation; + + /* The subprocess stores the expected name here. */ + char name[100]; +}; + +/* Allocation count in shared mapping. */ +static struct shared_data *shared; + +/* Returns true if a failure should be injected for this allocation. */ +static bool +fail_this_allocation (void) +{ + if (shared != NULL) + { + unsigned int count = shared->allocation_count; + shared->allocation_count = count + 1; + return count == shared->failing_allocation; + } + else + return false; +} + +/* Failure-injecting wrappers for allocation functions used by glibc. */ + +void * +malloc (size_t size) +{ + if (fail_this_allocation ()) + { + errno = ENOMEM; + return NULL; + } + extern __typeof (malloc) __libc_malloc; + return __libc_malloc (size); +} + +void * +calloc (size_t a, size_t b) +{ + if (fail_this_allocation ()) + { + errno = ENOMEM; + return NULL; + } + extern __typeof (calloc) __libc_calloc; + return __libc_calloc (a, b); +} + +void * +realloc (void *ptr, size_t size) +{ + if (fail_this_allocation ()) + { + errno = ENOMEM; + return NULL; + } + extern __typeof (realloc) __libc_realloc; + return __libc_realloc (ptr, size); +} + +/* No-op subprocess to verify that support_isolate_in_subprocess does + not perform any heap allocations. */ +static void +no_op (void *ignored) +{ +} + +/* Perform a regcomp call in a subprocess. Used to count its + allocations. */ +static void +initialize (void *regexp1) +{ + const char *regexp = regexp1; + + shared->allocation_count = 0; + + regex_t reg; + TEST_COMPARE (regcomp (&reg, regexp, 0), 0); +} + +/* Perform regcomp in a subprocess with fault injection. 
*/ +static void +test_in_subprocess (void *regexp1) +{ + const char *regexp = regexp1; + unsigned int inject_at = shared->failing_allocation; + + regex_t reg; + int ret = regcomp (&reg, regexp, 0); + + if (ret != 0) + { + TEST_COMPARE (ret, REG_ESPACE); + printf ("info: allocation %u failure results in return value %d," + " error %s (%d)\n", + inject_at, ret, strerrorname_np (errno), errno); + } +} + +static int +do_test (void) +{ + char regexp[] = "[:alpha:]"; + + shared = support_shared_allocate (sizeof (*shared)); + + /* Disable fault injection. */ + shared->failing_allocation = ~0U; + + support_isolate_in_subprocess (no_op, NULL); + TEST_COMPARE (shared->allocation_count, 0); + + support_isolate_in_subprocess (initialize, regexp); + + /* The number of allocations in the successful case, plus some + slack. Once the number of expected allocations is exceeded, + injecting further failures does not make a difference. */ + unsigned int maximum_allocation_count = shared->allocation_count; + printf ("info: successful call performs %u allocations\n", + maximum_allocation_count); + maximum_allocation_count += 10; + + for (unsigned int inject_at = 0; inject_at <= maximum_allocation_count; + ++inject_at) + { + shared->allocation_count = 0; + shared->failing_allocation = inject_at; + support_isolate_in_subprocess (test_in_subprocess, regexp); + } + + support_shared_free (shared); + + return 0; +} + +#include diff --git a/posix/tst-truncate-common.c b/posix/tst-truncate-common.c index b774fa46b8..b8c561ffdb 100644 --- a/posix/tst-truncate-common.c +++ b/posix/tst-truncate-common.c @@ -21,6 +21,8 @@ #include #include +#include + static void do_prepare (void); #define PREPARE(argc, argv) do_prepare () static int do_test (void); @@ -42,9 +44,6 @@ do_prepare (void) } } -#define FAIL(str) \ - do { printf ("error: %s (line %d)\n", str, __LINE__); return 1; } while (0) - static int do_test_with_offset (off_t offset) { @@ -54,35 +53,35 @@ do_test_with_offset (off_t offset) memset 
(buf, 0xcf, sizeof (buf)); if (pwrite (temp_fd, buf, sizeof (buf), offset) != sizeof (buf)) - FAIL ("write failed"); + FAIL_RET ("write failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + sizeof (buf))) - FAIL ("initial size wrong"); + FAIL_RET ("initial size wrong"); if (ftruncate (temp_fd, offset + 800) < 0) - FAIL ("size reduction with ftruncate failed"); + FAIL_RET ("size reduction with ftruncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 800)) - FAIL ("size after reduction with ftruncate is incorrect"); + FAIL_RET ("size after reduction with ftruncate is incorrect"); /* The following test covers more than POSIX. POSIX does not require that ftruncate() can increase the file size. But we are testing Unix systems. */ if (ftruncate (temp_fd, offset + 1200) < 0) - FAIL ("size increate with ftruncate failed"); + FAIL_RET ("size increate with ftruncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 1200)) - FAIL ("size after increase is incorrect"); + FAIL_RET ("size after increase is incorrect"); if (truncate (temp_filename, offset + 800) < 0) - FAIL ("size reduction with truncate failed"); + FAIL_RET ("size reduction with truncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 800)) - FAIL ("size after reduction with truncate incorrect"); + FAIL_RET ("size after reduction with truncate incorrect"); /* The following test covers more than POSIX. POSIX does not require that truncate() can increase the file size. But we are testing Unix systems. 
*/ if (truncate (temp_filename, (offset + 1200)) < 0) - FAIL ("size increase with truncate failed"); + FAIL_RET ("size increase with truncate failed"); if (fstat (temp_fd, &st) < 0 || st.st_size != (offset + 1200)) - FAIL ("size increase with truncate is incorrect"); + FAIL_RET ("size increase with truncate is incorrect"); return 0; } diff --git a/resolv/Makefile b/resolv/Makefile index 5f44f5896b..abff7fc007 100644 --- a/resolv/Makefile +++ b/resolv/Makefile @@ -106,6 +106,8 @@ tests += \ tst-resolv-nondecimal \ tst-resolv-res_init-multi \ tst-resolv-search \ + tst-resolv-semi-failure \ + tst-resolv-short-response \ tst-resolv-trailing \ # This test calls __res_context_send directly, which is not exported @@ -299,6 +301,10 @@ $(objpfx)tst-resolv-nondecimal: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-qtypes: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-rotate: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library) +$(objpfx)tst-resolv-semi-failure: $(objpfx)libresolv.so \ + $(shared-thread-library) +$(objpfx)tst-resolv-short-response: $(objpfx)libresolv.so \ + $(shared-thread-library) $(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-threads: $(objpfx)libresolv.so $(shared-thread-library) $(objpfx)tst-resolv-txnid-collision: $(objpfx)libresolv.a \ diff --git a/resolv/res_send.c b/resolv/res_send.c index ea7cf192b2..9c77613f37 100644 --- a/resolv/res_send.c +++ b/resolv/res_send.c @@ -1199,19 +1199,30 @@ send_dg(res_state statp, } /* Check for the correct header layout and a matching - question. */ + question. Some recursive resolvers send REFUSED + without copying back the question section + (producing a response that is only HFIXEDSZ bytes + long). Skip query matching in this case. 
*/ + bool thisansp_error = (anhp->rcode == SERVFAIL || + anhp->rcode == NOTIMP || + anhp->rcode == REFUSED); + bool skip_query_match = (*thisresplenp == HFIXEDSZ + && ntohs (anhp->qdcount) == 0 + && thisansp_error); int matching_query = 0; /* Default to no matching query. */ if (!recvresp1 && anhp->id == hp->id - && __libc_res_queriesmatch (buf, buf + buflen, - *thisansp, - *thisansp + *thisanssizp)) + && (skip_query_match + || __libc_res_queriesmatch (buf, buf + buflen, + *thisansp, + *thisansp + *thisanssizp))) matching_query = 1; if (!recvresp2 && anhp->id == hp2->id - && __libc_res_queriesmatch (buf2, buf2 + buflen2, - *thisansp, - *thisansp + *thisanssizp)) + && (skip_query_match + || __libc_res_queriesmatch (buf2, buf2 + buflen2, + *thisansp, + *thisansp + *thisanssizp))) matching_query = 2; if (matching_query == 0) /* Spurious UDP packet. Drop it and continue @@ -1221,15 +1232,13 @@ send_dg(res_state statp, goto wait; } - if (anhp->rcode == SERVFAIL || - anhp->rcode == NOTIMP || - anhp->rcode == REFUSED) { + if (thisansp_error) { next_ns: if (recvresp1 || (buf2 != NULL && recvresp2)) { *resplen2 = 0; return resplen; } - if (buf2 != NULL) + if (buf2 != NULL && !single_request) { /* No data from the first reply. */ resplen = 0; diff --git a/resolv/tst-resolv-semi-failure.c b/resolv/tst-resolv-semi-failure.c new file mode 100644 index 0000000000..aa9798b5a7 --- /dev/null +++ b/resolv/tst-resolv-semi-failure.c @@ -0,0 +1,133 @@ +/* Test parallel failure/success responses (bug 30081). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* The rcode in the initial response. */ +static volatile int rcode; + +/* Whether to fail the initial A query (!fail_aaaa) or the initial + AAAA query (fail_aaaa). */ +static volatile bool fail_aaaa; + +static void +response (const struct resolv_response_context *ctx, + struct resolv_response_builder *b, + const char *qname, uint16_t qclass, uint16_t qtype) +{ + /* Handle the failing query. */ + if ((fail_aaaa && qtype == T_AAAA) && ctx->server_index == 0) + { + struct resolv_response_flags flags = {.rcode = rcode}; + resolv_response_init (b, flags); + return; + } + + /* Otherwise produce a response. */ + resolv_response_init (b, (struct resolv_response_flags) {}); + resolv_response_add_question (b, qname, qclass, qtype); + resolv_response_section (b, ns_s_an); + resolv_response_open_record (b, qname, qclass, qtype, 0); + switch (qtype) + { + case T_A: + { + char ipv4[4] = {192, 0, 2, 17}; + resolv_response_add_data (b, &ipv4, sizeof (ipv4)); + } + break; + case T_AAAA: + { + char ipv6[16] + = {0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + resolv_response_add_data (b, &ipv6, sizeof (ipv6)); + } + break; + default: + FAIL_EXIT1 ("unexpected TYPE%d query", qtype); + } + resolv_response_close_record (b); +} + +static void +check_one (void) +{ + + /* The buggy 1-second query timeout results in 30 seconds of delay, + which triggers a test timeout failure. 
*/ + for (int i = 0; i < 30; ++i) + { + static const struct addrinfo hints = + { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *ai; + int ret = getaddrinfo ("www.example", "80", &hints, &ai); + const char *expected; + if (ret == 0 && ai->ai_next != NULL) + expected = ("address: STREAM/TCP 192.0.2.17 80\n" + "address: STREAM/TCP 2001:db8::1 80\n"); + else + /* Only one response because the AAAA lookup failure is + treated as an ignoreable error. */ + expected = "address: STREAM/TCP 192.0.2.17 80\n"; + check_addrinfo ("www.example", ai, ret, expected); + if (ret == 0) + freeaddrinfo (ai); + } +} + +static int +do_test (void) +{ + for (int do_single_lookup = 0; do_single_lookup < 2; ++do_single_lookup) + { + struct resolv_test *aux = resolv_test_start + ((struct resolv_redirect_config) + { + .response_callback = response, + }); + + if (do_single_lookup) + _res.options |= RES_SNGLKUP; + + for (int do_fail_aaaa = 0; do_fail_aaaa < 2; ++do_fail_aaaa) + { + fail_aaaa = do_fail_aaaa; + + rcode = 2; /* SERVFAIL. */ + check_one (); + + rcode = 4; /* NOTIMP. */ + check_one (); + + rcode = 5; /* REFUSED. */ + check_one (); + } + + resolv_test_end (aux); + } + + return 0; +} + +#include diff --git a/resolv/tst-resolv-short-response.c b/resolv/tst-resolv-short-response.c new file mode 100644 index 0000000000..9b06b0c176 --- /dev/null +++ b/resolv/tst-resolv-short-response.c @@ -0,0 +1,126 @@ +/* Test for spurious timeouts with short 12-byte responses (bug 31890). + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* The rcode in the initial response. */ +static volatile int rcode; + +static void +response (const struct resolv_response_context *ctx, + struct resolv_response_builder *b, + const char *qname, uint16_t qclass, uint16_t qtype) +{ + switch (ctx->server_index) + { + case 0: + /* First server times out. */ + { + struct resolv_response_flags flags = {.rcode = rcode}; + resolv_response_init (b, flags); + } + break; + case 1: + /* Second server sends reply. */ + resolv_response_init (b, (struct resolv_response_flags) {}); + resolv_response_add_question (b, qname, qclass, qtype); + resolv_response_section (b, ns_s_an); + resolv_response_open_record (b, qname, qclass, qtype, 0); + switch (qtype) + { + case T_A: + { + char ipv4[4] = {192, 0, 2, 17}; + resolv_response_add_data (b, &ipv4, sizeof (ipv4)); + } + break; + case T_AAAA: + { + char ipv6[16] + = {0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + resolv_response_add_data (b, &ipv6, sizeof (ipv6)); + } + break; + default: + FAIL_EXIT1 ("unexpected TYPE%d query", qtype); + } + resolv_response_close_record (b); + break; + default: + FAIL_EXIT1 ("unexpected query to server %d", ctx->server_index); + } +} + +static void +check_one (void) +{ + + /* The buggy 1-second query timeout results in 30 seconds of delay, + which triggers a test timeout failure. 
*/ + for (int i = 0; i < 10; ++i) + { + check_hostent ("www.example", gethostbyname ("www.example"), + "name: www.example\n" + "address: 192.0.2.17\n"); + check_hostent ("www.example", gethostbyname2 ("www.example", AF_INET6), + "name: www.example\n" + "address: 2001:db8::1\n"); + static const struct addrinfo hints = + { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *ai; + int ret = getaddrinfo ("www.example", "80", &hints, &ai); + check_addrinfo ("www.example", ai, ret, + "address: STREAM/TCP 192.0.2.17 80\n" + "address: STREAM/TCP 2001:db8::1 80\n"); + if (ret == 0) + freeaddrinfo (ai); + } +} + +static int +do_test (void) +{ + struct resolv_test *aux = resolv_test_start + ((struct resolv_redirect_config) + { + .response_callback = response, + }); + + _res.options |= RES_SNGLKUP; + + rcode = 2; /* SERVFAIL. */ + check_one (); + + rcode = 4; /* NOTIMP. */ + check_one (); + + rcode = 5; /* REFUSED. */ + check_one (); + + resolv_test_end (aux); + + return 0; +} + +#include diff --git a/socket/bits/socket2.h b/socket/bits/socket2.h index 04780f320e..bd91647f37 100644 --- a/socket/bits/socket2.h +++ b/socket/bits/socket2.h @@ -37,14 +37,14 @@ recv (int __fd, __fortify_clang_overload_arg0 (void *, ,__buf), size_t __n, "recv called with bigger length than " "size of destination buffer") { - size_t sz = __glibc_objsize0 (__buf); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize0 (__buf); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __recv_alias (__fd, __buf, __n, __flags); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __recv_chk_warn (__fd, __buf, __n, sz, __flags); + if (__glibc_unsafe_len (__n, sizeof (char), __sz)) + return __recv_chk_warn (__fd, __buf, __n, __sz, __flags); #endif - return __recv_chk (__fd, __buf, __n, sz, __flags); + return __recv_chk (__fd, __buf, __n, __sz, __flags); } extern ssize_t __recvfrom_chk (int __fd, 
void *__restrict __buf, size_t __n, @@ -71,13 +71,13 @@ recvfrom (int __fd, __fortify_clang_overload_arg0 (void *, __restrict, __buf), "recvfrom called with bigger length " "than size of destination buffer") { - size_t sz = __glibc_objsize0 (__buf); - if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) + size_t __sz = __glibc_objsize0 (__buf); + if (__glibc_safe_or_unknown_len (__n, sizeof (char), __sz)) return __recvfrom_alias (__fd, __buf, __n, __flags, __addr, __addr_len); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (char), sz)) - return __recvfrom_chk_warn (__fd, __buf, __n, sz, __flags, __addr, + if (__glibc_unsafe_len (__n, sizeof (char), __sz)) + return __recvfrom_chk_warn (__fd, __buf, __n, __sz, __flags, __addr, __addr_len); #endif - return __recvfrom_chk (__fd, __buf, __n, sz, __flags, __addr, __addr_len); + return __recvfrom_chk (__fd, __buf, __n, __sz, __flags, __addr, __addr_len); } diff --git a/stdio-common/Makefile b/stdio-common/Makefile index a63c05a120..3396090be1 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -207,6 +207,7 @@ tests := \ tst-cookie \ tst-dprintf-length \ tst-fdopen \ + tst-fdopen2 \ tst-ferror \ tst-fgets \ tst-fileno \ @@ -216,6 +217,7 @@ tests := \ tst-fmemopen4 \ tst-fphex \ tst-fphex-wide \ + tst-fread \ tst-fseek \ tst-fwrite \ tst-gets \ @@ -240,6 +242,7 @@ tests := \ tst-scanf-binary-c23 \ tst-scanf-binary-gnu11 \ tst-scanf-binary-gnu89 \ + tst-scanf-bz27650 \ tst-scanf-intn \ tst-scanf-round \ tst-scanf-to_inpunct \ @@ -253,6 +256,7 @@ tests := \ tst-swscanf \ tst-tmpnam \ tst-ungetc \ + tst-ungetc-leak \ tst-unlockedio \ tst-vfprintf-mbs-prec \ tst-vfprintf-user-type \ @@ -315,6 +319,7 @@ tests-special += \ $(objpfx)tst-printf-bz25691-mem.out \ $(objpfx)tst-printf-fp-free-mem.out \ $(objpfx)tst-printf-fp-leak-mem.out \ + $(objpfx)tst-ungetc-leak-mem.out \ $(objpfx)tst-vfprintf-width-prec-mem.out \ # tests-special @@ -328,6 +333,9 @@ generated += \ tst-printf-fp-free.mtrace \ 
tst-printf-fp-leak-mem.out \ tst-printf-fp-leak.mtrace \ + tst-scanf-bz27650.mtrace \ + tst-ungetc-leak-mem.out \ + tst-ungetc-leak.mtrace \ tst-vfprintf-width-prec-mem.out \ tst-vfprintf-width-prec.mtrace \ # generated @@ -419,6 +427,12 @@ tst-printf-fp-free-ENV = \ tst-printf-fp-leak-ENV = \ MALLOC_TRACE=$(objpfx)tst-printf-fp-leak.mtrace \ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so +tst-scanf-bz27650-ENV = \ + MALLOC_TRACE=$(objpfx)tst-scanf-bz27650.mtrace \ + LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so +tst-ungetc-leak-ENV = \ + MALLOC_TRACE=$(objpfx)tst-ungetc-leak.mtrace \ + LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so $(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc $(SHELL) $< $(common-objpfx) '$(test-program-prefix)'; \ diff --git a/stdio-common/printf-parsemb.c b/stdio-common/printf-parsemb.c index ab9fafb5ec..8db18f11b3 100644 --- a/stdio-common/printf-parsemb.c +++ b/stdio-common/printf-parsemb.c @@ -17,6 +17,7 @@ . */ #include +#include #include #include #include diff --git a/stdio-common/tst-fdopen2.c b/stdio-common/tst-fdopen2.c new file mode 100644 index 0000000000..0c6625f258 --- /dev/null +++ b/stdio-common/tst-fdopen2.c @@ -0,0 +1,246 @@ +/* Test the fdopen function. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include +#include +#include +#include + +char *tmp_dir; +char *path_to_file; + +void +prepare_tmp_dir (void) +{ + tmp_dir = support_create_temp_directory ("tst-fdopen2"); + path_to_file = xasprintf ("%s/tst-fdopen2.txt", tmp_dir); +} + +/* open temp file descriptor with mode. */ +int +open_tmp_fd (int mode) +{ + int fd = xopen (path_to_file, mode, 0644); + return fd; +} + + +/* close and remove temp file with close. */ +void +close_tmp_fd (int fd) +{ + xclose (fd); + xunlink (path_to_file); +} + +/* close and remove temp file with fclose. */ +void +close_tmp_fp (FILE *fp) +{ + fclose (fp); + xunlink (path_to_file); +} + +/* test "w" fdopen mode. */ +void +do_test_fdopen_w (void) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (O_WRONLY | O_CREAT | O_TRUNC); + + /* test mode mismatch. */ + fp = fdopen (fd, "r"); + if (fp != NULL || errno != EINVAL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, r) should fail with EINVAL: %m", fd); + } + + fp = fdopen (fd, "w"); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w): %m", fd); + } + + const void *buf = "AAAA"; + ret = fwrite (buf, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite (): %m"); + } + + unsigned char buf2[4]; + rewind (fp); + clearerr (fp); + /* fread should fail in "w" mode */ + ret = fread (buf2, 1, 4, fp); + if (ret != 0 || ferror (fp) == 0) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread should fail in \"w\" mode"); + } + + fclose (fp); +} + +/* test "r" fdopen mode. */ +void +do_test_fdopen_r (void) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (O_RDONLY); + + /* test mode mismatch. 
*/ + fp = fdopen (fd, "w"); + if (fp != NULL || errno != EINVAL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w) should fail with EINVAL: %m", fd); + } + + fp = fdopen (fd, "r"); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w): %m", fd); + } + + const void *buf = "BBBB"; + /* fwrite should fail in "r" mode. */ + ret = fwrite (buf, 1, 4, fp); + if (ret != 0 || ferror (fp) == 0) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite should fail in \"r\" mode"); + } + + unsigned char buf2[4]; + ret = fread (buf2, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread (): %m"); + } + + fclose (fp); +} + +/* test "a" fdopen mode. */ +void +do_test_fdopen_a (void) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (O_WRONLY | O_CREAT | O_APPEND); + + /* test mode mismatch. */ + fp = fdopen (fd, "r+"); + if (fp != NULL || errno != EINVAL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, \"r+\") should fail with EINVAL: %m", fd); + } + + fp = fdopen (fd, "a"); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, w): %m", fd); + } + + const void *buf = "CCCC"; + ret = fwrite (buf, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite (): %m"); + } + + /* fread should fail in "a" mode. 
*/ + unsigned char buf2[4]; + clearerr (fp); + ret = fread (buf2, 1, 4, fp); + if (ret != 0 || ferror (fp) == 0) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread should fail \"a\" mode"); + } + + fclose (fp); +} + +void +do_test_fdopen_mode (int mode, const char *fmode) +{ + int fd, ret; + FILE *fp; + fd = open_tmp_fd (mode); + + fp = fdopen (fd, fmode); + if (fp == NULL) + { + close_tmp_fd (fd); + FAIL_EXIT1 ("fdopen (%d, %s): %m", fd, fmode); + } + + const void *buf = "EEEE"; + ret = fwrite (buf, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fwrite () in mode:%s returns %d: %m", fmode, ret); + } + + rewind (fp); + unsigned char buf2[4]; + ret = fread (buf2, 1, 4, fp); + if (ret != 4) + { + close_tmp_fp (fp); + FAIL_EXIT1 ("fread () in mode:%s returns %d: %m", fmode, ret); + } + + fclose (fp); +} + +static int +do_test (void) +{ + + prepare_tmp_dir (); + + do_test_fdopen_w (); + do_test_fdopen_r (); + do_test_fdopen_a (); + + /* test r+ w+ a+ fdopen modes. */ + do_test_fdopen_mode (O_RDWR, "r+"); + do_test_fdopen_mode (O_RDWR | O_CREAT | O_TRUNC, "w+"); + do_test_fdopen_mode (O_RDWR | O_CREAT | O_APPEND, "a+"); + xunlink (path_to_file); + return 0; +} + +#include diff --git a/stdio-common/tst-fread.c b/stdio-common/tst-fread.c new file mode 100644 index 0000000000..4d9a7895f6 --- /dev/null +++ b/stdio-common/tst-fread.c @@ -0,0 +1,134 @@ +/* Test fread. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int +do_test (void) +{ + char *temp_dir = support_create_temp_directory ("tst-fread"); + char *file1 = xasprintf ("%s/file1", temp_dir); + support_write_file_string (file1, "file1"); + add_temp_file (file1); + FILE *fp; + size_t ret; + char buf[1024]; + + verbose_printf ("test single-byte reads\n"); + fp = xfopen (file1, "r"); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, 2, fp); + TEST_COMPARE (ret, 2); + TEST_COMPARE (buf[0], 'f'); + TEST_COMPARE (buf[1], 'i'); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ftell (fp), 2); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, 3, fp); + TEST_COMPARE (ret, 3); + TEST_COMPARE (buf[0], 'l'); + TEST_COMPARE (buf[1], 'e'); + TEST_COMPARE (buf[2], '1'); + TEST_COMPARE (ftell (fp), 5); + TEST_COMPARE (feof (fp), 0); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, 1, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (!!feof (fp), 1); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 5); + xfclose (fp); + + verbose_printf ("test single-byte reads, EOF part way through\n"); + fp = xfopen (file1, "r"); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, sizeof buf, fp); + TEST_COMPARE (ret, 5); + TEST_COMPARE (buf[0], 'f'); + TEST_COMPARE (buf[1], 'i'); + TEST_COMPARE (buf[2], 'l'); + TEST_COMPARE (buf[3], 'e'); + TEST_COMPARE (buf[4], '1'); + TEST_COMPARE (!!feof (fp), 1); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 5); + xfclose (fp); + + verbose_printf ("test multi-byte reads\n"); + fp = xfopen (file1, "r"); + memset (buf, 0, sizeof buf); + ret = fread (buf, 2, 2, fp); + TEST_COMPARE (ret, 2); + TEST_COMPARE (buf[0], 'f'); + TEST_COMPARE (buf[1], 'i'); + TEST_COMPARE (buf[2], 'l'); + TEST_COMPARE 
(buf[3], 'e'); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ftell (fp), 4); + memset (buf, 0, sizeof buf); + ret = fread (buf, 3, 3, fp); + TEST_COMPARE (ret, 0); + /* The bytes written for a partial element read are unspecified. */ + TEST_COMPARE (!!feof (fp), 1); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 5); + xfclose (fp); + + verbose_printf ("test read error\n"); + fp = xfopen (file1, "r"); + xclose (fileno (fp)); + memset (buf, 0, sizeof buf); + ret = fread (buf, 1, sizeof buf, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (!!ferror (fp), 1); + fclose (fp); + + verbose_printf ("test zero size\n"); + fp = xfopen (file1, "r"); + ret = fread (buf, 0, SIZE_MAX, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 0); + xfclose (fp); + + verbose_printf ("test zero items\n"); + fp = xfopen (file1, "r"); + ret = fread (buf, SIZE_MAX, 0, fp); + TEST_COMPARE (ret, 0); + TEST_COMPARE (feof (fp), 0); + TEST_COMPARE (ferror (fp), 0); + TEST_COMPARE (ftell (fp), 0); + xfclose (fp); + + free (temp_dir); + free (file1); + return 0; +} + +#include diff --git a/stdio-common/tst-scanf-bz27650.c b/stdio-common/tst-scanf-bz27650.c new file mode 100644 index 0000000000..3a742bc865 --- /dev/null +++ b/stdio-common/tst-scanf-bz27650.c @@ -0,0 +1,108 @@ +/* Test for BZ #27650, formatted input matching beyond INT_MAX. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* Produce a stream of more than INT_MAX characters via buffer BUF of + size SIZE according to bookkeeping in COOKIE and then return EOF. */ + +static ssize_t +io_read (void *cookie, char *buf, size_t size) +{ + unsigned int *written = cookie; + unsigned int w = *written; + + if (w > INT_MAX) + return 0; + + memset (buf, 'a', size); + *written = w + size; + return size; +} + +/* Consume a stream of more than INT_MAX characters from an artificial + input stream of which none is the new line character. The call to + fscanf is supposed to complete upon the EOF condition of input, + however in the presence of BZ #27650 it will terminate prematurely + with characters still outstanding in input. Diagnose the condition + and return status accordingly. 
*/ + +int +do_test (void) +{ + static cookie_io_functions_t io_funcs = { .read = io_read }; + unsigned int written = 0; + FILE *in; + int v; + + mtrace (); + + in = fopencookie (&written, "r", io_funcs); + if (in == NULL) + { + FAIL ("fopencookie: %m"); + goto out; + } + + v = fscanf (in, "%*[^\n]"); + if (ferror (in)) + { + FAIL ("fscanf: input failure, at %u: %m", written); + goto out_close; + } + else if (v == EOF) + { + FAIL ("fscanf: unexpected end of file, at %u", written); + goto out_close; + } + + if (!feof (in)) + { + v = fgetc (in); + if (ferror (in)) + FAIL ("fgetc: input failure: %m"); + else if (v == EOF) + FAIL ("fgetc: unexpected end of file after missing end of file"); + else if (v == '\n') + FAIL ("unexpected new line character received"); + else + FAIL ("character received after end of file expected: \\x%02x", v); + } + +out_close: + if (fclose (in) != 0) + FAIL ("fclose: %m"); + +out: + return EXIT_SUCCESS; +} + +#define TIMEOUT (DEFAULT_TIMEOUT * 8) +#include diff --git a/stdio-common/tst-ungetc-leak.c b/stdio-common/tst-ungetc-leak.c new file mode 100644 index 0000000000..6c5152b43f --- /dev/null +++ b/stdio-common/tst-ungetc-leak.c @@ -0,0 +1,32 @@ +/* Test for memory leak with ungetc when stream is unused. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include + +static int +do_test (void) +{ + mtrace (); + TEST_COMPARE (ungetc('y', stdin), 'y'); + return 0; +} + +#include diff --git a/stdio-common/tst-ungetc.c b/stdio-common/tst-ungetc.c index 1344b2b591..388b202493 100644 --- a/stdio-common/tst-ungetc.c +++ b/stdio-common/tst-ungetc.c @@ -1,70 +1,74 @@ -/* Test for ungetc bugs. */ +/* Test for ungetc bugs. + Copyright (C) 1996-2024 Free Software Foundation, Inc. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ #include #include -#include - -#undef assert -#define assert(x) \ - if (!(x)) \ - { \ - fputs ("test failed: " #x "\n", stderr); \ - retval = 1; \ - goto the_end; \ - } +#include +#include +#include +#include +#include -int -main (int argc, char *argv[]) +static int +do_test (void) { - char name[] = "/tmp/tst-ungetc.XXXXXX"; + char *name = NULL; FILE *fp = NULL; - int retval = 0; int c; char buffer[64]; - int fd = mkstemp (name); + int fd = create_temp_file ("tst-ungetc.", &name); if (fd == -1) - { - printf ("mkstemp failed: %m\n"); - return 1; - } - close (fd); - fp = fopen (name, "w"); - assert (fp != NULL) - fputs ("bla", fp); - fclose (fp); - fp = NULL; + FAIL_EXIT1 ("cannot create temporary file: %m"); + xclose (fd); - fp = fopen (name, "r"); - assert (fp != NULL); - assert (ungetc ('z', fp) == 'z'); - assert (getc (fp) == 'z'); - assert (getc (fp) == 'b'); - assert (getc (fp) == 'l'); - assert (ungetc ('m', fp) == 'm'); - assert (getc (fp) == 'm'); - assert ((c = getc (fp)) == 'a'); - assert (getc (fp) == EOF); - assert (ungetc (c, fp) == c); - assert (feof (fp) == 0); - assert (getc (fp) == c); - assert (getc (fp) == EOF); - fclose (fp); - fp = NULL; + fp = xfopen (name, "w"); + fputs ("bla", fp); + xfclose (fp); - fp = fopen (name, "r"); - assert (fp != NULL); - assert (getc (fp) == 'b'); - assert (getc (fp) == 'l'); - assert (ungetc ('b', fp) == 'b'); - assert (fread (buffer, 1, 64, fp) == 2); - assert (buffer[0] == 'b'); - assert (buffer[1] == 'a'); + fp = xfopen (name, "r"); + TEST_VERIFY_EXIT (ungetc ('z', fp) == 'z'); + TEST_VERIFY_EXIT (getc (fp) == 'z'); + TEST_VERIFY_EXIT (getc (fp) == 'b'); + TEST_VERIFY_EXIT (getc (fp) == 'l'); + TEST_VERIFY_EXIT (ungetc ('m', fp) == 'm'); + TEST_VERIFY_EXIT (ungetc ('n', fp) == 'n'); + TEST_VERIFY_EXIT (getc (fp) == 'n'); + TEST_VERIFY_EXIT (getc (fp) == 'm'); + TEST_VERIFY_EXIT ((c = getc (fp)) == 'a'); + TEST_VERIFY_EXIT (getc (fp) == EOF); + TEST_VERIFY_EXIT (ungetc (c, fp) == c); + TEST_VERIFY_EXIT (feof 
(fp) == 0); + TEST_VERIFY_EXIT (getc (fp) == c); + TEST_VERIFY_EXIT (getc (fp) == EOF); + xfclose (fp); -the_end: - if (fp != NULL) - fclose (fp); - unlink (name); + fp = xfopen (name, "r"); + TEST_VERIFY_EXIT (getc (fp) == 'b'); + TEST_VERIFY_EXIT (getc (fp) == 'l'); + TEST_VERIFY_EXIT (ungetc ('b', fp) == 'b'); + TEST_VERIFY_EXIT (fread (buffer, 1, 64, fp) == 2); + TEST_VERIFY_EXIT (buffer[0] == 'b'); + TEST_VERIFY_EXIT (buffer[1] == 'a'); + xfclose (fp); - return retval; + return 0; } + +#include diff --git a/stdlib/Makefile b/stdlib/Makefile index 8b0ac63ddb..d3a84fa641 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -307,6 +307,7 @@ tests := \ tst-setcontext9 \ tst-setcontext10 \ tst-setcontext11 \ + tst-setenv-environ \ tst-stdbit-Wconversion \ tst-stdbit-builtins \ tst-stdc_bit_ceil \ @@ -603,6 +604,8 @@ $(objpfx)bug-strtod2: $(libm) $(objpfx)tst-strtod-round: $(libm) $(objpfx)tst-tininess: $(libm) $(objpfx)tst-strtod-underflow: $(libm) +$(objpfx)tst-strtod5: $(libm) +$(objpfx)tst-strtod5i: $(libm) $(objpfx)tst-strtod6: $(libm) $(objpfx)tst-strtod-nan-locale: $(libm) $(objpfx)tst-strtod-nan-sign: $(libm) diff --git a/stdlib/bits/stdlib.h b/stdlib/bits/stdlib.h index 1557b862b1..9c78ecf458 100644 --- a/stdlib/bits/stdlib.h +++ b/stdlib/bits/stdlib.h @@ -43,16 +43,16 @@ __NTH (realpath (const char *__restrict __name, "bytes long buffer") #endif { - size_t sz = __glibc_objsize (__resolved); + size_t __sz = __glibc_objsize (__resolved); - if (sz == (size_t) -1) + if (__sz == (size_t) -1) return __realpath_alias (__name, __resolved); #if !__fortify_use_clang && defined _LIBC_LIMITS_H_ && defined PATH_MAX - if (__glibc_unsafe_len (PATH_MAX, sizeof (char), sz)) - return __realpath_chk_warn (__name, __resolved, sz); + if (__glibc_unsafe_len (PATH_MAX, sizeof (char), __sz)) + return __realpath_chk_warn (__name, __resolved, __sz); #endif - return __realpath_chk (__name, __resolved, sz); + return __realpath_chk (__name, __resolved, __sz); } diff --git 
a/stdlib/gen-tst-strtod-round.c b/stdlib/gen-tst-strtod-round.c index e48bf4d6ea..7ce735f81d 100644 --- a/stdlib/gen-tst-strtod-round.c +++ b/stdlib/gen-tst-strtod-round.c @@ -46,6 +46,7 @@ static int string_to_fp (mpfr_t f, const char *s, mpfr_rnd_t rnd) { mpfr_clear_overflow (); + mpfr_clear_underflow (); #ifdef WORKAROUND mpfr_t f2; mpfr_init2 (f2, 100000); @@ -53,12 +54,16 @@ string_to_fp (mpfr_t f, const char *s, mpfr_rnd_t rnd) int r = mpfr_set (f, f2, rnd); r |= mpfr_subnormalize (f, r, rnd); mpfr_clear (f2); - return r0 | r; + r |= r0; #else int r = mpfr_strtofr (f, s, NULL, 0, rnd); r |= mpfr_subnormalize (f, r, rnd); - return r; #endif + if (r == 0) + /* The MPFR underflow flag is set for exact subnormal results, + which is not wanted here. */ + mpfr_clear_underflow (); + return r; } void @@ -70,6 +75,21 @@ print_fp (FILE *fout, mpfr_t f, const char *suffix) mpfr_fprintf (fout, "\t%Ra%s", f, suffix); } +static const char * +suffix_to_print (bool overflow, bool underflow, bool underflow_before_rounding, + bool with_comma) +{ + if (overflow) + return with_comma ? ", true, false,\n" : ", true, false"; + if (underflow) + return with_comma ? ", false, true,\n" : ", false, true"; + if (underflow_before_rounding) + return (with_comma + ? ", false, !TININESS_AFTER_ROUNDING,\n" + : ", false, !TININESS_AFTER_ROUNDING"); + return with_comma ? 
", false, false,\n" : ", false, false"; +} + static void round_str (FILE *fout, const char *s, int prec, int emin, int emax, bool ibm_ld) @@ -80,8 +100,11 @@ round_str (FILE *fout, const char *s, int prec, int emin, int emax, mpfr_set_emin (emin); mpfr_set_emax (emax); mpfr_init (f); + string_to_fp (f, s, MPFR_RNDZ); + bool underflow_before_rounding = mpfr_underflow_p () != 0; int r = string_to_fp (f, s, MPFR_RNDD); bool overflow = mpfr_overflow_p () != 0; + bool underflow = mpfr_underflow_p () != 0; if (ibm_ld) { assert (prec == 106 && emin == -1073 && emax == 1024); @@ -97,19 +120,27 @@ round_str (FILE *fout, const char *s, int prec, int emin, int emax, } } mpfr_fprintf (fout, "\t%s,\n", r ? "false" : "true"); - print_fp (fout, f, overflow ? ", true,\n" : ", false,\n"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + true)); string_to_fp (f, s, MPFR_RNDN); overflow = (mpfr_overflow_p () != 0 || (ibm_ld && mpfr_cmpabs (f, max_value) > 0)); - print_fp (fout, f, overflow ? ", true,\n" : ", false,\n"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + true)); string_to_fp (f, s, MPFR_RNDZ); overflow = (mpfr_overflow_p () != 0 || (ibm_ld && mpfr_cmpabs (f, max_value) > 0)); - print_fp (fout, f, overflow ? ", true,\n" : ", false,\n"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + true)); string_to_fp (f, s, MPFR_RNDU); overflow = (mpfr_overflow_p () != 0 || (ibm_ld && mpfr_cmpabs (f, max_value) > 0)); - print_fp (fout, f, overflow ? 
", true" : ", false"); + print_fp (fout, f, + suffix_to_print (overflow, underflow, underflow_before_rounding, + false)); mpfr_clear (f); if (ibm_ld) mpfr_clear (max_value); diff --git a/stdlib/strtod_l.c b/stdlib/strtod_l.c index be515ce659..beb97b3d0c 100644 --- a/stdlib/strtod_l.c +++ b/stdlib/strtod_l.c @@ -222,6 +222,7 @@ round_and_return (mp_limb_t *retval, intmax_t exponent, int negative, mp_size_t shift = MIN_EXP - 1 - exponent; bool is_tiny = true; + bool old_half_bit = (round_limb & (((mp_limb_t) 1) << round_bit)) != 0; more_bits |= (round_limb & ((((mp_limb_t) 1) << round_bit) - 1)) != 0; if (shift == MANT_DIG) @@ -292,6 +293,7 @@ round_and_return (mp_limb_t *retval, intmax_t exponent, int negative, round_bit = shift - 1; (void) __mpn_rshift (retval, retval, RETURN_LIMB_SIZE, shift); } + more_bits |= old_half_bit; /* This is a hook for the m68k long double format, where the exponent bias is the same for normalized and denormalized numbers. */ diff --git a/stdlib/strtod_nan_main.c b/stdlib/strtod_nan_main.c index 4cb286d2b3..39fb7e9f75 100644 --- a/stdlib/strtod_nan_main.c +++ b/stdlib/strtod_nan_main.c @@ -16,6 +16,7 @@ License along with the GNU C Library; if not, see . 
*/ +#include #include #include #include @@ -50,7 +51,9 @@ STRTOD_NAN (const STRING_TYPE *str, STRING_TYPE **endptr, STRING_TYPE endc) STRING_TYPE *endp; unsigned long long int mant; + int save_errno = errno; mant = STRTOULL (str, &endp, 0); + __set_errno (save_errno); if (endp == cp) SET_NAN_PAYLOAD (retval, mant); diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c index cc26ed6d15..cefee58d46 100644 --- a/stdlib/tst-secure-getenv.c +++ b/stdlib/tst-secure-getenv.c @@ -57,13 +57,7 @@ do_test (void) exit (1); } - int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); - - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - return EXIT_UNSUPPORTED; - - if (!WIFEXITED (status)) - FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); + support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); return 0; } @@ -82,6 +76,7 @@ alternative_main (int argc, char **argv) if (secure_getenv ("PATH") != NULL) FAIL_EXIT (4, "PATH variable not filtered out\n"); + support_record_failure_barrier (); exit (EXIT_SUCCESS); } } diff --git a/stdlib/tst-setenv-environ.c b/stdlib/tst-setenv-environ.c new file mode 100644 index 0000000000..02fcef96d0 --- /dev/null +++ b/stdlib/tst-setenv-environ.c @@ -0,0 +1,36 @@ +/* Test using setenv with updated environ. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +extern char **environ; + +int +do_test (void) +{ + char *valp; + static char *dummy_environ[] = { NULL }; + environ = dummy_environ; + setenv ("A", "1", 0); + valp = getenv ("A"); + TEST_VERIFY_EXIT (valp[0] == '1' && valp[1] == '\0'); + return 0; +} + +#include diff --git a/stdlib/tst-strtod-round-data b/stdlib/tst-strtod-round-data index 84ab705709..9489fbcc9c 100644 --- a/stdlib/tst-strtod-round-data +++ b/stdlib/tst-strtod-round-data @@ -265,3 +265,15 @@ 1.000000000000000000000000000000000385185988877447170611195588516985463707620329643077639047987759113311767578125 1.0000000000000000000000000000000001925929944387235853055977942584927318538101648215388195239938795566558837890625 1.00000000000000000000000000000000009629649721936179265279889712924636592690508241076940976199693977832794189453125 +0x30000002222225p-1077 +0x0.7fffffffffffeap-1022 +0x0.7fffffffffffe9p-1022 +0x0.7ffffd4p-126 +0x0.7ffffffffffffffd4p-16382 +0x0.7ffffffffffffffd4p-16383 +0x0.7ffffffffffffffffffffffffffeap-16382 +0x0.7000004p-126 +0x0.70000000000002p-1022 +0x0.70000000000000004p-16382 +0x0.70000000000000004p-16383 +0x0.70000000000000000000000000002p-16382 diff --git a/stdlib/tst-strtod-round-data.h b/stdlib/tst-strtod-round-data.h index 8899d15f9b..ed50eb2537 100644 --- a/stdlib/tst-strtod-round-data.h +++ b/stdlib/tst-strtod-round-data.h @@ -2,1852 +2,1852 @@ static const struct test tests[] = { TEST ("3.518437208883201171875E+013", false, - 0x2p+44, false, - 0x2p+44, false, - 0x2p+44, false, - 0x2.000004p+44, false, - false, - 0x2.0000000000002p+44, false, - 0x2.0000000000004p+44, false, - 0x2.0000000000002p+44, false, - 0x2.0000000000004p+44, false, - true, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - true, - 0x2.0000000000003p+44, false, - 
0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - true, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - true, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false, - 0x2.0000000000003p+44, false), + 0x2p+44, false, false, + 0x2p+44, false, false, + 0x2p+44, false, false, + 0x2.000004p+44, false, false, + false, + 0x2.0000000000002p+44, false, false, + 0x2.0000000000004p+44, false, false, + 0x2.0000000000002p+44, false, false, + 0x2.0000000000004p+44, false, false, + true, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + true, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + true, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + true, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false, + 0x2.0000000000003p+44, false, false), TEST ("1.00000005960464477550", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, 
false, - false, - 0x1.0000010000000002048242f2ff66p+0, false, - 0x1.0000010000000002048242f2ff67p+0, false, - 0x1.0000010000000002048242f2ff66p+0, false, - 0x1.0000010000000002048242f2ff67p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + false, + 0x1.0000010000000002048242f2ff66p+0, false, false, + 0x1.0000010000000002048242f2ff67p+0, false, false, + 0x1.0000010000000002048242f2ff66p+0, false, false, + 0x1.0000010000000002048242f2ff67p+0, false, false), TEST ("1.0000000596046447755", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000002p+0, false, - 0x1.0000010000000004p+0, false, - false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, false, - 0x1.0000010000000002048242f2ffp+0, false, - 0x1.0000010000000002048242f2ff8p+0, false, - false, - 0x1.0000010000000002048242f2ff66p+0, false, - 
0x1.0000010000000002048242f2ff67p+0, false, - 0x1.0000010000000002048242f2ff66p+0, false, - 0x1.0000010000000002048242f2ff67p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000002p+0, false, false, + 0x1.0000010000000004p+0, false, false, + false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + 0x1.0000010000000002048242f2ffp+0, false, false, + 0x1.0000010000000002048242f2ff8p+0, false, false, + false, + 0x1.0000010000000002048242f2ff66p+0, false, false, + 0x1.0000010000000002048242f2ff67p+0, false, false, + 0x1.0000010000000002048242f2ff66p+0, false, false, + 0x1.0000010000000002048242f2ff67p+0, false, false), TEST ("1.000000059604644776", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - 0x1.000001000000000ap+0, false, - 0x1.000001000000000cp+0, false, - false, - 0x1.000001000000000b3db12bdc21p+0, false, - 0x1.000001000000000b3db12bdc21p+0, false, - 0x1.000001000000000b3db12bdc21p+0, false, - 0x1.000001000000000b3db12bdc218p+0, false, - false, - 0x1.000001000000000b3db12bdc213cp+0, false, - 0x1.000001000000000b3db12bdc213dp+0, false, - 
0x1.000001000000000b3db12bdc213cp+0, false, - 0x1.000001000000000b3db12bdc213dp+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + 0x1.000001000000000ap+0, false, false, + 0x1.000001000000000cp+0, false, false, + false, + 0x1.000001000000000b3db12bdc21p+0, false, false, + 0x1.000001000000000b3db12bdc21p+0, false, false, + 0x1.000001000000000b3db12bdc21p+0, false, false, + 0x1.000001000000000b3db12bdc218p+0, false, false, + false, + 0x1.000001000000000b3db12bdc213cp+0, false, false, + 0x1.000001000000000b3db12bdc213dp+0, false, false, + 0x1.000001000000000b3db12bdc213cp+0, false, false, + 0x1.000001000000000b3db12bdc213dp+0, false, false), TEST ("1.000000059604644775", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffffap+0, false, - false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffff8p+0, false, - 0x1.000000fffffffffap+0, false, - false, - 0x1.000000fffffffff8cb535a09dd8p+0, false, - 0x1.000000fffffffff8cb535a09dd8p+0, false, - 0x1.000000fffffffff8cb535a09dd8p+0, false, - 0x1.000000fffffffff8cb535a09dep+0, false, - false, - 0x1.000000fffffffff8cb535a09dd9p+0, false, - 0x1.000000fffffffff8cb535a09dd91p+0, false, - 0x1.000000fffffffff8cb535a09dd9p+0, false, - 
0x1.000000fffffffff8cb535a09dd91p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffffap+0, false, false, + false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffff8p+0, false, false, + 0x1.000000fffffffffap+0, false, false, + false, + 0x1.000000fffffffff8cb535a09dd8p+0, false, false, + 0x1.000000fffffffff8cb535a09dd8p+0, false, false, + 0x1.000000fffffffff8cb535a09dd8p+0, false, false, + 0x1.000000fffffffff8cb535a09dep+0, false, false, + false, + 0x1.000000fffffffff8cb535a09dd9p+0, false, false, + 0x1.000000fffffffff8cb535a09dd91p+0, false, false, + 0x1.000000fffffffff8cb535a09dd9p+0, false, false, + 0x1.000000fffffffff8cb535a09dd91p+0, false, false), TEST ("1.00000005960464478", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - 0x1.0000010000000054p+0, false, - 0x1.0000010000000056p+0, false, - false, - 0x1.0000010000000055072873252f8p+0, false, - 0x1.0000010000000055072873253p+0, false, - 0x1.0000010000000055072873252f8p+0, false, - 0x1.0000010000000055072873253p+0, false, - false, - 0x1.0000010000000055072873252febp+0, false, - 0x1.0000010000000055072873252febp+0, false, - 0x1.0000010000000055072873252febp+0, false, - 0x1.0000010000000055072873252fecp+0, false), + 0x1p+0, false, false, 
+ 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + 0x1.0000010000000054p+0, false, false, + 0x1.0000010000000056p+0, false, false, + false, + 0x1.0000010000000055072873252f8p+0, false, false, + 0x1.0000010000000055072873253p+0, false, false, + 0x1.0000010000000055072873252f8p+0, false, false, + 0x1.0000010000000055072873253p+0, false, false, + false, + 0x1.0000010000000055072873252febp+0, false, false, + 0x1.0000010000000055072873252febp+0, false, false, + 0x1.0000010000000055072873252febp+0, false, false, + 0x1.0000010000000055072873252fecp+0, false, false), TEST ("1.0000000596046448", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - 0x1.00000100000001c4p+0, false, - 0x1.00000100000001c6p+0, false, - false, - 0x1.00000100000001c5f67cd79279p+0, false, - 0x1.00000100000001c5f67cd792798p+0, false, - 0x1.00000100000001c5f67cd79279p+0, false, - 0x1.00000100000001c5f67cd792798p+0, false, - false, - 0x1.00000100000001c5f67cd7927953p+0, false, - 0x1.00000100000001c5f67cd7927954p+0, false, - 0x1.00000100000001c5f67cd7927953p+0, false, - 0x1.00000100000001c5f67cd7927954p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 
0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + 0x1.00000100000001c4p+0, false, false, + 0x1.00000100000001c6p+0, false, false, + false, + 0x1.00000100000001c5f67cd79279p+0, false, false, + 0x1.00000100000001c5f67cd792798p+0, false, false, + 0x1.00000100000001c5f67cd79279p+0, false, false, + 0x1.00000100000001c5f67cd792798p+0, false, false, + false, + 0x1.00000100000001c5f67cd7927953p+0, false, false, + 0x1.00000100000001c5f67cd7927954p+0, false, false, + 0x1.00000100000001c5f67cd7927953p+0, false, false, + 0x1.00000100000001c5f67cd7927954p+0, false, false), TEST ("1.000000059604645", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.0000010000001p+0, false, - 0x1.0000010000001p+0, false, - 0x1.0000010000001p+0, false, - 0x1.0000010000002p+0, false, - false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - 0x1.000001000000102ep+0, false, - 0x1.000001000000103p+0, false, - false, - 0x1.000001000000102f4fc8c3d757p+0, false, - 0x1.000001000000102f4fc8c3d7578p+0, false, - 0x1.000001000000102f4fc8c3d757p+0, false, - 0x1.000001000000102f4fc8c3d7578p+0, false, - false, - 0x1.000001000000102f4fc8c3d75769p+0, false, - 0x1.000001000000102f4fc8c3d75769p+0, false, - 0x1.000001000000102f4fc8c3d75769p+0, false, - 0x1.000001000000102f4fc8c3d7576ap+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 
0x1.0000010000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + 0x1.0000010000002p+0, false, false, + false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + 0x1.000001000000102ep+0, false, false, + 0x1.000001000000103p+0, false, false, + false, + 0x1.000001000000102f4fc8c3d757p+0, false, false, + 0x1.000001000000102f4fc8c3d7578p+0, false, false, + 0x1.000001000000102f4fc8c3d757p+0, false, false, + 0x1.000001000000102f4fc8c3d7578p+0, false, false, + false, + 0x1.000001000000102f4fc8c3d75769p+0, false, false, + 0x1.000001000000102f4fc8c3d75769p+0, false, false, + 0x1.000001000000102f4fc8c3d75769p+0, false, false, + 0x1.000001000000102f4fc8c3d7576ap+0, false, false), TEST ("1.00000005960464", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffffeap+0, false, - 0x1.000000fffffeap+0, false, - 0x1.000000fffffeap+0, false, - 0x1.000000fffffebp+0, false, - false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - 0x1.000000fffffea7e4p+0, false, - 0x1.000000fffffea7e6p+0, false, - false, - 0x1.000000fffffea7e5975eb11da7p+0, false, - 0x1.000000fffffea7e5975eb11da78p+0, false, - 0x1.000000fffffea7e5975eb11da7p+0, false, - 0x1.000000fffffea7e5975eb11da78p+0, false, - false, - 0x1.000000fffffea7e5975eb11da74ap+0, false, - 0x1.000000fffffea7e5975eb11da74bp+0, false, - 0x1.000000fffffea7e5975eb11da74ap+0, false, - 0x1.000000fffffea7e5975eb11da74bp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffffeap+0, false, false, + 
0x1.000000fffffeap+0, false, false, + 0x1.000000fffffeap+0, false, false, + 0x1.000000fffffebp+0, false, false, + false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + 0x1.000000fffffea7e4p+0, false, false, + 0x1.000000fffffea7e6p+0, false, false, + false, + 0x1.000000fffffea7e5975eb11da7p+0, false, false, + 0x1.000000fffffea7e5975eb11da78p+0, false, false, + 0x1.000000fffffea7e5975eb11da7p+0, false, false, + 0x1.000000fffffea7e5975eb11da78p+0, false, false, + false, + 0x1.000000fffffea7e5975eb11da74ap+0, false, false, + 0x1.000000fffffea7e5975eb11da74bp+0, false, false, + 0x1.000000fffffea7e5975eb11da74ap+0, false, false, + 0x1.000000fffffea7e5975eb11da74bp+0, false, false), TEST ("1.0000000596046", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffff36p+0, false, - 0x1.000000fffff36p+0, false, - 0x1.000000fffff36p+0, false, - 0x1.000000fffff37p+0, false, - false, - 0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - 0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - false, - 0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - 0x1.000000fffff36596p+0, false, - 0x1.000000fffff36598p+0, false, - false, - 0x1.000000fffff36597d40e1b5026p+0, false, - 0x1.000000fffff36597d40e1b50268p+0, false, - 0x1.000000fffff36597d40e1b5026p+0, false, - 0x1.000000fffff36597d40e1b50268p+0, false, - false, - 0x1.000000fffff36597d40e1b502655p+0, false, - 0x1.000000fffff36597d40e1b502656p+0, false, - 0x1.000000fffff36597d40e1b502655p+0, false, - 0x1.000000fffff36597d40e1b502656p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffff36p+0, false, false, + 0x1.000000fffff36p+0, false, false, + 
0x1.000000fffff36p+0, false, false, + 0x1.000000fffff37p+0, false, false, + false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + 0x1.000000fffff36596p+0, false, false, + 0x1.000000fffff36598p+0, false, false, + false, + 0x1.000000fffff36597d40e1b5026p+0, false, false, + 0x1.000000fffff36597d40e1b50268p+0, false, false, + 0x1.000000fffff36597d40e1b5026p+0, false, false, + 0x1.000000fffff36597d40e1b50268p+0, false, false, + false, + 0x1.000000fffff36597d40e1b502655p+0, false, false, + 0x1.000000fffff36597d40e1b502656p+0, false, false, + 0x1.000000fffff36597d40e1b502655p+0, false, false, + 0x1.000000fffff36597d40e1b502656p+0, false, false), TEST ("1.000000059605", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001000063fp+0, false, - 0x1.000001000064p+0, false, - 0x1.000001000063fp+0, false, - 0x1.000001000064p+0, false, - false, - 0x1.000001000063fcap+0, false, - 0x1.000001000063fca2p+0, false, - 0x1.000001000063fcap+0, false, - 0x1.000001000063fca2p+0, false, - false, - 0x1.000001000063fcap+0, false, - 0x1.000001000063fca2p+0, false, - 0x1.000001000063fcap+0, false, - 0x1.000001000063fca2p+0, false, - false, - 0x1.000001000063fca17533f5572f8p+0, false, - 0x1.000001000063fca17533f5573p+0, false, - 0x1.000001000063fca17533f5572f8p+0, false, - 0x1.000001000063fca17533f5573p+0, false, - false, - 0x1.000001000063fca17533f5572fe9p+0, false, - 0x1.000001000063fca17533f5572feap+0, false, - 0x1.000001000063fca17533f5572fe9p+0, false, - 0x1.000001000063fca17533f5572feap+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001000063fp+0, false, false, + 0x1.000001000064p+0, false, false, + 0x1.000001000063fp+0, false, false, + 
0x1.000001000064p+0, false, false, + false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + 0x1.000001000063fcap+0, false, false, + 0x1.000001000063fca2p+0, false, false, + false, + 0x1.000001000063fca17533f5572f8p+0, false, false, + 0x1.000001000063fca17533f5573p+0, false, false, + 0x1.000001000063fca17533f5572f8p+0, false, false, + 0x1.000001000063fca17533f5573p+0, false, false, + false, + 0x1.000001000063fca17533f5572fe9p+0, false, false, + 0x1.000001000063fca17533f5572feap+0, false, false, + 0x1.000001000063fca17533f5572fe9p+0, false, false, + 0x1.000001000063fca17533f5572feap+0, false, false), TEST ("1.00000005960", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff388p+0, false, - false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382ep+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + false, + 
0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff388p+0, false, false, + false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382ep+0, false, false), TEST ("1.0000000596", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - 0x1.000000fffae49p+0, false, - 0x1.000000fffae4ap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - 0x1.000000fffae49ca8p+0, false, - 0x1.000000fffae49caap+0, false, - false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff38p+0, false, - 0x1.000000fffae49ca916dacfff388p+0, false, - false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382dp+0, false, - 0x1.000000fffae49ca916dacfff382ep+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + 0x1.000000fffae49p+0, false, false, + 0x1.000000fffae4ap+0, false, false, + false, + 0x1.000000fffae49ca8p+0, false, false, + 
0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + 0x1.000000fffae49ca8p+0, false, false, + 0x1.000000fffae49caap+0, false, false, + false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff38p+0, false, false, + 0x1.000000fffae49ca916dacfff388p+0, false, false, + false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382dp+0, false, false, + 0x1.000000fffae49ca916dacfff382ep+0, false, false), TEST ("1.000000060", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca38p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, 
false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca38p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false), TEST ("1.00000006", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - 0x1.00000101b2b29p+0, false, - 0x1.00000101b2b2ap+0, false, - false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a46p+0, false, - 0x1.00000101b2b29a48p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca3p+0, false, - 0x1.00000101b2b29a4692b67b7ca38p+0, false, - false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false, - 0x1.00000101b2b29a4692b67b7ca313p+0, false, - 0x1.00000101b2b29a4692b67b7ca314p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + 0x1.00000101b2b29p+0, false, false, + 0x1.00000101b2b2ap+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, 
false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a46p+0, false, false, + 0x1.00000101b2b29a48p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca3p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca38p+0, false, false, + false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca313p+0, false, false, + 0x1.00000101b2b29a4692b67b7ca314p+0, false, false), TEST ("1.0000001", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001ad7f29ap+0, false, - 0x1.000001ad7f29bp+0, false, - 0x1.000001ad7f29ap+0, false, - 0x1.000001ad7f29bp+0, false, - false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abccp+0, false, - false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abcap+0, false, - 0x1.000001ad7f29abccp+0, false, - false, - 0x1.000001ad7f29abcaf485787a65p+0, false, - 0x1.000001ad7f29abcaf485787a65p+0, false, - 0x1.000001ad7f29abcaf485787a65p+0, false, - 0x1.000001ad7f29abcaf485787a658p+0, false, - false, - 0x1.000001ad7f29abcaf485787a652p+0, false, - 0x1.000001ad7f29abcaf485787a6521p+0, false, - 0x1.000001ad7f29abcaf485787a652p+0, false, - 0x1.000001ad7f29abcaf485787a6521p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001ad7f29ap+0, false, false, + 0x1.000001ad7f29bp+0, false, false, + 0x1.000001ad7f29ap+0, false, false, + 0x1.000001ad7f29bp+0, false, false, + false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 
0x1.000001ad7f29abccp+0, false, false, + false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abcap+0, false, false, + 0x1.000001ad7f29abccp+0, false, false, + false, + 0x1.000001ad7f29abcaf485787a65p+0, false, false, + 0x1.000001ad7f29abcaf485787a65p+0, false, false, + 0x1.000001ad7f29abcaf485787a65p+0, false, false, + 0x1.000001ad7f29abcaf485787a658p+0, false, false, + false, + 0x1.000001ad7f29abcaf485787a652p+0, false, false, + 0x1.000001ad7f29abcaf485787a6521p+0, false, false, + 0x1.000001ad7f29abcaf485787a652p+0, false, false, + 0x1.000001ad7f29abcaf485787a6521p+0, false, false), TEST ("1.000000", true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - true, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + true, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false), TEST ("1.00000000000000011113", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000008p+0, false, - 
0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.0000000000000801fc96557232p+0, false, - 0x1.0000000000000801fc96557232p+0, false, - 0x1.0000000000000801fc96557232p+0, false, - 0x1.0000000000000801fc965572328p+0, false, - false, - 0x1.0000000000000801fc9655723222p+0, false, - 0x1.0000000000000801fc9655723222p+0, false, - 0x1.0000000000000801fc9655723222p+0, false, - 0x1.0000000000000801fc9655723223p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.0000000000000801fc96557232p+0, false, false, + 0x1.0000000000000801fc96557232p+0, false, false, + 0x1.0000000000000801fc96557232p+0, false, false, + 0x1.0000000000000801fc965572328p+0, false, false, + false, + 0x1.0000000000000801fc9655723222p+0, false, false, + 0x1.0000000000000801fc9655723222p+0, false, false, + 0x1.0000000000000801fc9655723222p+0, false, false, + 0x1.0000000000000801fc9655723223p+0, false, false), TEST ("1.00000000000000011103", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 
0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008002459c076c48p+0, false, - 0x1.00000000000008002459c076c5p+0, false, - 0x1.00000000000008002459c076c48p+0, false, - 0x1.00000000000008002459c076c5p+0, false, - false, - 0x1.00000000000008002459c076c4f7p+0, false, - 0x1.00000000000008002459c076c4f8p+0, false, - 0x1.00000000000008002459c076c4f7p+0, false, - 0x1.00000000000008002459c076c4f8p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008002459c076c48p+0, false, false, + 0x1.00000000000008002459c076c5p+0, false, false, + 0x1.00000000000008002459c076c48p+0, false, false, + 0x1.00000000000008002459c076c5p+0, false, false, + false, + 0x1.00000000000008002459c076c4f7p+0, false, false, + 0x1.00000000000008002459c076c4f8p+0, false, false, + 0x1.00000000000008002459c076c4f7p+0, false, false, + 0x1.00000000000008002459c076c4f8p+0, false, false), TEST ("1.00000000000000011102", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, 
false, - false, - 0x1.00000000000007fff5207e5dap+0, false, - 0x1.00000000000007fff5207e5da08p+0, false, - 0x1.00000000000007fff5207e5dap+0, false, - 0x1.00000000000007fff5207e5da08p+0, false, - false, - 0x1.00000000000007fff5207e5da073p+0, false, - 0x1.00000000000007fff5207e5da073p+0, false, - 0x1.00000000000007fff5207e5da073p+0, false, - 0x1.00000000000007fff5207e5da074p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fff5207e5dap+0, false, false, + 0x1.00000000000007fff5207e5da08p+0, false, false, + 0x1.00000000000007fff5207e5dap+0, false, false, + 0x1.00000000000007fff5207e5da08p+0, false, false, + false, + 0x1.00000000000007fff5207e5da073p+0, false, false, + 0x1.00000000000007fff5207e5da073p+0, false, false, + 0x1.00000000000007fff5207e5da073p+0, false, false, + 0x1.00000000000007fff5207e5da074p+0, false, false), TEST ("1.00000000000000011101", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007ffc5e73c447b8p+0, false, - 0x1.00000000000007ffc5e73c447cp+0, false, - 
0x1.00000000000007ffc5e73c447b8p+0, false, - 0x1.00000000000007ffc5e73c447cp+0, false, - false, - 0x1.00000000000007ffc5e73c447befp+0, false, - 0x1.00000000000007ffc5e73c447befp+0, false, - 0x1.00000000000007ffc5e73c447befp+0, false, - 0x1.00000000000007ffc5e73c447bfp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007ffc5e73c447b8p+0, false, false, + 0x1.00000000000007ffc5e73c447cp+0, false, false, + 0x1.00000000000007ffc5e73c447b8p+0, false, false, + 0x1.00000000000007ffc5e73c447cp+0, false, false, + false, + 0x1.00000000000007ffc5e73c447befp+0, false, false, + 0x1.00000000000007ffc5e73c447befp+0, false, false, + 0x1.00000000000007ffc5e73c447befp+0, false, false, + 0x1.00000000000007ffc5e73c447bfp+0, false, false), TEST ("1.0000000000000001111", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - 0x1.00000000000008p+0, false, - 0x1.0000000000000802p+0, false, - false, - 0x1.00000000000008016eea8f26c48p+0, false, - 0x1.00000000000008016eea8f26c48p+0, false, - 0x1.00000000000008016eea8f26c48p+0, false, - 0x1.00000000000008016eea8f26c5p+0, false, - false, - 
0x1.00000000000008016eea8f26c495p+0, false, - 0x1.00000000000008016eea8f26c496p+0, false, - 0x1.00000000000008016eea8f26c495p+0, false, - 0x1.00000000000008016eea8f26c496p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.0000000000000802p+0, false, false, + false, + 0x1.00000000000008016eea8f26c48p+0, false, false, + 0x1.00000000000008016eea8f26c48p+0, false, false, + 0x1.00000000000008016eea8f26c48p+0, false, false, + 0x1.00000000000008016eea8f26c5p+0, false, false, + false, + 0x1.00000000000008016eea8f26c495p+0, false, false, + 0x1.00000000000008016eea8f26c496p+0, false, false, + 0x1.00000000000008016eea8f26c495p+0, false, false, + 0x1.00000000000008016eea8f26c496p+0, false, false), TEST ("1.000000000000000111", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000007fep+0, false, - 0x1.00000000000008p+0, false, - false, - 0x1.00000000000007ff96adfa2b57p+0, false, - 0x1.00000000000007ff96adfa2b578p+0, false, - 0x1.00000000000007ff96adfa2b57p+0, false, - 0x1.00000000000007ff96adfa2b578p+0, false, - false, - 0x1.00000000000007ff96adfa2b576ap+0, false, - 0x1.00000000000007ff96adfa2b576bp+0, false, - 
0x1.00000000000007ff96adfa2b576ap+0, false, - 0x1.00000000000007ff96adfa2b576bp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000007fep+0, false, false, + 0x1.00000000000008p+0, false, false, + false, + 0x1.00000000000007ff96adfa2b57p+0, false, false, + 0x1.00000000000007ff96adfa2b578p+0, false, false, + 0x1.00000000000007ff96adfa2b57p+0, false, false, + 0x1.00000000000007ff96adfa2b578p+0, false, false, + false, + 0x1.00000000000007ff96adfa2b576ap+0, false, false, + 0x1.00000000000007ff96adfa2b576bp+0, false, false, + 0x1.00000000000007ff96adfa2b576ap+0, false, false, + 0x1.00000000000007ff96adfa2b576bp+0, false, false), TEST ("1.00000000000000011", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - 0x1.00000000000007ecp+0, false, - 0x1.00000000000007eep+0, false, - false, - 0x1.00000000000007ed24502859138p+0, false, - 0x1.00000000000007ed24502859138p+0, false, - 0x1.00000000000007ed24502859138p+0, false, - 0x1.00000000000007ed2450285914p+0, false, - false, - 0x1.00000000000007ed2450285913bfp+0, false, - 0x1.00000000000007ed2450285913bfp+0, false, - 0x1.00000000000007ed2450285913bfp+0, false, - 0x1.00000000000007ed2450285913cp+0, false), + 0x1p+0, false, false, + 0x1p+0, 
false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + 0x1.00000000000007ecp+0, false, false, + 0x1.00000000000007eep+0, false, false, + false, + 0x1.00000000000007ed24502859138p+0, false, false, + 0x1.00000000000007ed24502859138p+0, false, false, + 0x1.00000000000007ed24502859138p+0, false, false, + 0x1.00000000000007ed2450285914p+0, false, false, + false, + 0x1.00000000000007ed2450285913bfp+0, false, false, + 0x1.00000000000007ed2450285913bfp+0, false, false, + 0x1.00000000000007ed2450285913bfp+0, false, false, + 0x1.00000000000007ed2450285913cp+0, false, false), TEST ("1.0000000000000001", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000736p+0, false, - false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000734p+0, false, - 0x1.0000000000000736p+0, false, - false, - 0x1.0000000000000734aca5f6226fp+0, false, - 0x1.0000000000000734aca5f6226fp+0, false, - 0x1.0000000000000734aca5f6226fp+0, false, - 0x1.0000000000000734aca5f6226f8p+0, false, - false, - 0x1.0000000000000734aca5f6226f0ap+0, false, - 0x1.0000000000000734aca5f6226f0bp+0, false, - 0x1.0000000000000734aca5f6226f0ap+0, false, - 0x1.0000000000000734aca5f6226f0bp+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, 
false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000736p+0, false, false, + false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000734p+0, false, false, + 0x1.0000000000000736p+0, false, false, + false, + 0x1.0000000000000734aca5f6226fp+0, false, false, + 0x1.0000000000000734aca5f6226fp+0, false, false, + 0x1.0000000000000734aca5f6226fp+0, false, false, + 0x1.0000000000000734aca5f6226f8p+0, false, false, + false, + 0x1.0000000000000734aca5f6226f0ap+0, false, false, + 0x1.0000000000000734aca5f6226f0bp+0, false, false, + 0x1.0000000000000734aca5f6226f0ap+0, false, false, + 0x1.0000000000000734aca5f6226f0bp+0, false, false), TEST ("3929201589819414e-25", false, - 0x1.b0053p-32, false, - 0x1.b00532p-32, false, - 0x1.b0053p-32, false, - 0x1.b00532p-32, false, - false, - 0x1.b005314e2421ep-32, false, - 0x1.b005314e2421ep-32, false, - 0x1.b005314e2421ep-32, false, - 0x1.b005314e2421fp-32, false, - false, - 0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - 0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - false, - 0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - 0x1.b005314e2421e7fep-32, false, - 0x1.b005314e2421e8p-32, false, - false, - 0x1.b005314e2421e7ffb472840c5ap-32, false, - 0x1.b005314e2421e7ffb472840c5a8p-32, false, - 0x1.b005314e2421e7ffb472840c5ap-32, false, - 0x1.b005314e2421e7ffb472840c5a8p-32, false, - false, - 0x1.b005314e2421e7ffb472840c5a6ep-32, false, - 0x1.b005314e2421e7ffb472840c5a6fp-32, false, - 0x1.b005314e2421e7ffb472840c5a6ep-32, false, - 0x1.b005314e2421e7ffb472840c5a6fp-32, false), + 0x1.b0053p-32, false, false, + 0x1.b00532p-32, false, false, + 0x1.b0053p-32, false, false, + 0x1.b00532p-32, false, false, + false, + 0x1.b005314e2421ep-32, false, false, + 
0x1.b005314e2421ep-32, false, false, + 0x1.b005314e2421ep-32, false, false, + 0x1.b005314e2421fp-32, false, false, + false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + 0x1.b005314e2421e7fep-32, false, false, + 0x1.b005314e2421e8p-32, false, false, + false, + 0x1.b005314e2421e7ffb472840c5ap-32, false, false, + 0x1.b005314e2421e7ffb472840c5a8p-32, false, false, + 0x1.b005314e2421e7ffb472840c5ap-32, false, false, + 0x1.b005314e2421e7ffb472840c5a8p-32, false, false, + false, + 0x1.b005314e2421e7ffb472840c5a6ep-32, false, false, + 0x1.b005314e2421e7ffb472840c5a6fp-32, false, false, + 0x1.b005314e2421e7ffb472840c5a6ep-32, false, false, + 0x1.b005314e2421e7ffb472840c5a6fp-32, false, false), TEST ("0.0000000000000000000000000000000000000000000021019476964872" "256063855943749348741969203929128147736576356024258346866240" "28790902229957282543182373046875", false, - 0x8p-152, false, - 0x1p-148, false, - 0x8p-152, false, - 0x1p-148, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false), + 0x8p-152, false, true, + 0x1p-148, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, 
false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false), TEST ("1.00000005960464477539062499", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffp+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - 0x1.000000fffffffffep+0, false, - 0x1.000001p+0, false, - false, - 0x1.000000fffffffffffffffce7b78p+0, false, - 0x1.000000fffffffffffffffce7b8p+0, false, - 0x1.000000fffffffffffffffce7b78p+0, false, - 0x1.000000fffffffffffffffce7b8p+0, false, - false, - 0x1.000000fffffffffffffffce7b7e7p+0, false, - 0x1.000000fffffffffffffffce7b7e7p+0, false, - 0x1.000000fffffffffffffffce7b7e7p+0, false, - 0x1.000000fffffffffffffffce7b7e8p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffp+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000000fffffffffep+0, false, false, + 0x1.000001p+0, false, false, + false, + 0x1.000000fffffffffffffffce7b78p+0, false, false, + 0x1.000000fffffffffffffffce7b8p+0, false, false, + 0x1.000000fffffffffffffffce7b78p+0, false, false, + 0x1.000000fffffffffffffffce7b8p+0, false, false, + false, + 0x1.000000fffffffffffffffce7b7e7p+0, false, false, + 0x1.000000fffffffffffffffce7b7e7p+0, false, false, + 
0x1.000000fffffffffffffffce7b7e7p+0, false, false, + 0x1.000000fffffffffffffffce7b7e8p+0, false, false), TEST ("1.000000059604644775390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - true, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + true, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false), TEST ("1.00000005960464477539062501", false, - 0x1p+0, false, - 0x1.000002p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000001p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000000002p+0, false, - false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.000001p+0, false, - 0x1.0000010000000002p+0, false, - false, - 
0x1.00000100000000000000031848p+0, false, - 0x1.00000100000000000000031848p+0, false, - 0x1.00000100000000000000031848p+0, false, - 0x1.000001000000000000000318488p+0, false, - false, - 0x1.0000010000000000000003184818p+0, false, - 0x1.0000010000000000000003184819p+0, false, - 0x1.0000010000000000000003184818p+0, false, - 0x1.0000010000000000000003184819p+0, false), + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000001p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000000002p+0, false, false, + false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.000001p+0, false, false, + 0x1.0000010000000002p+0, false, false, + false, + 0x1.00000100000000000000031848p+0, false, false, + 0x1.00000100000000000000031848p+0, false, false, + 0x1.00000100000000000000031848p+0, false, false, + 0x1.000001000000000000000318488p+0, false, false, + false, + 0x1.0000010000000000000003184818p+0, false, false, + 0x1.0000010000000000000003184819p+0, false, false, + 0x1.0000010000000000000003184818p+0, false, false, + 0x1.0000010000000000000003184819p+0, false, false), TEST ("1.00000011920928955078125", true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false), 
+ 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false), TEST ("1.00000017881393432617187499", false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - false, - 0x1.000002fffffffp+0, false, - 0x1.000003p+0, false, - 0x1.000002fffffffp+0, false, - 0x1.000003p+0, false, - false, - 0x1.000002fffffffffep+0, false, - 0x1.000003p+0, false, - 0x1.000002fffffffffep+0, false, - 0x1.000003p+0, false, - false, - 0x1.000002fffffffffep+0, false, - 0x1.000003p+0, false, - 0x1.000002fffffffffep+0, false, - 0x1.000003p+0, false, - false, - 0x1.000002fffffffffffffffce7b78p+0, false, - 0x1.000002fffffffffffffffce7b8p+0, false, - 0x1.000002fffffffffffffffce7b78p+0, false, - 0x1.000002fffffffffffffffce7b8p+0, false, - false, - 0x1.000002fffffffffffffffce7b7e7p+0, false, - 0x1.000002fffffffffffffffce7b7e7p+0, false, - 0x1.000002fffffffffffffffce7b7e7p+0, false, - 0x1.000002fffffffffffffffce7b7e8p+0, false), + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + false, + 0x1.000002fffffffp+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000002fffffffp+0, false, false, + 0x1.000003p+0, false, false, + false, + 0x1.000002fffffffffep+0, false, false, + 
0x1.000003p+0, false, false, + 0x1.000002fffffffffep+0, false, false, + 0x1.000003p+0, false, false, + false, + 0x1.000002fffffffffep+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000002fffffffffep+0, false, false, + 0x1.000003p+0, false, false, + false, + 0x1.000002fffffffffffffffce7b78p+0, false, false, + 0x1.000002fffffffffffffffce7b8p+0, false, false, + 0x1.000002fffffffffffffffce7b78p+0, false, false, + 0x1.000002fffffffffffffffce7b8p+0, false, false, + false, + 0x1.000002fffffffffffffffce7b7e7p+0, false, false, + 0x1.000002fffffffffffffffce7b7e7p+0, false, false, + 0x1.000002fffffffffffffffce7b7e7p+0, false, false, + 0x1.000002fffffffffffffffce7b7e8p+0, false, false), TEST ("1.000000178813934326171875", false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - true, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false), + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, 
false, + true, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false), TEST ("1.00000017881393432617187501", false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - 0x1.000002p+0, false, - 0x1.000004p+0, false, - false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.0000030000001p+0, false, - false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.0000030000000002p+0, false, - false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.000003p+0, false, - 0x1.0000030000000002p+0, false, - false, - 0x1.00000300000000000000031848p+0, false, - 0x1.00000300000000000000031848p+0, false, - 0x1.00000300000000000000031848p+0, false, - 0x1.000003000000000000000318488p+0, false, - false, - 0x1.0000030000000000000003184818p+0, false, - 0x1.0000030000000000000003184819p+0, false, - 0x1.0000030000000000000003184818p+0, false, - 0x1.0000030000000000000003184819p+0, false), + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000002p+0, false, false, + 0x1.000004p+0, false, false, + false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.0000030000001p+0, false, false, + false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.0000030000000002p+0, false, false, + false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.000003p+0, false, false, + 0x1.0000030000000002p+0, false, false, + false, + 0x1.00000300000000000000031848p+0, false, false, + 0x1.00000300000000000000031848p+0, false, false, + 0x1.00000300000000000000031848p+0, false, false, + 0x1.000003000000000000000318488p+0, false, false, + false, + 0x1.0000030000000000000003184818p+0, false, false, + 0x1.0000030000000000000003184819p+0, false, false, + 0x1.0000030000000000000003184818p+0, false, false, + 0x1.0000030000000000000003184819p+0, false, false), TEST 
("1.0000002384185791015625", true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - true, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false, - 0x1.000004p+0, false), + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + true, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false, + 0x1.000004p+0, false, false), TEST ("1.08420217248550443400745280086994171142578125e-19", true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - true, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false), + 0x2p-64, false, 
false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + true, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false), TEST ("1.0842022371089897897127399001987457793916291848290711641311" "645507812499e-19", false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2.000004p-64, false, - false, - 0x2.000001ffffffep-64, false, - 0x2.000002p-64, false, - 0x2.000001ffffffep-64, false, - 0x2.000002p-64, false, - false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - 0x2.000001fffffffffcp-64, false, - 0x2.000002p-64, false, - false, - 0x2.000001ffffffffffffffffffffp-64, false, - 0x2.000002p-64, false, - 0x2.000001ffffffffffffffffffffp-64, false, - 0x2.000002p-64, false, - false, - 0x2.000001fffffffffffffffffffffep-64, false, - 0x2.000002p-64, false, - 0x2.000001fffffffffffffffffffffep-64, false, - 0x2.000002p-64, false), + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2.000004p-64, false, false, + false, + 0x2.000001ffffffep-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001ffffffep-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + 
0x2.000001fffffffffcp-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001ffffffffffffffffffffp-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001ffffffffffffffffffffp-64, false, false, + 0x2.000002p-64, false, false, + false, + 0x2.000001fffffffffffffffffffffep-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000001fffffffffffffffffffffep-64, false, false, + 0x2.000002p-64, false, false), TEST ("1.0842022371089897897127399001987457793916291848290711641311" "6455078125e-19", false, - 0x2p-64, false, - 0x2p-64, false, - 0x2p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - true, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false), + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + true, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false), TEST 
("1.0842022371089897897127399001987457793916291848290711641311" "645507812501e-19", false, - 0x2p-64, false, - 0x2.000004p-64, false, - 0x2p-64, false, - 0x2.000004p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000002p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000000004p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000000004p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.00000200000000000000000001p-64, false, - false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.000002p-64, false, - 0x2.0000020000000000000000000002p-64, false), + 0x2p-64, false, false, + 0x2.000004p-64, false, false, + 0x2p-64, false, false, + 0x2.000004p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.0000020000002p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.0000020000000004p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.0000020000000004p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.00000200000000000000000001p-64, false, false, + false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.000002p-64, false, false, + 0x2.0000020000000000000000000002p-64, false, false), TEST ("1.0842023017324751454180269995275498473574771196581423282623" "291015625e-19", true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 
0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - true, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false), + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + true, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false), TEST ("1.0842023663559605011233140988563539153233250544872134923934" "936523437499e-19", false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - false, - 0x2.000005ffffffep-64, false, - 0x2.000006p-64, false, - 0x2.000005ffffffep-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - 0x2.000005fffffffffcp-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005ffffffffffffffffffffp-64, false, - 0x2.000006p-64, false, - 0x2.000005ffffffffffffffffffffp-64, false, - 0x2.000006p-64, false, - false, - 0x2.000005fffffffffffffffffffffep-64, false, - 
0x2.000006p-64, false, - 0x2.000005fffffffffffffffffffffep-64, false, - 0x2.000006p-64, false), + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + false, + 0x2.000005ffffffep-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005ffffffep-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005fffffffffcp-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005ffffffffffffffffffffp-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005ffffffffffffffffffffp-64, false, false, + 0x2.000006p-64, false, false, + false, + 0x2.000005fffffffffffffffffffffep-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000005fffffffffffffffffffffep-64, false, false, + 0x2.000006p-64, false, false), TEST ("1.0842023663559605011233140988563539153233250544872134923934" "9365234375e-19", false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - true, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false), + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 
0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + true, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false), TEST ("1.0842023663559605011233140988563539153233250544872134923934" "936523437501e-19", false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - 0x2.000004p-64, false, - 0x2.000008p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000002p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000000004p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000000004p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.00000600000000000000000001p-64, false, - false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.000006p-64, false, - 0x2.0000060000000000000000000002p-64, false), + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000004p-64, false, false, + 0x2.000008p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000002p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000000004p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000000004p-64, false, false, + false, + 
0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.00000600000000000000000001p-64, false, false, + false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.000006p-64, false, false, + 0x2.0000060000000000000000000002p-64, false, false), TEST ("1.0842024309794458568286011981851579832891729893162846565246" "58203125e-19", true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - true, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false, - 0x2.000008p-64, false), + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + true, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false, + 0x2.000008p-64, false, false), TEST ("7.5231638452626400509999138382223723380394595633413601376560" "1092018187046051025390625e-37", true, - 0x1p-120, false, - 0x1p-120, false, - 
0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - true, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false), + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + true, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false), TEST ("7.5231642936781486349413765338158389908126215730251815381410" "578824437213052434003657253924757242202758789062499e-37", false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1.000002p-120, false, - false, - 0x1.000000fffffffp-120, false, - 0x1.000001p-120, false, - 0x1.000000fffffffp-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - 0x1.000000fffffffffep-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000ffffffffffffffffffff8p-120, false, - 0x1.000001p-120, false, - 0x1.000000ffffffffffffffffffff8p-120, false, - 0x1.000001p-120, false, - false, - 0x1.000000ffffffffffffffffffffffp-120, false, - 0x1.000001p-120, false, - 
0x1.000000ffffffffffffffffffffffp-120, false, - 0x1.000001p-120, false), + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + false, + 0x1.000000fffffffp-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000fffffffp-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000fffffffffep-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000ffffffffffffffffffff8p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000ffffffffffffffffffff8p-120, false, false, + 0x1.000001p-120, false, false, + false, + 0x1.000000ffffffffffffffffffffffp-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000000ffffffffffffffffffffffp-120, false, false, + 0x1.000001p-120, false, false), TEST ("7.5231642936781486349413765338158389908126215730251815381410" "5788244372130524340036572539247572422027587890625e-37", false, - 0x1p-120, false, - 0x1p-120, false, - 0x1p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - true, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false), + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 
0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + true, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false), TEST ("7.5231642936781486349413765338158389908126215730251815381410" "578824437213052434003657253924757242202758789062501e-37", false, - 0x1p-120, false, - 0x1.000002p-120, false, - 0x1p-120, false, - 0x1.000002p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000001p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000000002p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000000002p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001000000000000000000008p-120, false, - false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.000001p-120, false, - 0x1.0000010000000000000000000001p-120, false), + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + 0x1p-120, false, false, + 0x1.000002p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.0000010000001p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.0000010000000002p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, 
false, false, + 0x1.0000010000000002p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001000000000000000000008p-120, false, false, + false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.000001p-120, false, false, + 0x1.0000010000000000000000000001p-120, false, false), TEST ("7.5231647420936572188828392294093056435857835827090029386261" "048447055721499765468252007849514484405517578125e-37", true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - true, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false), + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + true, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, 
false, false), TEST ("7.5231651905091658028243019250027722963589455923928243391111" "518069674229947096932846761774271726608276367187499e-37", false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - false, - 0x1.000002fffffffp-120, false, - 0x1.000003p-120, false, - 0x1.000002fffffffp-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - 0x1.000002fffffffffep-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002ffffffffffffffffffff8p-120, false, - 0x1.000003p-120, false, - 0x1.000002ffffffffffffffffffff8p-120, false, - 0x1.000003p-120, false, - false, - 0x1.000002ffffffffffffffffffffffp-120, false, - 0x1.000003p-120, false, - 0x1.000002ffffffffffffffffffffffp-120, false, - 0x1.000003p-120, false), + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + false, + 0x1.000002fffffffp-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002fffffffp-120, false, false, + 0x1.000003p-120, false, false, + false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002fffffffffep-120, false, false, + 0x1.000003p-120, false, false, + false, + 0x1.000002ffffffffffffffffffff8p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002ffffffffffffffffffff8p-120, false, false, + 0x1.000003p-120, false, false, + false, + 0x1.000002ffffffffffffffffffffffp-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000002ffffffffffffffffffffffp-120, false, false, + 0x1.000003p-120, false, false), TEST 
("7.5231651905091658028243019250027722963589455923928243391111" "5180696742299470969328467617742717266082763671875e-37", false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - true, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false), + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + true, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false), TEST ("7.5231651905091658028243019250027722963589455923928243391111" "518069674229947096932846761774271726608276367187501e-37", false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - 0x1.000002p-120, false, - 0x1.000004p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000001p-120, false, - false, - 
0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000000002p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000000002p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003000000000000000000008p-120, false, - false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.000003p-120, false, - 0x1.0000030000000000000000000001p-120, false), + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000002p-120, false, false, + 0x1.000004p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000001p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000000002p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000000002p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003000000000000000000008p-120, false, false, + false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.000003p-120, false, false, + 0x1.0000030000000000000000000001p-120, false, false), TEST ("7.5231656389246743867657646205962389491321076020766457395961" "98769229273839442839744151569902896881103515625e-37", true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 
0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - true, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false, - 0x1.000004p-120, false), + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + true, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false, + 0x1.000004p-120, false, false), TEST ("340282356779733661637539395458142568447.999", false, - 0xf.fffffp+124, false, - 0xf.fffffp+124, false, - 0xf.fffffp+124, false, - INF, true, - false, - 0xf.fffff7ffffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7ffffff8p+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffp+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffffffffffffcp+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffffffffffffcp+124, false, - 0xf.fffff8p+124, false, - false, - 0xf.fffff7fffffffffffffffffffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff7fffffffffffffffffffff8p+124, false, - 0xf.fffff8p+124, false), + 0xf.fffffp+124, false, false, + 0xf.fffffp+124, false, false, + 0xf.fffffp+124, false, false, + INF, 
true, false, + false, + 0xf.fffff7ffffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7ffffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffp+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffffffffffffcp+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffffffffffffcp+124, false, false, + 0xf.fffff8p+124, false, false, + false, + 0xf.fffff7fffffffffffffffffffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff7fffffffffffffffffffff8p+124, false, false, + 0xf.fffff8p+124, false, false), TEST ("340282356779733661637539395458142568448", false, - 0xf.fffffp+124, false, - INF, true, - 0xf.fffffp+124, false, - INF, true, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - true, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false), + 0xf.fffffp+124, false, false, + INF, true, false, + 0xf.fffffp+124, false, false, + INF, true, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, 
false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + true, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false), TEST ("340282356779733661637539395458142568448.001", false, - 0xf.fffffp+124, false, - INF, true, - 0xf.fffffp+124, false, - INF, true, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff80000008p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8000000001p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8000000001p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff800000000000000000004p+124, false, - false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff8p+124, false, - 0xf.fffff80000000000000000000008p+124, false), + 0xf.fffffp+124, false, false, + INF, true, false, + 0xf.fffffp+124, false, false, + INF, true, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff80000008p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8000000001p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8000000001p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff800000000000000000004p+124, false, false, + false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff8p+124, false, false, + 0xf.fffff80000000000000000000008p+124, false, false), 
TEST ("-340282356779733661637539395458142568447.999", false, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7ffffff8p+124, false, - -0xf.fffff7ffffff8p+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffp+124, false, - -0xf.fffff7fffffffffp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffp+124, false, - -0xf.fffff7fffffffffp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffffffffffffcp+124, false, - -0xf.fffff7fffffffffffffffffffcp+124, false, - false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff7fffffffffffffffffffff8p+124, false, - -0xf.fffff7fffffffffffffffffffff8p+124, false), + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7ffffff8p+124, false, false, + -0xf.fffff7ffffff8p+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffp+124, false, false, + -0xf.fffff7fffffffffp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffp+124, false, false, + -0xf.fffff7fffffffffp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffffffffffffcp+124, false, false, + -0xf.fffff7fffffffffffffffffffcp+124, false, false, + false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff7fffffffffffffffffffff8p+124, false, false, + -0xf.fffff7fffffffffffffffffffff8p+124, false, false), TEST ("-340282356779733661637539395458142568448", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - true, - 
-0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - true, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + true, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false), TEST ("-340282356779733661637539395458142568448.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - false, - -0xf.fffff80000008p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff8000000001p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff8000000001p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff800000000000000000004p+124, false, - -0xf.fffff8p+124, false, 
- -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - false, - -0xf.fffff80000000000000000000008p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false, - -0xf.fffff8p+124, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + false, + -0xf.fffff80000008p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff8000000001p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff8000000001p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff800000000000000000004p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + false, + -0xf.fffff80000000000000000000008p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false, + -0xf.fffff8p+124, false, false), TEST ("179769313486231580793728971405303415079934132710037826936173" "778980444968292764750946649017977587207096330286416692887910" "946555547851940402630657488671505820681908902000708383676273" @@ -1855,35 +1855,35 @@ static const struct test tests[] = { "936475292719074168444365510704342711559699508093042880177904" "174497791.999", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - 0xf.ffffffffffff8p+1020, false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffbffp+1020, false, - 0xf.ffffffffffffcp+1020, false, - false, - 0xf.ffffffffffffbffffffffffffcp+1020, false, - 
0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffbffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, true, - false, - 0xf.ffffffffffffbffffffffffffff8p+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffbffffffffffffff8p+1020, false, - 0xf.ffffffffffffcp+1020, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + 0xf.ffffffffffff8p+1020, false, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffbffp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + false, + 0xf.ffffffffffffbffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffbffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, true, false, + false, + 0xf.ffffffffffffbffffffffffffff8p+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffbffffffffffffff8p+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false), TEST ("179769313486231580793728971405303415079934132710037826936173" "778980444968292764750946649017977587207096330286416692887910" "946555547851940402630657488671505820681908902000708383676273" @@ -1891,35 +1891,35 @@ static const struct test tests[] = { "936475292719074168444365510704342711559699508093042880177904" "174497792", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - 0xf.ffffffffffff8p+1020, false, - INF, true, - true, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - true, - 0xf.ffffffffffffcp+1020, false, - 
0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - false, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - true, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + true, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + true, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + true, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false), TEST ("179769313486231580793728971405303415079934132710037826936173" "778980444968292764750946649017977587207096330286416692887910" "946555547851940402630657488671505820681908902000708383676273" @@ -1927,35 +1927,35 @@ static const struct test tests[] = { "936475292719074168444365510704342711559699508093042880177904" "174497792.001", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - 0xf.ffffffffffff8p+1020, false, - INF, true, - false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffc01p+1020, false, - false, - 
0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffc01p+1020, false, - false, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffcp+1020, true, - 0xf.ffffffffffffc0000000000004p+1020, true, - false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffcp+1020, false, - 0xf.ffffffffffffc000000000000008p+1020, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffc01p+1020, false, false, + false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffc01p+1020, false, false, + false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffcp+1020, true, false, + 0xf.ffffffffffffc0000000000004p+1020, true, false, + false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffcp+1020, false, false, + 0xf.ffffffffffffc000000000000008p+1020, false, false), TEST ("-17976931348623158079372897140530341507993413271003782693617" "377898044496829276475094664901797758720709633028641669288791" "094655554785194040263065748867150582068190890200070838367627" @@ -1963,35 +1963,35 @@ static const struct test tests[] = { "493647529271907416844436551070434271155969950809304288017790" "4174497791.999", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - false, - -0xf.ffffffffffffcp+1020, false, 
- -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - -0xf.ffffffffffffbffp+1020, false, - false, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffbffffffffffffcp+1020, false, - -0xf.ffffffffffffbffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffbffffffffffffff8p+1020, false, - -0xf.ffffffffffffbffffffffffffff8p+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffbffp+1020, false, false, + -0xf.ffffffffffffbffp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffbffp+1020, false, false, + -0xf.ffffffffffffbffp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffbffffffffffffcp+1020, false, false, + -0xf.ffffffffffffbffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffbffffffffffffff8p+1020, false, false, + -0xf.ffffffffffffbffffffffffffff8p+1020, false, false), TEST ("-17976931348623158079372897140530341507993413271003782693617" "377898044496829276475094664901797758720709633028641669288791" "094655554785194040263065748867150582068190890200070838367627" @@ -1999,35 +1999,35 @@ static const struct test tests[] = { "493647529271907416844436551070434271155969950809304288017790" "4174497792", false, - -INF, 
true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - true, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - true, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - true, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + true, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + true, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + true, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false), TEST ("-17976931348623158079372897140530341507993413271003782693617" "377898044496829276475094664901797758720709633028641669288791" "094655554785194040263065748867150582068190890200070838367627" @@ -2035,35 +2035,35 @@ static const struct test tests[] = { 
"493647529271907416844436551070434271155969950809304288017790" "4174497792.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - false, - -0xf.ffffffffffffc01p+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffc01p+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - false, - -0xf.ffffffffffffc0000000000004p+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - -0xf.ffffffffffffcp+1020, true, - false, - -0xf.ffffffffffffc000000000000008p+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false, - -0xf.ffffffffffffcp+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + false, + -0xf.ffffffffffffc01p+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffc01p+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + false, + -0xf.ffffffffffffc0000000000004p+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + -0xf.ffffffffffffcp+1020, true, false, + false, + -0xf.ffffffffffffc000000000000008p+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false, + -0xf.ffffffffffffcp+1020, false, false), TEST ("118973149535723176505351158982948866796625400469556721895649" 
"927756249918185172720476044944290457046138433056764616744328" "666255526748948793023632513609765434237723241753648908036202" @@ -2148,35 +2148,35 @@ static const struct test tests[] = { "578031503869424406179027994752890226443351619365453243328968" "8740976918527.999", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffff7fffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff7fffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffff7fffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff7fffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false), TEST 
("118973149535723176505351158982948866796625400469556721895649" "927756249918185172720476044944290457046138433056764616744328" "666255526748948793023632513609765434237723241753648908036202" @@ -2261,35 +2261,35 @@ static const struct test tests[] = { "578031503869424406179027994752890226443351619365453243328968" "8740976918528", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - true, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + true, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false), TEST ("118973149535723176505351158982948866796625400469556721895649" "927756249918185172720476044944290457046138433056764616744328" 
"666255526748948793023632513609765434237723241753648908036202" @@ -2374,35 +2374,35 @@ static const struct test tests[] = { "578031503869424406179027994752890226443351619365453243328968" "8740976918528.001", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8p+16380, false, - 0xf.fffffffffffffff8000000000008p+16380, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffff8000000000008p+16380, false, false), TEST ("-11897314953572317650535115898294886679662540046955672189564" "992775624991818517272047604494429045704613843305676461674432" "866625552674894879302363251360976543423772324175364890803620" @@ -2487,35 +2487,35 @@ static 
const struct test tests[] = { "557803150386942440617902799475289022644335161936545324332896" "88740976918527.999", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff7fffffffffff8p+16380, false, - -0xf.fffffffffffffff7fffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff7fffffffffff8p+16380, false, false, + -0xf.fffffffffffffff7fffffffffff8p+16380, false, false), TEST ("-11897314953572317650535115898294886679662540046955672189564" "992775624991818517272047604494429045704613843305676461674432" 
"866625552674894879302363251360976543423772324175364890803620" @@ -2600,35 +2600,35 @@ static const struct test tests[] = { "557803150386942440617902799475289022644335161936545324332896" "88740976918528", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - true, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + true, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false), TEST ("-11897314953572317650535115898294886679662540046955672189564" "992775624991818517272047604494429045704613843305676461674432" "866625552674894879302363251360976543423772324175364890803620" @@ -2713,35 
+2713,35 @@ static const struct test tests[] = { "557803150386942440617902799475289022644335161936545324332896" "88740976918528.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -0xf.fffffffffffffff8000000000008p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false, - -0xf.fffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -0xf.fffffffffffffff8000000000008p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffff8p+16380, false, false), TEST ("118973149535723176508575932662800707347995686986910214150118" "685272271246896789803961473130416053705672050873552479421805" "932646640744124594447361172514341324846716679654551308018400" @@ -2826,35 +2826,35 @@ static const struct test tests[] = 
{ "972233447491583165728635513802591543441145939539353470970452" "5536550715391.999", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false), TEST ("118973149535723176508575932662800707347995686986910214150118" "685272271246896789803961473130416053705672050873552479421805" "932646640744124594447361172514341324846716679654551308018400" @@ -2939,35 +2939,35 @@ static const struct test tests[] = { "972233447491583165728635513802591543441145939539353470970452" "5536550715392", false, - 
0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false), TEST ("118973149535723176508575932662800707347995686986910214150118" "685272271246896789803961473130416053705672050873552479421805" "932646640744124594447361172514341324846716679654551308018400" @@ -3052,35 +3052,35 @@ static const struct test tests[] = { "972233447491583165728635513802591543441145939539353470970452" "5536550715392.001", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, 
- false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false), TEST ("-11897314953572317650857593266280070734799568698691021415011" "868527227124689678980396147313041605370567205087355247942180" "593264664074412459444736117251434132484671667965455130801840" @@ -3165,35 +3165,35 @@ static const struct test tests[] = { "097223344749158316572863551380259154344114593953935347097045" "25536550715391.999", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - 
-0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST ("-11897314953572317650857593266280070734799568698691021415011" "868527227124689678980396147313041605370567205087355247942180" "593264664074412459444736117251434132484671667965455130801840" @@ -3278,35 +3278,35 @@ static const struct test tests[] = { "097223344749158316572863551380259154344114593953935347097045" "25536550715392", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - 
-0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST ("-11897314953572317650857593266280070734799568698691021415011" "868527227124689678980396147313041605370567205087355247942180" "593264664074412459444736117251434132484671667965455130801840" @@ -3391,419 +3391,419 @@ static const struct test tests[] = { "097223344749158316572863551380259154344114593953935347097045" "25536550715392.001", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - 
-0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - -0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST ("2.1019476964872256063855943749348741969203929128147736576356" "0242583468662402879090222995728254318237304687499e-45", false, - 0x8p-152, false, - 0x8p-152, false, - 0x8p-152, false, - 0x1p-148, false, - false, - 0xb.ffffffffffff8p-152, false, - 0xcp-152, false, - 0xb.ffffffffffff8p-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffp-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffffffffffffcp-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffffffffffffcp-152, false, - 0xcp-152, false, - false, - 0xb.fffffffffffffffffffffffffff8p-152, false, - 0xcp-152, false, - 0xb.fffffffffffffffffffffffffff8p-152, false, - 0xcp-152, false), + 0x8p-152, false, true, + 0x8p-152, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + false, + 
0xb.ffffffffffff8p-152, false, false, + 0xcp-152, false, false, + 0xb.ffffffffffff8p-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffp-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffffffffffffcp-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffffffffffffcp-152, false, false, + 0xcp-152, false, false, + false, + 0xb.fffffffffffffffffffffffffff8p-152, false, false, + 0xcp-152, false, false, + 0xb.fffffffffffffffffffffffffff8p-152, false, false, + 0xcp-152, false, false), TEST ("2.1019476964872256063855943749348741969203929128147736576356" "02425834686624028790902229957282543182373046875e-45", false, - 0x8p-152, false, - 0x1p-148, false, - 0x8p-152, false, - 0x1p-148, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - true, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false), + 0x8p-152, false, true, + 0x1p-148, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + true, + 0xcp-152, false, false, + 0xcp-152, false, false, + 
0xcp-152, false, false, + 0xcp-152, false, false), TEST ("2.1019476964872256063855943749348741969203929128147736576356" "0242583468662402879090222995728254318237304687501e-45", false, - 0x8p-152, false, - 0x1p-148, false, - 0x8p-152, false, - 0x1p-148, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.0000000000008p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.000000000000001p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.000000000000001p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.00000000000000000000000004p-152, false, - false, - 0xcp-152, false, - 0xcp-152, false, - 0xcp-152, false, - 0xc.0000000000000000000000000008p-152, false), + 0x8p-152, false, true, + 0x1p-148, false, true, + 0x8p-152, false, true, + 0x1p-148, false, true, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.0000000000008p-152, false, false, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.000000000000001p-152, false, false, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.000000000000001p-152, false, false, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.00000000000000000000000004p-152, false, false, + false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xcp-152, false, false, + 0xc.0000000000000000000000000008p-152, false, false), TEST ("-2.101947696487225606385594374934874196920392912814773657635" "60242583468662402879090222995728254318237304687499e-45", false, - -0x1p-148, false, - -0x8p-152, false, - -0x8p-152, false, - -0x8p-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.ffffffffffff8p-152, false, - -0xb.ffffffffffff8p-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffp-152, false, - 
-0xb.fffffffffffffffp-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffp-152, false, - -0xb.fffffffffffffffp-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffffffffffffcp-152, false, - -0xb.fffffffffffffffffffffffffcp-152, false, - false, - -0xcp-152, false, - -0xcp-152, false, - -0xb.fffffffffffffffffffffffffff8p-152, false, - -0xb.fffffffffffffffffffffffffff8p-152, false), + -0x1p-148, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.ffffffffffff8p-152, false, false, + -0xb.ffffffffffff8p-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + -0xb.fffffffffffffffp-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffffffffffffcp-152, false, false, + -0xb.fffffffffffffffffffffffffcp-152, false, false, + false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xb.fffffffffffffffffffffffffff8p-152, false, false, + -0xb.fffffffffffffffffffffffffff8p-152, false, false), TEST ("-2.101947696487225606385594374934874196920392912814773657635" "602425834686624028790902229957282543182373046875e-45", false, - -0x1p-148, false, - -0x1p-148, false, - -0x8p-152, false, - -0x8p-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - true, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false), + 
-0x1p-148, false, true, + -0x1p-148, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + true, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false), TEST ("-2.101947696487225606385594374934874196920392912814773657635" "60242583468662402879090222995728254318237304687501e-45", false, - -0x1p-148, false, - -0x1p-148, false, - -0x8p-152, false, - -0x8p-152, false, - false, - -0xc.0000000000008p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - false, - -0xc.000000000000001p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - false, - -0xc.000000000000001p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - false, - -0xc.00000000000000000000000004p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false, - false, - -0xc.0000000000000000000000000008p-152, false, - -0xcp-152, false, - -0xcp-152, false, - -0xcp-152, false), + -0x1p-148, false, true, + -0x1p-148, false, true, + -0x8p-152, false, true, + -0x8p-152, false, true, + false, + -0xc.0000000000008p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.000000000000001p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.000000000000001p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.00000000000000000000000004p-152, 
false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + false, + -0xc.0000000000000000000000000008p-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false, + -0xcp-152, false, false), TEST ("3.5032461608120426773093239582247903282006548546912894293926" "7070972447770671465150371659547090530395507812499e-45", false, - 0x1p-148, false, - 0x1p-148, false, - 0x1p-148, false, - 0x1.8p-148, false, - false, - 0x1.3ffffffffffffp-148, false, - 0x1.4p-148, false, - 0x1.3ffffffffffffp-148, false, - 0x1.4p-148, false, - false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - 0x1.3ffffffffffffffep-148, false, - 0x1.4p-148, false, - false, - 0x1.3fffffffffffffffffffffffff8p-148, false, - 0x1.4p-148, false, - 0x1.3fffffffffffffffffffffffff8p-148, false, - 0x1.4p-148, false, - false, - 0x1.3fffffffffffffffffffffffffffp-148, false, - 0x1.4p-148, false, - 0x1.3fffffffffffffffffffffffffffp-148, false, - 0x1.4p-148, false), + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1.8p-148, false, true, + false, + 0x1.3ffffffffffffp-148, false, false, + 0x1.4p-148, false, false, + 0x1.3ffffffffffffp-148, false, false, + 0x1.4p-148, false, false, + false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + 0x1.3ffffffffffffffep-148, false, false, + 0x1.4p-148, false, false, + false, + 0x1.3fffffffffffffffffffffffff8p-148, false, false, + 0x1.4p-148, false, false, + 0x1.3fffffffffffffffffffffffff8p-148, false, false, + 0x1.4p-148, false, false, + false, + 0x1.3fffffffffffffffffffffffffffp-148, false, false, + 0x1.4p-148, false, false, + 0x1.3fffffffffffffffffffffffffffp-148, false, false, + 0x1.4p-148, false, 
false), TEST ("3.5032461608120426773093239582247903282006548546912894293926" "70709724477706714651503716595470905303955078125e-45", false, - 0x1p-148, false, - 0x1p-148, false, - 0x1p-148, false, - 0x1.8p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - true, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false), + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1p-148, false, true, + 0x1.8p-148, false, true, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + true, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false), TEST ("3.5032461608120426773093239582247903282006548546912894293926" "7070972447770671465150371659547090530395507812501e-45", false, - 0x1p-148, false, - 0x1.8p-148, false, - 0x1p-148, false, - 0x1.8p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000001p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000000002p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000000002p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 
0x1.400000000000000000000000008p-148, false, - false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4p-148, false, - 0x1.4000000000000000000000000001p-148, false), + 0x1p-148, false, true, + 0x1.8p-148, false, true, + 0x1p-148, false, true, + 0x1.8p-148, false, true, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000001p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000000002p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000000002p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.400000000000000000000000008p-148, false, false, + false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4p-148, false, false, + 0x1.4000000000000000000000000001p-148, false, false), TEST ("-3.503246160812042677309323958224790328200654854691289429392" "67070972447770671465150371659547090530395507812499e-45", false, - -0x1.8p-148, false, - -0x1p-148, false, - -0x1p-148, false, - -0x1p-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3ffffffffffffp-148, false, - -0x1.3ffffffffffffp-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3ffffffffffffffep-148, false, - -0x1.3ffffffffffffffep-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3ffffffffffffffep-148, false, - -0x1.3ffffffffffffffep-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3fffffffffffffffffffffffff8p-148, false, - -0x1.3fffffffffffffffffffffffff8p-148, false, - false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.3fffffffffffffffffffffffffffp-148, false, - -0x1.3fffffffffffffffffffffffffffp-148, false), + -0x1.8p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + false, + -0x1.4p-148, false, 
false, + -0x1.4p-148, false, false, + -0x1.3ffffffffffffp-148, false, false, + -0x1.3ffffffffffffp-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + -0x1.3ffffffffffffffep-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3fffffffffffffffffffffffff8p-148, false, false, + -0x1.3fffffffffffffffffffffffff8p-148, false, false, + false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.3fffffffffffffffffffffffffffp-148, false, false, + -0x1.3fffffffffffffffffffffffffffp-148, false, false), TEST ("-3.503246160812042677309323958224790328200654854691289429392" "670709724477706714651503716595470905303955078125e-45", false, - -0x1.8p-148, false, - -0x1p-148, false, - -0x1p-148, false, - -0x1p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - true, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false), + -0x1.8p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + 
-0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + true, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false), TEST ("-3.503246160812042677309323958224790328200654854691289429392" "67070972447770671465150371659547090530395507812501e-45", false, - -0x1.8p-148, false, - -0x1.8p-148, false, - -0x1p-148, false, - -0x1p-148, false, - false, - -0x1.4000000000001p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.4000000000000002p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.4000000000000002p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.400000000000000000000000008p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - false, - -0x1.4000000000000000000000000001p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false, - -0x1.4p-148, false), + -0x1.8p-148, false, true, + -0x1.8p-148, false, true, + -0x1p-148, false, true, + -0x1p-148, false, true, + false, + -0x1.4000000000001p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.4000000000000002p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.4000000000000002p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.400000000000000000000000008p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + false, + -0x1.4000000000000000000000000001p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false, + -0x1.4p-148, false, false), TEST ("7.4109846876186981626485318930233205854758970392148714663837" "852375101326090531312779794975454245398856969484704316857659" 
"638998506553390969459816219401617281718945106978546710679176" @@ -3818,35 +3818,35 @@ static const struct test tests[] = { "337560846003984904972149117463085539556354188641513168478436" "31308023759629577398300170898437499e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - 0x5.fffffffffffffff8p-1076, false, - 0x6p-1076, false, - false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x5.fffffffffffffffffffffffffffcp-1076, false, - 0x6p-1076, false, - 0x5.fffffffffffffffffffffffffffcp-1076, false, - 0x6p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x5.fffffffffffffff8p-1076, false, false, + 0x6p-1076, false, false, + 0x5.fffffffffffffff8p-1076, false, false, + 0x6p-1076, false, false, + false, + 0x5.fffffffffffffff8p-1076, false, false, + 0x6p-1076, false, false, + 0x5.fffffffffffffff8p-1076, false, false, + 0x6p-1076, false, false, + false, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x5.fffffffffffffffffffffffffffcp-1076, false, false, + 0x6p-1076, false, false, + 0x5.fffffffffffffffffffffffffffcp-1076, false, false, + 0x6p-1076, false, false), TEST ("7.4109846876186981626485318930233205854758970392148714663837" "852375101326090531312779794975454245398856969484704316857659" "638998506553390969459816219401617281718945106978546710679176" @@ -3861,35 +3861,35 @@ static const struct test tests[] = { "337560846003984904972149117463085539556354188641513168478436" 
"313080237596295773983001708984375e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-1076, false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - true, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - true, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - false, - 0x4p-1076, false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - true, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + true, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + true, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + true, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false), TEST ("7.4109846876186981626485318930233205854758970392148714663837" "852375101326090531312779794975454245398856969484704316857659" "638998506553390969459816219401617281718945106978546710679176" @@ -3904,35 +3904,35 @@ static const struct test tests[] = { "337560846003984904972149117463085539556354188641513168478436" "31308023759629577398300170898437501e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-1076, false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6.0000000000000008p-1076, false, - false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6.0000000000000008p-1076, false, - false, - 0x4p-1076, 
false, - 0x8p-1076, false, - 0x4p-1076, false, - 0x8p-1076, false, - false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6p-1076, false, - 0x6.0000000000000000000000000004p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6.0000000000000008p-1076, false, false, + false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6.0000000000000008p-1076, false, false, + false, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + 0x4p-1076, false, true, + 0x8p-1076, false, true, + false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6p-1076, false, false, + 0x6.0000000000000000000000000004p-1076, false, false), TEST ("-7.410984687618698162648531893023320585475897039214871466383" "785237510132609053131277979497545424539885696948470431685765" "963899850655339096945981621940161728171894510697854671067917" @@ -3947,35 +3947,35 @@ static const struct test tests[] = { "433756084600398490497214911746308553955635418864151316847843" "631308023759629577398300170898437499e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - -0x5.fffffffffffffff8p-1076, false, - false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x5.fffffffffffffffffffffffffffcp-1076, false, - -0x5.fffffffffffffffffffffffffffcp-1076, false), + -0x8p-152, false, true, + -0x0p+0, 
false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + -0x5.fffffffffffffff8p-1076, false, false, + false, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x5.fffffffffffffffffffffffffffcp-1076, false, false, + -0x5.fffffffffffffffffffffffffffcp-1076, false, false), TEST ("-7.410984687618698162648531893023320585475897039214871466383" "785237510132609053131277979497545424539885696948470431685765" "963899850655339096945981621940161728171894510697854671067917" @@ -3990,35 +3990,35 @@ static const struct test tests[] = { "433756084600398490497214911746308553955635418864151316847843" "6313080237596295773983001708984375e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - true, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - true, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - true, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + true, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + 
-0x6p-1076, false, false, + true, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + true, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false), TEST ("-7.410984687618698162648531893023320585475897039214871466383" "785237510132609053131277979497545424539885696948470431685765" "963899850655339096945981621940161728171894510697854671067917" @@ -4033,35 +4033,35 @@ static const struct test tests[] = { "433756084600398490497214911746308553955635418864151316847843" "631308023759629577398300170898437501e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6.0000000000000008p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - false, - -0x6.0000000000000008p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - false, - -0x8p-1076, false, - -0x8p-1076, false, - -0x4p-1076, false, - -0x4p-1076, false, - false, - -0x6.0000000000000000000000000004p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false, - -0x6p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + -0x6.0000000000000008p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + false, + -0x6.0000000000000008p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + false, + -0x8p-1076, false, true, + -0x8p-1076, false, true, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + false, + 
-0x6.0000000000000000000000000004p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false, + -0x6p-1076, false, false), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -4255,35 +4255,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687499e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0x8p-16448, false, - 0xcp-16448, false, - 0x8p-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x8p-16448, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, false, true, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, false, true), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -4477,35 +4477,35 @@ static const struct test tests[] = { 
"866268925981702690270202829595794350800918257913991744455922" "6833433740466716699302196502685546875e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - true, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -4699,35 +4699,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687501e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0x1p-16444, false, - 
false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xc.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0x1p-16444, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xc.000000000004p-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -4921,35 +4921,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687499e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xb.fffffffffffcp-16448, false, - -0xb.fffffffffffcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + 
false, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -5143,35 +5143,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "26833433740466716699302196502685546875e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + true, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0xcp-16448, false, false, + 
-0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -5365,35 +5365,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687501e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x1p-16444, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xc.000000000004p-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x1p-16444, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xc.000000000004p-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -5587,35 +5587,35 @@ static const struct test tests[] = { 
"866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687499e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0x8p-16448, false, - 0xcp-16448, false, - 0x8p-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false, - 0xb.fffffffffffcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x8p-16448, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + 0x8p-16448, false, true, + 0xcp-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, false, true, + 0xb.fffffffffffcp-16448, false, true, + 0xcp-16448, false, true), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -5809,35 +5809,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "6833433740466716699302196502685546875e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - true, - 0xcp-16448, false, - 0xcp-16448, 
false, - 0xcp-16448, false, - 0xcp-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false, + 0xcp-16448, false, false), TEST ("5.4677992978237119037926089004291297245985762235403450155814" "707305425575329500966052143410629387408077958710210208052966" "529504784489330482549602621133847135082257338717668975178538" @@ -6031,35 +6031,35 @@ static const struct test tests[] = { "866268925981702690270202829595794350800918257913991744455922" "683343374046671669930219650268554687501e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x8p-16448, false, - 0x1p-16444, false, - 0x8p-16448, false, - 0x1p-16444, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0x1p-16444, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xcp-16448, false, - 0xc.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 
0x8p-16448, false, true, + 0x1p-16444, false, true, + 0x8p-16448, false, true, + 0x1p-16444, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0x1p-16444, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xcp-16448, false, true, + 0xc.000000000004p-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -6253,35 +6253,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687499e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xb.fffffffffffcp-16448, false, - -0xb.fffffffffffcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xcp-16448, false, true, + 
-0xcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true, + -0xb.fffffffffffcp-16448, false, true), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -6475,35 +6475,35 @@ static const struct test tests[] = { "386626892598170269027020282959579435080091825791399174445592" "26833433740466716699302196502685546875e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + true, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false, + -0xcp-16448, false, false), TEST ("-5.467799297823711903792608900429129724598576223540345015581" "470730542557532950096605214341062938740807795871021020805296" "652950478448933048254960262113384713508225733871766897517853" @@ -6697,630 +6697,630 @@ static const struct test tests[] = { 
"386626892598170269027020282959579435080091825791399174445592" "2683343374046671669930219650268554687501e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x1p-16444, false, - -0x1p-16444, false, - -0x8p-16448, false, - -0x8p-16448, false, - false, - -0x1p-16444, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0xc.000000000004p-16448, false, - -0xcp-16448, false, - -0xcp-16448, false, - -0xcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x1p-16444, false, true, + -0x1p-16444, false, true, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + false, + -0x1p-16444, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0xc.000000000004p-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true, + -0xcp-16448, false, true), TEST ("-0x0.7p-149", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - true, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false, - -0x3.8p-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + 
-0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + true, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false, + -0x3.8p-152, false, false), TEST ("-0x0.7p-1074", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - true, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false, - -0x1.cp-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + true, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false, + -0x1.cp-1076, false, false), TEST ("-0x0.7p-16445", false, - -0x8p-152, false, - -0x0p+0, 
false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x3.8p-16448, false, - -0x3.8p-16448, false, - -0x3.8p-16448, false, - -0x3.8p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x3.8p-16448, false, false, + -0x3.8p-16448, false, false, + -0x3.8p-16448, false, false, + -0x3.8p-16448, false, false), TEST ("-0x0.7p-16494", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, 
true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST ("0x1p-150", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false), TEST ("0x1p-1075", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, 
false, - 0x2p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false), TEST ("0x1p-16446", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false), TEST ("0x1p-16495", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, 
false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("-0x1p-150", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + 
-0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false), TEST ("-0x1p-1075", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false), TEST ("-0x1p-16446", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false), + -0x8p-152, false, true, + 
-0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false), TEST ("-0x1p-16495", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST (".70064923216240853546186479164495807e-45", false, - 0x0p+0, false, - 0x8p-152, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, 
false, - 0x4.0000000000004p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.00000000000000000000000002p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000000000000000004p-152, false), + 0x0p+0, false, true, + 0x8p-152, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000004p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.00000000000000000000000002p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000000000000000004p-152, false, false), TEST ("7.0064923216240853546186479164495806564013097093825788587853" "4141944895541342930300743319094181060791015624e-46", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x3.ffffffffffffep-152, false, - 0x4p-152, false, - 0x3.ffffffffffffep-152, false, - 0x4p-152, false, - false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - 0x3.fffffffffffffffcp-152, false, - 0x4p-152, false, - false, - 0x3.ffffffffffffffffffffffffffp-152, false, - 0x4p-152, false, - 0x3.ffffffffffffffffffffffffffp-152, false, - 0x4p-152, false, - false, - 0x3.fffffffffffffffffffffffffffep-152, false, - 0x4p-152, 
false, - 0x3.fffffffffffffffffffffffffffep-152, false, - 0x4p-152, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x3.ffffffffffffep-152, false, false, + 0x4p-152, false, false, + 0x3.ffffffffffffep-152, false, false, + 0x4p-152, false, false, + false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + 0x3.fffffffffffffffcp-152, false, false, + 0x4p-152, false, false, + false, + 0x3.ffffffffffffffffffffffffffp-152, false, false, + 0x4p-152, false, false, + 0x3.ffffffffffffffffffffffffffp-152, false, false, + 0x4p-152, false, false, + false, + 0x3.fffffffffffffffffffffffffffep-152, false, false, + 0x4p-152, false, false, + 0x3.fffffffffffffffffffffffffffep-152, false, false, + 0x4p-152, false, false), TEST ("7.0064923216240853546186479164495806564013097093825788587853" "4141944895541342930300743319094181060791015625e-46", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - true, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 
true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + true, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false), TEST ("7.0064923216240853546186479164495806564013097093825788587853" "4141944895541342930300743319094181060791015626e-46", false, - 0x0p+0, false, - 0x8p-152, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000004p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000008p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.00000000000000000000000002p-152, false, - false, - 0x4p-152, false, - 0x4p-152, false, - 0x4p-152, false, - 0x4.0000000000000000000000000004p-152, false), + 0x0p+0, false, true, + 0x8p-152, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000004p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000008p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.00000000000000000000000002p-152, false, false, + false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4p-152, false, false, + 0x4.0000000000000000000000000004p-152, false, false), TEST ("-7.006492321624085354618647916449580656401309709382578858785" "34141944895541342930300743319094181060791015624e-46", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-152, false, - -0x4p-152, false, - 
-0x3.ffffffffffffep-152, false, - -0x3.ffffffffffffep-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.fffffffffffffffcp-152, false, - -0x3.fffffffffffffffcp-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.fffffffffffffffcp-152, false, - -0x3.fffffffffffffffcp-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.ffffffffffffffffffffffffffp-152, false, - -0x3.ffffffffffffffffffffffffffp-152, false, - false, - -0x4p-152, false, - -0x4p-152, false, - -0x3.fffffffffffffffffffffffffffep-152, false, - -0x3.fffffffffffffffffffffffffffep-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.ffffffffffffep-152, false, false, + -0x3.ffffffffffffep-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + -0x3.fffffffffffffffcp-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.ffffffffffffffffffffffffffp-152, false, false, + -0x3.ffffffffffffffffffffffffffp-152, false, false, + false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x3.fffffffffffffffffffffffffffep-152, false, false, + -0x3.fffffffffffffffffffffffffffep-152, false, false), TEST ("-7.006492321624085354618647916449580656401309709382578858785" "34141944895541342930300743319094181060791015625e-46", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - 
-0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - true, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + true, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false), TEST ("-7.006492321624085354618647916449580656401309709382578858785" "34141944895541342930300743319094181060791015626e-46", false, - -0x8p-152, false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4.0000000000004p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - -0x4.0000000000000008p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - -0x4.0000000000000008p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - -0x4.00000000000000000000000002p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false, - false, - -0x4.0000000000000000000000000004p-152, false, - -0x4p-152, false, - -0x4p-152, false, - -0x4p-152, false), + -0x8p-152, false, true, + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4.0000000000004p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.0000000000000008p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.0000000000000008p-152, false, 
false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.00000000000000000000000002p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + false, + -0x4.0000000000000000000000000004p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false, + -0x4p-152, false, false), TEST ("2.4703282292062327208828439643411068618252990130716238221279" "284125033775363510437593264991818081799618989828234772285886" "546332835517796989819938739800539093906315035659515570226392" @@ -7335,35 +7335,35 @@ static const struct test tests[] = { "779186948667994968324049705821028513185451396213837722826145" "437693412532098591327667236328124e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - 0x1.fffffffffffffffep-1076, false, - 0x2p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x1.ffffffffffffffffffffffffffffp-1076, false, - 0x2p-1076, false, - 0x1.ffffffffffffffffffffffffffffp-1076, false, - 0x2p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + 0x1.fffffffffffffffep-1076, false, false, + 0x2p-1076, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 
0x1.ffffffffffffffffffffffffffffp-1076, false, false, + 0x2p-1076, false, false, + 0x1.ffffffffffffffffffffffffffffp-1076, false, false, + 0x2p-1076, false, false), TEST ("2.4703282292062327208828439643411068618252990130716238221279" "284125033775363510437593264991818081799618989828234772285886" "546332835517796989819938739800539093906315035659515570226392" @@ -7378,35 +7378,35 @@ static const struct test tests[] = { "779186948667994968324049705821028513185451396213837722826145" "437693412532098591327667236328125e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false), TEST ("2.4703282292062327208828439643411068618252990130716238221279" "284125033775363510437593264991818081799618989828234772285886" "546332835517796989819938739800539093906315035659515570226392" @@ -7421,35 +7421,35 @@ static const struct test tests[] = { "779186948667994968324049705821028513185451396213837722826145" 
"437693412532098591327667236328126e-324", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x4p-1076, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2.0000000000000004p-1076, false, - false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2.0000000000000004p-1076, false, - false, - 0x0p+0, false, - 0x4p-1076, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2p-1076, false, - 0x2.0000000000000000000000000002p-1076, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x4p-1076, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2.0000000000000004p-1076, false, false, + false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2.0000000000000004p-1076, false, false, + false, + 0x0p+0, false, true, + 0x4p-1076, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2p-1076, false, false, + 0x2.0000000000000000000000000002p-1076, false, false), TEST ("-2.470328229206232720882843964341106861825299013071623822127" "928412503377536351043759326499181808179961898982823477228588" "654633283551779698981993873980053909390631503565951557022639" @@ -7464,35 +7464,35 @@ static const struct test tests[] = { "477918694866799496832404970582102851318545139621383772282614" "5437693412532098591327667236328124e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x1.fffffffffffffffep-1076, false, - -0x1.fffffffffffffffep-1076, false, - 
false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x1.fffffffffffffffep-1076, false, - -0x1.fffffffffffffffep-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x1.ffffffffffffffffffffffffffffp-1076, false, - -0x1.ffffffffffffffffffffffffffffp-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + -0x1.fffffffffffffffep-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x1.ffffffffffffffffffffffffffffp-1076, false, false, + -0x1.ffffffffffffffffffffffffffffp-1076, false, false), TEST ("-2.470328229206232720882843964341106861825299013071623822127" "928412503377536351043759326499181808179961898982823477228588" "654633283551779698981993873980053909390631503565951557022639" @@ -7507,35 +7507,35 @@ static const struct test tests[] = { "477918694866799496832404970582102851318545139621383772282614" "5437693412532098591327667236328125e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - true, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-1076, false, - 
-0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false), TEST ("-2.470328229206232720882843964341106861825299013071623822127" "928412503377536351043759326499181808179961898982823477228588" "654633283551779698981993873980053909390631503565951557022639" @@ -7550,35 +7550,35 @@ static const struct test tests[] = { "477918694866799496832404970582102851318545139621383772282614" "5437693412532098591327667236328126e-324", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2.0000000000000004p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x2.0000000000000004p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - false, - -0x4p-1076, false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2.0000000000000000000000000002p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false, - -0x2p-1076, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2.0000000000000004p-1076, false, false, + -0x2p-1076, false, false, + 
-0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x2.0000000000000004p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + false, + -0x4p-1076, false, true, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2.0000000000000000000000000002p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false, + -0x2p-1076, false, false), TEST ("1.8225997659412373012642029668097099081995254078467816718604" "902435141858443166988684047803543129136025986236736736017655" "509834928163110160849867540377949045027419112905889658392846" @@ -7772,35 +7772,35 @@ static const struct test tests[] = { "622089641993900896756734276531931450266972752637997248151974" "2277811246822238899767398834228515624e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x4p-16448, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x3.fffffffffffcp-16448, false, - 0x4p-16448, false, - 0x3.fffffffffffcp-16448, false, - 0x4p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x4p-16448, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x3.fffffffffffcp-16448, false, true, + 0x4p-16448, false, true, + 0x3.fffffffffffcp-16448, false, true, + 0x4p-16448, false, 
true), TEST ("1.8225997659412373012642029668097099081995254078467816718604" "902435141858443166988684047803543129136025986236736736017655" "509834928163110160849867540377949045027419112905889658392846" @@ -7994,35 +7994,35 @@ static const struct test tests[] = { "622089641993900896756734276531931450266972752637997248151974" "2277811246822238899767398834228515625e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false, + 0x4p-16448, false, false), TEST ("1.8225997659412373012642029668097099081995254078467816718604" "902435141858443166988684047803543129136025986236736736017655" "509834928163110160849867540377949045027419112905889658392846" @@ -8216,35 +8216,35 @@ static const struct test tests[] = { "622089641993900896756734276531931450266972752637997248151974" "2277811246822238899767398834228515626e-4951", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 
0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x8p-16448, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4p-16448, false, - 0x4.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x8p-16448, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x4p-16448, false, true, + 0x4.000000000004p-16448, false, true), TEST ("-1.822599765941237301264202966809709908199525407846781671860" "490243514185844316698868404780354312913602598623673673601765" "550983492816311016084986754037794904502741911290588965839284" @@ -8438,35 +8438,35 @@ static const struct test tests[] = { "462208964199390089675673427653193145026697275263799724815197" "42277811246822238899767398834228515624e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x3.fffffffffffcp-16448, false, - -0x3.fffffffffffcp-16448, false), + 
-0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x3.fffffffffffcp-16448, false, true, + -0x3.fffffffffffcp-16448, false, true), TEST ("-1.822599765941237301264202966809709908199525407846781671860" "490243514185844316698868404780354312913602598623673673601765" "550983492816311016084986754037794904502741911290588965839284" @@ -8660,35 +8660,35 @@ static const struct test tests[] = { "462208964199390089675673427653193145026697275263799724815197" "42277811246822238899767398834228515625e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + 
-0x4p-16448, false, false, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false, + -0x4p-16448, false, false), TEST ("-1.822599765941237301264202966809709908199525407846781671860" "490243514185844316698868404780354312913602598623673673601765" "550983492816311016084986754037794904502741911290588965839284" @@ -8882,35 +8882,35 @@ static const struct test tests[] = { "462208964199390089675673427653193145026697275263799724815197" "42277811246822238899767398834228515626e-4951", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4.000000000004p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x4p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4.000000000004p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x4p-16448, false, true), TEST ("9.1129988297061865063210148340485495409976270392339083593024" "512175709292215834943420239017715645680129931183683680088277" 
"549174640815550804249337701889745225137095564529448291964230" @@ -9104,35 +9104,35 @@ static const struct test tests[] = { "110448209969504483783671382659657251334863763189986240759871" "1389056234111194498836994171142578124e-4952", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x1.fffffffffffcp-16448, false, - 0x2p-16448, false, - 0x1.fffffffffffcp-16448, false, - 0x2p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffcp-16448, false, true, + 0x2p-16448, false, true, + 0x1.fffffffffffcp-16448, false, true, + 0x2p-16448, false, true), TEST ("9.1129988297061865063210148340485495409976270392339083593024" "512175709292215834943420239017715645680129931183683680088277" "549174640815550804249337701889745225137095564529448291964230" @@ -9326,35 +9326,35 @@ static const struct test tests[] = { "110448209969504483783671382659657251334863763189986240759871" "1389056234111194498836994171142578125e-4952", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 
0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - true, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x2p-16448, false, false, + 0x2p-16448, false, false, + 0x2p-16448, false, false, + 0x2p-16448, false, false), TEST ("9.1129988297061865063210148340485495409976270392339083593024" "512175709292215834943420239017715645680129931183683680088277" "549174640815550804249337701889745225137095564529448291964230" @@ -9548,35 +9548,35 @@ static const struct test tests[] = { "110448209969504483783671382659657251334863763189986240759871" "1389056234111194498836994171142578126e-4952", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x4p-16448, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2p-16448, false, - 0x2.000000000004p-16448, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, 
false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x4p-16448, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x2p-16448, false, true, + 0x2p-16448, false, true, + 0x2p-16448, false, true, + 0x2.000000000004p-16448, false, true), TEST ("-9.112998829706186506321014834048549540997627039233908359302" "451217570929221583494342023901771564568012993118368368008827" "754917464081555080424933770188974522513709556452944829196423" @@ -9770,35 +9770,35 @@ static const struct test tests[] = { "311044820996950448378367138265965725133486376318998624075987" "11389056234111194498836994171142578124e-4952", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2p-16448, false, - -0x2p-16448, false, - -0x1.fffffffffffcp-16448, false, - -0x1.fffffffffffcp-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2p-16448, false, true, + -0x2p-16448, false, true, + -0x1.fffffffffffcp-16448, false, true, + 
-0x1.fffffffffffcp-16448, false, true), TEST ("-9.112998829706186506321014834048549540997627039233908359302" "451217570929221583494342023901771564568012993118368368008827" "754917464081555080424933770188974522513709556452944829196423" @@ -9992,35 +9992,35 @@ static const struct test tests[] = { "311044820996950448378367138265965725133486376318998624075987" "11389056234111194498836994171142578125e-4952", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - true, - -0x2p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + true, + -0x2p-16448, false, false, + -0x2p-16448, false, false, + -0x2p-16448, false, false, + -0x2p-16448, false, false), TEST ("-9.112998829706186506321014834048549540997627039233908359302" "451217570929221583494342023901771564568012993118368368008827" "754917464081555080424933770188974522513709556452944829196423" @@ -10214,35 +10214,35 @@ static const struct test tests[] = { "311044820996950448378367138265965725133486376318998624075987" "11389056234111194498836994171142578126e-4952", false, - -0x8p-152, false, - -0x0p+0, false, - 
-0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x2.000000000004p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false, - -0x2p-16448, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x2.000000000004p-16448, false, true, + -0x2p-16448, false, true, + -0x2p-16448, false, true, + -0x2p-16448, false, true), TEST ("3.2375875597190125554622194791138232762497846690173405048449" "421945985197700620596855088357456383249701279390707384240598" "382936099431912710233425550359863089915213963553756674672083" @@ -10437,35 +10437,35 @@ static const struct test tests[] = { "182358152808745703724362178773168996492870519432472065091133" "11767578124e-4966", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, 
true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("3.2375875597190125554622194791138232762497846690173405048449" "421945985197700620596855088357456383249701279390707384240598" "382936099431912710233425550359863089915213963553756674672083" @@ -10660,35 +10660,35 @@ static const struct test tests[] = { "182358152808745703724362178773168996492870519432472065091133" "11767578125e-4966", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 
0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("3.2375875597190125554622194791138232762497846690173405048449" "421945985197700620596855088357456383249701279390707384240598" "382936099431912710233425550359863089915213963553756674672083" @@ -10883,35 +10883,35 @@ static const struct test tests[] = { "182358152808745703724362178773168996492870519432472065091133" "11767578126e-4966", false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-152, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x8p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-16448, false, - false, - 0x0p+0, false, - 0x0p+0, false, - 0x0p+0, false, - 0x4p-1076, false, - false, - 0x0p+0, false, - 0x4p-16496, false, - 0x0p+0, false, - 0x4p-16496, false), + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-16448, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x0p+0, false, true, + 0x4p-16496, false, true, + 0x0p+0, false, true, + 0x4p-16496, false, true), TEST ("-3.237587559719012555462219479113823276249784669017340504844" "942194598519770062059685508835745638324970127939070738424059" "838293609943191271023342555035986308991521396355375667467208" @@ -11106,35 +11106,35 @@ static const struct test tests[] = { "218235815280874570372436217877316899649287051943247206509113" "311767578124e-4966", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, 
false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST ("-3.237587559719012555462219479113823276249784669017340504844" "942194598519770062059685508835745638324970127939070738424059" "838293609943191271023342555035986308991521396355375667467208" @@ -11329,35 +11329,35 @@ static const struct test tests[] = { "218235815280874570372436217877316899649287051943247206509113" "311767578125e-4966", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + 
-0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST ("-3.237587559719012555462219479113823276249784669017340504844" "942194598519770062059685508835745638324970127939070738424059" "838293609943191271023342555035986308991521396355375667467208" @@ -11552,66 +11552,66 @@ static const struct test tests[] = { "218235815280874570372436217877316899649287051943247206509113" "311767578126e-4966", false, - -0x8p-152, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x8p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16448, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-1076, false, - -0x0p+0, false, - -0x0p+0, false, - -0x0p+0, false, - false, - -0x4p-16496, false, - -0x4p-16496, false, - -0x0p+0, false, - -0x0p+0, false), + -0x8p-152, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x8p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16448, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-1076, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true, + false, + -0x4p-16496, false, true, + -0x4p-16496, false, true, + -0x0p+0, false, true, + -0x0p+0, false, true), TEST 
("340282366920938463463374607431768211455", false, - 0xf.fffffp+124, false, - INF, true, - 0xf.fffffp+124, false, - INF, true, - false, - 0xf.ffffffffffff8p+124, false, - 0x1p+128, false, - 0xf.ffffffffffff8p+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffp+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffffffffffffcp+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffffffffffffcp+124, false, - 0x1p+128, false, - false, - 0xf.fffffffffffffffffffffffffff8p+124, false, - 0x1p+128, false, - 0xf.fffffffffffffffffffffffffff8p+124, false, - 0x1p+128, false), + 0xf.fffffp+124, false, false, + INF, true, false, + 0xf.fffffp+124, false, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+124, false, false, + 0x1p+128, false, false, + 0xf.ffffffffffff8p+124, false, false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffp+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffp+124, false, false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffp+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffp+124, false, false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffffffffffffcp+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffffffffffffcp+124, false, false, + 0x1p+128, false, false, + false, + 0xf.fffffffffffffffffffffffffff8p+124, false, false, + 0x1p+128, false, false, + 0xf.fffffffffffffffffffffffffff8p+124, false, false, + 0x1p+128, false, false), TEST ("179769313486231590772930519078902473361797697894230657273430" "081157732675805500963132708477322407536021120113879871393357" "658789768814416622492847430639474124377767893424865485276302" @@ -11619,35 +11619,35 @@ static const struct test tests[] = { "540827237163350510684586298239947245938479716304835356329624" "224137215", false, - 0xf.fffffp+124, true, - 
INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, false, - INF, true, - 0xf.ffffffffffff8p+1020, false, - INF, true, - false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - 0xf.fffffffffffffffp+1020, false, - 0x1p+1024, false, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+1020, false, - 0x1p+1024, false, - 0xf.fffffffffffffffffffffffffff8p+1020, false, - 0x1p+1024, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + 0xf.fffffffffffffffp+1020, false, false, + 0x1p+1024, false, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+1020, false, false, + 0x1p+1024, false, false, + 0xf.fffffffffffffffffffffffffff8p+1020, false, false, + 0x1p+1024, false, false), TEST ("118973149535723176508575932662800713076344468709651023747267" "482123326135818048368690448859547261203991511543748483930925" "889766738130868742627452469834156500608087163436600489752214" @@ -11732,66 +11732,66 @@ static const struct test tests[] = { "047398248889922809181821393428829567971736994315246044702729" "0669964066815", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, 
true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffp+16380, false, - INF, true, - 0xf.fffffffffffffffp+16380, false, - INF, true, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true, - 0xf.fffffffffffffffffffffffffff8p+16380, false, - INF, true), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffp+16380, false, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffff8p+16380, false, false, + INF, true, false), TEST ("-340282366920938463463374607431768211455", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, false, - -0xf.fffffp+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.ffffffffffff8p+124, false, - -0xf.ffffffffffff8p+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffp+124, false, - -0xf.fffffffffffffffp+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffp+124, false, - -0xf.fffffffffffffffp+124, false, - false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffffffffffffcp+124, false, - -0xf.fffffffffffffffffffffffffcp+124, false, - 
false, - -0x1p+128, false, - -0x1p+128, false, - -0xf.fffffffffffffffffffffffffff8p+124, false, - -0xf.fffffffffffffffffffffffffff8p+124, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, false, false, + -0xf.fffffp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.ffffffffffff8p+124, false, false, + -0xf.ffffffffffff8p+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffp+124, false, false, + -0xf.fffffffffffffffp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffp+124, false, false, + -0xf.fffffffffffffffp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffffffffffffcp+124, false, false, + -0xf.fffffffffffffffffffffffffcp+124, false, false, + false, + -0x1p+128, false, false, + -0x1p+128, false, false, + -0xf.fffffffffffffffffffffffffff8p+124, false, false, + -0xf.fffffffffffffffffffffffffff8p+124, false, false), TEST ("-17976931348623159077293051907890247336179769789423065727343" "008115773267580550096313270847732240753602112011387987139335" "765878976881441662249284743063947412437776789342486548527630" @@ -11799,35 +11799,35 @@ static const struct test tests[] = { "054082723716335051068458629823994724593847971630483535632962" "4224137215", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, false, - -0xf.ffffffffffff8p+1020, false, - false, - -0x1p+1024, false, - -0x1p+1024, false, - -0xf.fffffffffffffffp+1020, false, - -0xf.fffffffffffffffp+1020, false, - false, - -0x1p+1024, false, - -0x1p+1024, false, - -0xf.fffffffffffffffp+1020, false, - -0xf.fffffffffffffffp+1020, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -0x1p+1024, false, - -0x1p+1024, 
false, - -0xf.fffffffffffffffffffffffffff8p+1020, false, - -0xf.fffffffffffffffffffffffffff8p+1020, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, false, false, + -0xf.ffffffffffff8p+1020, false, false, + false, + -0x1p+1024, false, false, + -0x1p+1024, false, false, + -0xf.fffffffffffffffp+1020, false, false, + -0xf.fffffffffffffffp+1020, false, false, + false, + -0x1p+1024, false, false, + -0x1p+1024, false, false, + -0xf.fffffffffffffffp+1020, false, false, + -0xf.fffffffffffffffp+1020, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -0x1p+1024, false, false, + -0x1p+1024, false, false, + -0xf.fffffffffffffffffffffffffff8p+1020, false, false, + -0xf.fffffffffffffffffffffffffff8p+1020, false, false), TEST ("-11897314953572317650857593266280071307634446870965102374726" "748212332613581804836869044885954726120399151154374848393092" "588976673813086874262745246983415650060808716343660048975221" @@ -11912,3529 +11912,3901 @@ static const struct test tests[] = { "904739824888992280918182139342882956797173699431524604470272" "90669964066815", false, - -INF, true, - -INF, true, - -0xf.fffffp+124, true, - -0xf.fffffp+124, true, - false, - -INF, true, - -INF, true, - -0xf.ffffffffffff8p+1020, true, - -0xf.ffffffffffff8p+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffp+16380, false, - -0xf.fffffffffffffffp+16380, false, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - -0xf.fffffffffffffffffffffffffcp+1020, true, - false, - -INF, true, - -INF, true, - -0xf.fffffffffffffffffffffffffff8p+16380, false, - 
-0xf.fffffffffffffffffffffffffff8p+16380, false), + -INF, true, false, + -INF, true, false, + -0xf.fffffp+124, true, false, + -0xf.fffffp+124, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.ffffffffffff8p+1020, true, false, + -0xf.ffffffffffff8p+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffp+16380, false, false, + -0xf.fffffffffffffffp+16380, false, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + -0xf.fffffffffffffffffffffffffcp+1020, true, false, + false, + -INF, true, false, + -INF, true, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false, + -0xf.fffffffffffffffffffffffffff8p+16380, false, false), TEST ("+0x.80000000000000000000000000000001p1025", false, - 0xf.fffffp+124, true, - INF, true, - 0xf.fffffp+124, true, - INF, true, - false, - 0xf.ffffffffffff8p+1020, true, - INF, true, - 0xf.ffffffffffff8p+1020, true, - INF, true, - false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1.0000000000000002p+1024, false, - false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1.0000000000000002p+1024, false, - false, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - 0xf.fffffffffffffffffffffffffcp+1020, true, - INF, true, - false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1p+1024, false, - 0x1.0000000000000000000000000001p+1024, false), + 0xf.fffffp+124, true, false, + INF, true, false, + 0xf.fffffp+124, true, false, + INF, true, false, + false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + 0xf.ffffffffffff8p+1020, true, false, + INF, true, false, + false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1.0000000000000002p+1024, false, false, + false, + 0x1p+1024, false, false, + 
0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1.0000000000000002p+1024, false, false, + false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + 0xf.fffffffffffffffffffffffffcp+1020, true, false, + INF, true, false, + false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1p+1024, false, false, + 0x1.0000000000000000000000000001p+1024, false, false), TEST ("1.5", true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - true, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false, - 0x1.8p+0, false), + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + true, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false, + 0x1.8p+0, false, false), TEST ("1.25", true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 
0x1.4p+0, false, - true, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false, - 0x1.4p+0, false), + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + true, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false, + 0x1.4p+0, false, false), TEST ("1.125", true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - true, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false, - 0x1.2p+0, false), + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + true, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false, + 0x1.2p+0, false, false), TEST ("1.0625", true, - 0x1.1p+0, false, - 
0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - true, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false, - 0x1.1p+0, false), + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + true, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false, + 0x1.1p+0, false, false), TEST ("1.03125", true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - true, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false, - 0x1.08p+0, false), + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 
0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + true, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false, + 0x1.08p+0, false, false), TEST ("1.015625", true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - true, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false, - 0x1.04p+0, false), + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + true, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false, + 0x1.04p+0, false, false), TEST ("1.0078125", true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, 
false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - true, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false, - 0x1.02p+0, false), + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + true, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false, + 0x1.02p+0, false, false), TEST ("1.00390625", true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - true, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false, - 0x1.01p+0, false), + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + true, + 0x1.01p+0, false, false, 
+ 0x1.01p+0, false, false, + 0x1.01p+0, false, false, + 0x1.01p+0, false, false), TEST ("1.001953125", true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - true, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false, - 0x1.008p+0, false), + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + true, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false, + 0x1.008p+0, false, false), TEST ("1.0009765625", true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - true, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false, - 0x1.004p+0, false), + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 
0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + true, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false, + 0x1.004p+0, false, false), TEST ("1.00048828125", true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - true, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false, - 0x1.002p+0, false), + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + true, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false, + 0x1.002p+0, false, false), TEST ("1.000244140625", true, - 0x1.001p+0, false, - 
0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - true, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false, - 0x1.001p+0, false), + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + true, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false, + 0x1.001p+0, false, false), TEST ("1.0001220703125", true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - true, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false, - 0x1.0008p+0, false), + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 
0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + true, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false, + 0x1.0008p+0, false, false), TEST ("1.00006103515625", true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - true, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false, - 0x1.0004p+0, false), + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + true, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false, + 0x1.0004p+0, false, false), TEST ("1.000030517578125", true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 
0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - true, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false, - 0x1.0002p+0, false), + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + true, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false, + 0x1.0002p+0, false, false), TEST ("1.0000152587890625", true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - true, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false, - 0x1.0001p+0, false), + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 
0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + true, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false, + 0x1.0001p+0, false, false), TEST ("1.00000762939453125", true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - true, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false, - 0x1.00008p+0, false), + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + true, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false, + 0x1.00008p+0, false, false), 
TEST ("1.000003814697265625", true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - true, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false, - 0x1.00004p+0, false), + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + true, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false, + 0x1.00004p+0, false, false), TEST ("1.0000019073486328125", true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - true, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 0x1.00002p+0, false, - 
0x1.00002p+0, false), + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + true, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false, + 0x1.00002p+0, false, false), TEST ("1.00000095367431640625", true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - true, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false, - 0x1.00001p+0, false), + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 
0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + true, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false, + 0x1.00001p+0, false, false), TEST ("1.000000476837158203125", true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - true, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false, - 0x1.000008p+0, false), + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + true, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false, + 0x1.000008p+0, false, false), TEST ("1.0000000298023223876953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 
0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - true, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false, - 0x1.0000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + true, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false, + 0x1.0000008p+0, false, false), TEST ("1.00000001490116119384765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - true, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false, - 0x1.0000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, 
false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + true, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false, + 0x1.0000004p+0, false, false), TEST ("1.000000007450580596923828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - true, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false, - 0x1.0000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + true, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false, + 0x1.0000002p+0, false, false), TEST 
("1.0000000037252902984619140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - true, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false, - 0x1.0000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + true, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false, + 0x1.0000001p+0, false, false), TEST ("1.00000000186264514923095703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 
0x1.00000008p+0, false, - true, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false, - 0x1.00000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + true, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false, + 0x1.00000008p+0, false, false), TEST ("1.000000000931322574615478515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - true, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false, - 0x1.00000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, 
false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + true, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false, + 0x1.00000004p+0, false, false), TEST ("1.0000000004656612873077392578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - true, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false, - 0x1.00000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + true, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false, + 0x1.00000002p+0, false, false), TEST ("1.00000000023283064365386962890625", false, - 0x1p+0, false, - 
0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - true, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false, - 0x1.00000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + true, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false, + 0x1.00000001p+0, false, false), TEST ("1.000000000116415321826934814453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 
0x1.000000008p+0, false, - true, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false, - 0x1.000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + true, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false, + 0x1.000000008p+0, false, false), TEST ("1.0000000000582076609134674072265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - true, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false, - 0x1.000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 
0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + true, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false, + 0x1.000000004p+0, false, false), TEST ("1.00000000002910383045673370361328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - true, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false, - 0x1.000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + true, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 0x1.000000002p+0, false, false, + 
0x1.000000002p+0, false, false), TEST ("1.000000000014551915228366851806640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - true, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false, - 0x1.000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + true, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false, + 0x1.000000001p+0, false, false), TEST ("1.0000000000072759576141834259033203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, 
false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - true, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false, - 0x1.0000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + true, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false, + 0x1.0000000008p+0, false, false), TEST ("1.00000000000363797880709171295166015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - true, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false, - 0x1.0000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 
0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + true, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false, + 0x1.0000000004p+0, false, false), TEST ("1.000000000001818989403545856475830078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - true, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false, - 0x1.0000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, 
false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + true, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false, + 0x1.0000000002p+0, false, false), TEST ("1.0000000000009094947017729282379150390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - true, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false, - 0x1.0000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + true, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false, + 0x1.0000000001p+0, false, false), TEST ("1.00000000000045474735088646411895751953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, 
false, - 0x1.000002p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - true, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false, - 0x1.00000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + true, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false, + 0x1.00000000008p+0, false, false), TEST ("1.000000000000227373675443232059478759765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 
0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - true, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false, - 0x1.00000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + true, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false, + 0x1.00000000004p+0, false, false), TEST ("1.0000000000001136868377216160297393798828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - true, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false, - 0x1.00000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, 
false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + true, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false, + 0x1.00000000002p+0, false, false), TEST ("1.00000000000005684341886080801486968994140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - true, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false, - 0x1.00000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, 
false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + true, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false, + 0x1.00000000001p+0, false, false), TEST ("1.000000000000028421709430404007434844970703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - true, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false, - 0x1.000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + true, + 0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false, + 
0x1.000000000008p+0, false, false, + 0x1.000000000008p+0, false, false), TEST ("1.0000000000000142108547152020037174224853515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - true, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false, - 0x1.000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + true, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false, + 0x1.000000000004p+0, false, false), TEST ("1.00000000000000710542735760100185871124267578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, 
false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - true, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false, - 0x1.000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + true, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false, + 0x1.000000000002p+0, false, false), TEST ("1.000000000000003552713678800500929355621337890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 
0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - true, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false, - 0x1.000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + true, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false, + 0x1.000000000001p+0, false, false), TEST ("1.0000000000000017763568394002504646778106689453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - true, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false, - 0x1.0000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 
0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + true, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false, + 0x1.0000000000008p+0, false, false), TEST ("1.00000000000000088817841970012523233890533447265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - true, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false, - 0x1.0000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 
0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + true, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false, + 0x1.0000000000004p+0, false, false), TEST ("1.000000000000000444089209850062616169452667236328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - true, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false, - 0x1.0000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 
0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + true, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false, + 0x1.0000000000002p+0, false, false), TEST ("1.0000000000000002220446049250313080847263336181640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false, - 0x1.0000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false, + 0x1.0000000000001p+0, false, false), TEST 
("1.00000000000000011102230246251565404236316680908203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - true, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false, - 0x1.00000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + true, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + true, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + true, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false, + 0x1.00000000000008p+0, false, false), TEST ("1.000000000000000055511151231257827021181583404541015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, 
- 0x1.00000000000004p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - true, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false, - 0x1.00000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + true, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false, + 0x1.00000000000004p+0, false, false), TEST ("1.0000000000000000277555756156289135105907917022705078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - true, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - true, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - true, - 
0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false, - 0x1.00000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + true, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false, + 0x1.00000000000002p+0, false, false), TEST ("1.00000000000000001387778780781445675529539585113525390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - true, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false, - 0x1.00000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, 
false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + true, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false, + 0x1.00000000000001p+0, false, false), TEST ("1.000000000000000006938893903907228377647697925567626953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - true, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - true, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - true, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false, - 0x1.000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 
0x1.000000000000008p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + true, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false, + 0x1.000000000000008p+0, false, false), TEST ("1.0000000000000000034694469519536141888238489627838134765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - true, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false, - 0x1.000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + true, + 0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false, + 
0x1.000000000000004p+0, false, false, + 0x1.000000000000004p+0, false, false), TEST ("1.0000000000000000017347234759768070944119244813919067382812" "5", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - true, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false, - 0x1.000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + true, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false, + 0x1.000000000000002p+0, false, false), TEST ("1.0000000000000000008673617379884035472059622406959533691406" "25", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, 
- 0x1.0000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - true, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false, - 0x1.000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + true, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false, + 0x1.000000000000001p+0, false, false), TEST ("1.0000000000000000004336808689942017736029811203479766845703" "125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 
0x1.0000000000000008p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - true, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false, - 0x1.0000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + true, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false, + 0x1.0000000000000008p+0, false, false), TEST ("1.0000000000000000002168404344971008868014905601739883422851" "5625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - true, - 0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false, - 
0x1.0000000000000004p+0, false, - 0x1.0000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + true, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false, + 0x1.0000000000000004p+0, false, false), TEST ("1.0000000000000000001084202172485504434007452800869941711425" "78125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false, - 0x1.0000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1.0000000000001p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false, + 0x1.0000000000000002p+0, false, false), TEST ("1.0000000000000000000542101086242752217003726400434970855712" "890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - true, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false, - 0x1.0000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, 
false, false, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + true, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false, + 0x1.0000000000000001p+0, false, false), TEST ("1.0000000000000000000271050543121376108501863200217485427856" "4453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - true, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false, - 0x1.00000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + true, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false, + 0x1.00000000000000008p+0, false, false), TEST ("1.0000000000000000000135525271560688054250931600108742713928" "22265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 
0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - true, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false, - 0x1.00000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + true, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false, + 0x1.00000000000000004p+0, false, false), TEST ("1.0000000000000000000067762635780344027125465800054371356964" "111328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - true, - 
0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false, - 0x1.00000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + true, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false, + 0x1.00000000000000002p+0, false, false), TEST ("1.0000000000000000000033881317890172013562732900027185678482" "0556640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - true, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false, - 0x1.00000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, 
false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + true, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false, + 0x1.00000000000000001p+0, false, false), TEST ("1.0000000000000000000016940658945086006781366450013592839241" "02783203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - true, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false, - 0x1.000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false, + true, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, 
false, false, + 0x1.000000000000000008p+0, false, false, + 0x1.000000000000000008p+0, false, false), TEST ("1.0000000000000000000008470329472543003390683225006796419620" "513916015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - true, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false, - 0x1.000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + true, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false, + 0x1.000000000000000004p+0, false, false), TEST ("1.0000000000000000000004235164736271501695341612503398209810" "2569580078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 
0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - true, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false, - 0x1.000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + true, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false, + 0x1.000000000000000002p+0, false, false), TEST ("1.0000000000000000000002117582368135750847670806251699104905" "12847900390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - true, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 0x1.000000000000000001p+0, false, - 
0x1.000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + true, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false, + 0x1.000000000000000001p+0, false, false), TEST ("1.0000000000000000000001058791184067875423835403125849552452" "564239501953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - true, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false, - 0x1.0000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, 
false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + true, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false, + 0x1.0000000000000000008p+0, false, false), TEST ("1.0000000000000000000000529395592033937711917701562924776226" "2821197509765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - true, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false, - 0x1.0000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + true, + 0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false, + 
0x1.0000000000000000004p+0, false, false, + 0x1.0000000000000000004p+0, false, false), TEST ("1.0000000000000000000000264697796016968855958850781462388113" "14105987548828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - true, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false, - 0x1.0000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + true, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false, + 0x1.0000000000000000002p+0, false, false), TEST ("1.0000000000000000000000132348898008484427979425390731194056" "570529937744140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 
0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - true, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false, - 0x1.0000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + true, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false, + 0x1.0000000000000000001p+0, false, false), TEST ("1.0000000000000000000000066174449004242213989712695365597028" "2852649688720703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - true, - 0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false, - 
0x1.00000000000000000008p+0, false, - 0x1.00000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + true, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false, + 0x1.00000000000000000008p+0, false, false), TEST ("1.0000000000000000000000033087224502121106994856347682798514" "14263248443603515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - true, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false, - 0x1.00000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + true, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false, + 0x1.00000000000000000004p+0, false, false), TEST ("1.0000000000000000000000016543612251060553497428173841399257" "071316242218017578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - true, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false, - 0x1.00000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + true, + 
0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false, + 0x1.00000000000000000002p+0, false, false), TEST ("1.0000000000000000000000008271806125530276748714086920699628" "5356581211090087890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - true, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false, - 0x1.00000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + true, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false, + 0x1.00000000000000000001p+0, false, false), TEST ("1.0000000000000000000000004135903062765138374357043460349814" "26782906055450439453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, 
false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - true, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false, - 0x1.000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false, + 0x1.000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000002067951531382569187178521730174907" "133914530277252197265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - 
0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - true, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false, - 0x1.000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + true, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false, + 0x1.000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000001033975765691284593589260865087453" "5669572651386260986328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - true, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false, - 0x1.000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 
0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + true, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false, + 0x1.000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000516987882845642296794630432543726" "78347863256931304931640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - true, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false, - 0x1.000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, 
+ 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + true, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false, + 0x1.000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000258493941422821148397315216271863" "391739316284656524658203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - true, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false, - 0x1.0000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 0x1.0000000000000000000008p+0, false, false, + 
0x1.0000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000129246970711410574198657608135931" "6958696581423282623291015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - true, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false, - 0x1.0000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + true, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false, + 0x1.0000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000064623485355705287099328804067965" "84793482907116413116455078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, 
false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - true, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false, - 0x1.0000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + true, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false, + 0x1.0000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000032311742677852643549664402033982" "923967414535582065582275390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 
true, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false, - 0x1.0000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + true, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false, + 0x1.0000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000016155871338926321774832201016991" "4619837072677910327911376953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - true, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false, - 0x1.00000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + true, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false, + 0x1.00000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000008077935669463160887416100508495" "73099185363389551639556884765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - true, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false, - 0x1.00000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 
0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + true, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false, + 0x1.00000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000004038967834731580443708050254247" "865495926816947758197784423828125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - true, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false, - 0x1.00000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + true, + 0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false, + 
0x1.00000000000000000000002p+0, false, false, + 0x1.00000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000002019483917365790221854025127123" "9327479634084738790988922119140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - true, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false, - 0x1.00000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + true, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false, + 0x1.00000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000001009741958682895110927012563561" "96637398170423693954944610595703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 
0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false, - 0x1.000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false, + 0x1.000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000504870979341447555463506281780" "983186990852118469774723052978515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000004p+0, false, 
- 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - true, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false, - 0x1.000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + true, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false, + 0x1.000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000252435489670723777731753140890" "4915934954260592348873615264892578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - true, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, false, - 0x1.000000000000000000000002p+0, 
false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + true, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false, + 0x1.000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000126217744835361888865876570445" "24579674771302961744368076324462890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - true, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false, - 0x1.000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, 
false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + true, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false, + 0x1.000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000063108872417680944432938285222" "622898373856514808721840381622314453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false, - 0x1.0000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + 
0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000031554436208840472216469142611" "3114491869282574043609201908111572265625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - true, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false, - 0x1.0000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + true, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000004p+0, false, false, + 
0x1.0000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000015777218104420236108234571305" "65572459346412870218046009540557861328125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - true, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false, - 0x1.0000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000007888609052210118054117285652" "827862296732064351090230047702789306640625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 
0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - true, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false, - 0x1.0000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + true, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000003944304526105059027058642826" "4139311483660321755451150238513946533203125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 
0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - true, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false, - 0x1.00000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + true, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false, + 0x1.00000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000001972152263052529513529321413" "20696557418301608777255751192569732666015625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - true, - 0x1.00000000000000000000000004p+0, false, - 
0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false, - 0x1.00000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + true, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false, + 0x1.00000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000000986076131526264756764660706" "603482787091508043886278755962848663330078125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - true, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false, - 0x1.00000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, 
false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false, + 0x1.00000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000000493038065763132378382330353" "3017413935457540219431393779814243316650390625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - true, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false, - 0x1.00000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, 
false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + true, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false, + 0x1.00000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000000246519032881566189191165176" "65087069677287701097156968899071216583251953125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - true, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false, - 0x1.000000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + true, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 
0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000000123259516440783094595582588" "325435348386438505485784844495356082916259765625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000004p+0, false, - 0x1.000000000000000000000000004p+0, false, - 0x1.000000000000000000000000004p+0, false, - 0x1.000000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000004p+0, false, false, + 0x1.000000000000000000000000004p+0, false, false, + 0x1.000000000000000000000000004p+0, false, false, + 0x1.000000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000000061629758220391547297791294" "1627176741932192527428924222476780414581298828125", false, - 0x1p+0, 
false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000002p+0, false, - 0x1.000000000000000000000000002p+0, false, - 0x1.000000000000000000000000002p+0, false, - 0x1.000000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000002p+0, false, false, + 0x1.000000000000000000000000002p+0, false, false, + 0x1.000000000000000000000000002p+0, false, false, + 0x1.000000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000000030814879110195773648895647" "08135883709660962637144621112383902072906494140625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 
0x1.000000000000000000000000008p+0, false, - true, - 0x1.000000000000000000000000001p+0, false, - 0x1.000000000000000000000000001p+0, false, - 0x1.000000000000000000000000001p+0, false, - 0x1.000000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.000000000000000000000000001p+0, false, false, + 0x1.000000000000000000000000001p+0, false, false, + 0x1.000000000000000000000000001p+0, false, false, + 0x1.000000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000000015407439555097886824447823" "540679418548304813185723105561919510364532470703125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000008p+0, false, - 0x1.0000000000000000000000000008p+0, false, - 0x1.0000000000000000000000000008p+0, false, - 0x1.0000000000000000000000000008p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000000008p+0, false, false, + 0x1.0000000000000000000000000008p+0, false, false), TEST ("1.0000000000000000000000000000000007703719777548943412223911" "7703397092741524065928615527809597551822662353515625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000004p+0, false, - 0x1.0000000000000000000000000004p+0, false, - 0x1.0000000000000000000000000004p+0, false, - 0x1.0000000000000000000000000004p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 
0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000000004p+0, false, false, + 0x1.0000000000000000000000000004p+0, false, false), TEST ("1.0000000000000000000000000000000003851859888774471706111955" "88516985463707620329643077639047987759113311767578125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000002p+0, false, - 0x1.0000000000000000000000000002p+0, false, - 0x1.0000000000000000000000000002p+0, false, - 0x1.0000000000000000000000000002p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000000002p+0, false, false, + 0x1.0000000000000000000000000002p+0, false, false), TEST ("1.0000000000000000000000000000000001925929944387235853055977" 
"942584927318538101648215388195239938795566558837890625", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - true, - 0x1.0000000000000000000000000001p+0, false, - 0x1.0000000000000000000000000001p+0, false, - 0x1.0000000000000000000000000001p+0, false, - 0x1.0000000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + true, + 0x1.0000000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000000001p+0, false, false, + 0x1.0000000000000000000000000001p+0, false, false), TEST ("1.0000000000000000000000000000000000962964972193617926527988" "9712924636592690508241076940976199693977832794189453125", false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000001p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 
0x1.0000000000000002p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.000000000000000000000000008p+0, false, - false, - 0x1p+0, false, - 0x1p+0, false, - 0x1p+0, false, - 0x1.0000000000000000000000000001p+0, false), + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000001p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000002p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.000000000000000000000000008p+0, false, false, + false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1p+0, false, false, + 0x1.0000000000000000000000000001p+0, false, false), + TEST ("0x30000002222225p-1077", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + 0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + true, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + true, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + false, + 0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + 0x1.800000111111p-1024, false, true, + 0x1.8000001111114p-1024, false, true, + true, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false, + 0x1.80000011111128p-1024, false, false), + TEST 
("0x0.7fffffffffffeap-1022", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + true, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false, + 0x1.ffffffffffffa8p-1024, false, false), + TEST ("0x0.7fffffffffffe9p-1022", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + true, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + false, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + 0x1.ffffffffffff8p-1024, false, true, + 0x1.ffffffffffffcp-1024, false, true, + true, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, false, + 0x1.ffffffffffffa4p-1024, false, 
false), + TEST ("0x0.7ffffd4p-126", + false, + 0x1.fffffp-128, false, true, + 0x1.fffff8p-128, false, true, + 0x1.fffffp-128, false, true, + 0x1.fffff8p-128, false, true, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + true, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false, + 0x1.fffff5p-128, false, false), + TEST ("0x0.7ffffffffffffffd4p-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffffp-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + 0x1.fffffffffffffffp-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + false, + 0x1.fffffffffffffff4p-16384, false, true, + 0x1.fffffffffffffff4p-16384, false, true, + 0x1.fffffffffffffff4p-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0x1.fffffffffffffff5p-16384, false, false, + 0x1.fffffffffffffff5p-16384, false, false, + 0x1.fffffffffffffff5p-16384, false, false, + 0x1.fffffffffffffff5p-16384, false, false), + TEST ("0x0.7ffffffffffffffd4p-16383", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, 
true, + 0x4p-1076, false, true, + false, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffff8p-16388, false, true, + 0x1p-16384, false, true, + false, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffffcp-16388, false, true, + 0xf.ffffffffffffff8p-16388, false, true, + 0xf.ffffffffffffffcp-16388, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xf.ffffffffffffffa8p-16388, false, false, + 0xf.ffffffffffffffa8p-16388, false, false, + 0xf.ffffffffffffffa8p-16388, false, false, + 0xf.ffffffffffffffa8p-16388, false, false), + TEST ("0x0.7ffffffffffffffffffffffffffeap-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffff8p-16384, false, true, + 0x2p-16384, false, true, + 0x1.fffffffffffffff8p-16384, false, true, + 0x2p-16384, false, true, + false, + 0x1.fffffffffffffffcp-16384, false, true, + 0x2p-16384, false, true, + 0x1.fffffffffffffffcp-16384, false, true, + 0x2p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.fffffffffffffffffffffffffff8p-16384, false, true, + 0x1.fffffffffffffffffffffffffffcp-16384, false, true, + 0x1.fffffffffffffffffffffffffff8p-16384, false, true, + 0x1.fffffffffffffffffffffffffffcp-16384, false, true), + TEST ("0x0.7000004p-126", + false, + 0x1.cp-128, false, true, + 0x1.cp-128, false, true, + 0x1.cp-128, false, true, + 0x1.c00008p-128, false, true, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, 
+ true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + true, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false, + 0x1.c00001p-128, false, false), + TEST ("0x0.70000000000002p-1022", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.c000000000004p-1024, false, true, + true, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + true, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + false, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.cp-1024, false, true, + 0x1.c000000000004p-1024, false, true, + true, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false, + 0x1.c0000000000008p-1024, false, false), + TEST ("0x0.70000000000000004p-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000008p-16384, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000004p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 
0x1.c000000000000001p-16384, false, false, + 0x1.c000000000000001p-16384, false, false, + 0x1.c000000000000001p-16384, false, false, + 0x1.c000000000000001p-16384, false, false), + TEST ("0x0.70000000000000004p-16383", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xe.000000000000008p-16388, false, true, + false, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xep-16388, false, true, + 0xe.000000000000004p-16388, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + true, + 0xe.0000000000000008p-16388, false, false, + 0xe.0000000000000008p-16388, false, false, + 0xe.0000000000000008p-16388, false, false, + 0xe.0000000000000008p-16388, false, false), + TEST ("0x0.70000000000000000000000000002p-16382", + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x8p-152, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000008p-16384, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000004p-16384, false, true, + false, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x0p+0, false, true, + 0x4p-1076, false, true, + false, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.cp-16384, false, true, + 0x1.c000000000000000000000000004p-16384, false, true), }; diff --git a/stdlib/tst-strtod-round-skeleton.c b/stdlib/tst-strtod-round-skeleton.c index 6fba4b5228..be081ba416 100644 --- a/stdlib/tst-strtod-round-skeleton.c +++ b/stdlib/tst-strtod-round-skeleton.c @@ -21,6 
+21,7 @@ declared in the headers. */ #define _LIBC_TEST 1 #define __STDC_WANT_IEC_60559_TYPES_EXT__ +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include "tst-strtod.h" @@ -138,16 +140,26 @@ gen-tst-strtod-round utility to select the appropriately rounded long double value for a given format. */ #define TEST(s, \ - fx, fd, fdo, fn, fno, fz, fzo, fu, fuo, \ - dx, dd, ddo, dn, dno, dz, dzo, du, duo, \ - ld64ix, ld64id, ld64ido, ld64in, ld64ino, \ - ld64iz, ld64izo, ld64iu, ld64iuo, \ - ld64mx, ld64md, ld64mdo, ld64mn, ld64mno, \ - ld64mz, ld64mzo, ld64mu, ld64muo, \ - ld106x, ld106d, ld106do, ld106n, ld106no, \ - ld106z, ld106zo, ld106u, ld106uo, \ - ld113x, ld113d, ld113do, ld113n, ld113no, \ - ld113z, ld113zo, ld113u, ld113uo) \ + fx, fd, fdo, fdu, fn, fno, fnu, \ + fz, fzo, fzu, fu, fuo, fuu, \ + dx, dd, ddo, ddu, dn, dno, dnu, \ + dz, dzo, dzu, du, duo, duu, \ + ld64ix, ld64id, ld64ido, ld64idu, \ + ld64in, ld64ino, ld64inu, \ + ld64iz, ld64izo, ld64izu, \ + ld64iu, ld64iuo, ld64iuu, \ + ld64mx, ld64md, ld64mdo, ld64mdu, \ + ld64mn, ld64mno, ld64mnu, \ + ld64mz, ld64mzo, ld64mzu, \ + ld64mu, ld64muo, ld64muu, \ + ld106x, ld106d, ld106do, ld106du, \ + ld106n, ld106no, ld106nu, \ + ld106z, ld106zo, ld106zu, \ + ld106u, ld106uo, ld106uu, \ + ld113x, ld113d, ld113do, ld113du, \ + ld113n, ld113no, ld113nu, \ + ld113z, ld113zo, ld113zu, \ + ld113u, ld113uo, ld113uu) \ { \ L_ (s), \ { XNTRY (fx, dx, ld64ix, ld64mx, ld106x, ld113x) }, \ @@ -162,6 +174,12 @@ { XNTRY (fdo, ddo, ld64ido, ld64mdo, ld106do, ld113do) }, \ { XNTRY (fzo, dzo, ld64izo, ld64mzo, ld106zo, ld113zo) }, \ { XNTRY (fuo, duo, ld64iuo, ld64muo, ld106uo, ld113uo) } \ + }, \ + { \ + { XNTRY (fnu, dnu, ld64inu, ld64mnu, ld106nu, ld113nu) }, \ + { XNTRY (fdu, ddu, ld64idu, ld64mdu, ld106du, ld113du) }, \ + { XNTRY (fzu, dzu, ld64izu, ld64mzu, ld106zu, ld113zu) }, \ + { XNTRY (fuu, duu, ld64iuu, ld64muu, ld106uu, ld113uu) } \ } \ } @@ -180,11 +198,17 @@ struct 
test_overflow STRUCT_FOREACH_FLOAT_BOOL }; +struct test_underflow + { + STRUCT_FOREACH_FLOAT_BOOL + }; + struct test { const CHAR *s; struct test_exactness exact; struct test_results r[4]; struct test_overflow o[4]; + struct test_underflow u[4]; }; /* Include the generated test data. */ @@ -202,10 +226,16 @@ struct test { # define FE_OVERFLOW 0 #endif +#ifndef FE_UNDERFLOW +# define FE_UNDERFLOW 0 +#endif + #define GEN_ONE_TEST(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ { \ feclearexcept (FE_ALL_EXCEPT); \ + errno = 12345; \ FTYPE f = STRTO (FSUF) (s, NULL); \ + int new_errno = errno; \ if (f != expected->FSUF \ || (copysign ## CSUF) (1.0 ## LSUF, f) \ != (copysign ## CSUF) (1.0 ## LSUF, expected->FSUF)) \ @@ -254,6 +284,48 @@ struct test { printf ("ignoring this exception error\n"); \ } \ } \ + if (overflow->FSUF && new_errno != ERANGE) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") left errno == %d," \ + " not %d (ERANGE)\n", \ + s, new_errno, ERANGE); \ + result = 1; \ + } \ + if (FE_UNDERFLOW != 0) \ + { \ + bool underflow_raised \ + = fetestexcept (FE_UNDERFLOW) != 0; \ + if (underflow_raised != underflow->FSUF) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") underflow %d " \ + "not %d\n", s, underflow_raised, \ + underflow->FSUF); \ + if (EXCEPTION_TESTS (FTYPE)) \ + result = 1; \ + else \ + printf ("ignoring this exception error\n"); \ + } \ + } \ + if (underflow->FSUF && new_errno != ERANGE) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") left errno == %d," \ + " not %d (ERANGE)\n", \ + s, new_errno, ERANGE); \ + result = 1; \ + } \ + if (!overflow->FSUF \ + && !underflow->FSUF \ + && new_errno != 12345) \ + { \ + printf (FNPFXS "to" #FSUF \ + " (" STRM ") set errno == %d," \ + " should be unchanged\n", \ + s, new_errno); \ + result = 1; \ + } \ } \ } @@ -261,6 +333,7 @@ static int test_in_one_mode (const CHAR *s, const struct test_results *expected, const struct test_exactness *exact, const struct test_overflow *overflow, + const struct 
test_underflow *underflow, const char *mode_name, int rnd_mode) { int result = 0; @@ -296,6 +369,7 @@ do_test (void) { result |= test_in_one_mode (tests[i].s, &tests[i].r[modes[0].rnd_i], &tests[i].exact, &tests[i].o[modes[0].rnd_i], + &tests[i].u[modes[0].rnd_i], modes[0].mode_name, modes[0].rnd_mode); for (const struct fetestmodes *m = &modes[1]; m->mode_name != NULL; m++) { @@ -303,7 +377,9 @@ do_test (void) { result |= test_in_one_mode (tests[i].s, &tests[i].r[m->rnd_i], &tests[i].exact, - &tests[i].o[m->rnd_i], m->mode_name, + &tests[i].o[m->rnd_i], + &tests[i].u[m->rnd_i], + m->mode_name, m->rnd_mode); fesetround (save_round_mode); } diff --git a/stdlib/tst-strtod-underflow.c b/stdlib/tst-strtod-underflow.c index a5ced18599..8598b95b6d 100644 --- a/stdlib/tst-strtod-underflow.c +++ b/stdlib/tst-strtod-underflow.c @@ -17,6 +17,10 @@ License along with the GNU C Library; if not, see . */ +/* Defining _LIBC_TEST ensures long double math functions are + declared in the headers. */ +#define _LIBC_TEST 1 +#define __STDC_WANT_IEC_60559_TYPES_EXT__ #include #include #include @@ -25,6 +29,60 @@ #include #include +#include "tst-strtod.h" + +/* Logic for selecting between tests for different formats is as in + tst-strtod-skeleton.c, but here it is selecting string inputs with + different underflow properties, rather than generated test + data. */ + +#define _CONCAT(a, b) a ## b +#define CONCAT(a, b) _CONCAT (a, b) + +#define MEMBER(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ + const char *s_ ## FSUF; + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +# define CHOOSE_ld(f,d,...) d +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && LDBL_MIN_EXP == -16381 +# define CHOOSE_ld(f,d,ld64i,...) ld64i +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && LDBL_MIN_EXP == -16382 +# define CHOOSE_ld(f,d,ld64i,ld64m,...) ld64m +#elif LDBL_MANT_DIG == 106 && LDBL_MAX_EXP == 1024 +# define CHOOSE_ld(f,d,ld64i,ld64m,ld106,...) 
ld106 +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +# define CHOOSE_ld(f,d,ld64i,ld64m,ld106,ld113,...) ld113 +#else +# error "unknown long double format" +#endif + +#define CHOOSE_f(f,...) f +#define CHOOSE_f32(f,...) f +#define CHOOSE_d(f,d,...) d +#define CHOOSE_f64(f,d,...) d +#define CHOOSE_f32x(f,d,...) d +#define CHOOSE_f128(f,d,ld64i,ld64m,ld106,ld113,...) ld113 + +#if __HAVE_FLOAT64X +# if FLT64X_MANT_DIG == 113 && FLT64X_MAX_EXP == 16384 +# define CHOOSE_f64x(f,d,ld64i,ld64m,ld106,ld113,...) ld113 +# elif (FLT64X_MANT_DIG == 64 \ + && FLT64X_MAX_EXP == 16384 \ + && FLT64X_MIN_EXP == -16381) +# define CHOOSE_f64x(f,d,ld64i,...) ld64i +# else +# error "unknown _Float64x format" +# endif +#endif + +#define _XNTRY(FSUF, FTYPE, FTOSTR, LSUF, CSUF, ...) \ + CHOOSE_ ## FSUF (__VA_ARGS__), +#define XNTRY(...) \ + GEN_TEST_STRTOD_FOREACH (_XNTRY, __VA_ARGS__) + +#define TEST(f, d, ld64i, ld64m, ld106, ld113, u) \ + { XNTRY(f, d, ld64i, ld64m, ld106, ld113) u } + enum underflow_case { /* Result is exact or outside the subnormal range. 
*/ @@ -55,38 +113,194 @@ enum underflow_case struct test { - const char *s; + GEN_TEST_STRTOD_FOREACH (MEMBER) enum underflow_case c; }; static const struct test tests[] = { - { "0x1p-1022", UNDERFLOW_NONE }, - { "-0x1p-1022", UNDERFLOW_NONE }, - { "0x0p-10000000000000000000000000", UNDERFLOW_NONE }, - { "-0x0p-10000000000000000000000000", UNDERFLOW_NONE }, - { "0x1p-10000000000000000000000000", UNDERFLOW_ALWAYS }, - { "-0x1p-10000000000000000000000000", UNDERFLOW_ALWAYS }, - { "0x1.000000000000000000001p-1022", UNDERFLOW_NONE }, - { "-0x1.000000000000000000001p-1022", UNDERFLOW_NONE }, - { "0x1p-1075", UNDERFLOW_ALWAYS }, - { "-0x1p-1075", UNDERFLOW_ALWAYS }, - { "0x1p-1023", UNDERFLOW_NONE }, - { "-0x1p-1023", UNDERFLOW_NONE }, - { "0x1p-1074", UNDERFLOW_NONE }, - { "-0x1p-1074", UNDERFLOW_NONE }, - { "0x1.ffffffffffffep-1023", UNDERFLOW_NONE }, - { "-0x1.ffffffffffffep-1023", UNDERFLOW_NONE }, - { "0x1.fffffffffffffp-1023", UNDERFLOW_ALWAYS }, - { "-0x1.fffffffffffffp-1023", UNDERFLOW_ALWAYS }, - { "0x1.fffffffffffff0001p-1023", UNDERFLOW_EXCEPT_UPWARD }, - { "-0x1.fffffffffffff0001p-1023", UNDERFLOW_EXCEPT_DOWNWARD }, - { "0x1.fffffffffffff7fffp-1023", UNDERFLOW_EXCEPT_UPWARD }, - { "-0x1.fffffffffffff7fffp-1023", UNDERFLOW_EXCEPT_DOWNWARD }, - { "0x1.fffffffffffff8p-1023", UNDERFLOW_ONLY_DOWNWARD_ZERO }, - { "-0x1.fffffffffffff8p-1023", UNDERFLOW_ONLY_UPWARD_ZERO }, - { "0x1.fffffffffffffffffp-1023", UNDERFLOW_ONLY_DOWNWARD_ZERO }, - { "-0x1.fffffffffffffffffp-1023", UNDERFLOW_ONLY_UPWARD_ZERO }, + TEST ("0x1p-126", + "0x1p-1022", + "0x1p-16382", + "0x1p-16383", + "0x1p-969", + "0x1p-16382", + UNDERFLOW_NONE), + TEST ("-0x1p-126", + "-0x1p-1022", + "-0x1p-16382", + "-0x1p-16383", + "-0x1p-969", + "-0x1p-16382", + UNDERFLOW_NONE), + TEST ("0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + "0x0p-10000000000000000000000000", + 
UNDERFLOW_NONE), + TEST ("-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + "-0x0p-10000000000000000000000000", + UNDERFLOW_NONE), + TEST ("0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + "0x1p-10000000000000000000000000", + UNDERFLOW_ALWAYS), + TEST ("-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + "-0x1p-10000000000000000000000000", + UNDERFLOW_ALWAYS), + TEST ("0x1.000000000000000000001p-126", + "0x1.000000000000000000001p-1022", + "0x1.000000000000000000001p-16382", + "0x1.000000000000000000001p-16383", + "0x1.000000000000000000001p-969", + "0x1.00000000000000000000000000000000000000001p-16382", + UNDERFLOW_NONE), + TEST ("-0x1.000000000000000000001p-126", + "-0x1.000000000000000000001p-1022", + "-0x1.000000000000000000001p-16382", + "-0x1.000000000000000000001p-16383", + "-0x1.000000000000000000001p-969", + "-0x1.00000000000000000000000000000000000000001p-16382", + UNDERFLOW_NONE), + TEST ("0x1p-150", + "0x1p-1075", + "0x1p-16446", + "0x1p-16447", + "0x1p-1075", + "0x1p-16495", + UNDERFLOW_ALWAYS), + TEST ("-0x1p-150", + "-0x1p-1075", + "-0x1p-16446", + "-0x1p-16447", + "-0x1p-1075", + "-0x1p-16495", + UNDERFLOW_ALWAYS), + TEST ("0x1p-127", + "0x1p-1023", + "0x1p-16383", + "0x1p-16384", + "0x1p-970", + "0x1p-16383", + UNDERFLOW_NONE), + TEST ("-0x1p-127", + "-0x1p-1023", + "-0x1p-16383", + "-0x1p-16384", + "-0x1p-970", + "-0x1p-16383", + UNDERFLOW_NONE), + TEST ("0x1p-149", + "0x1p-1074", + "0x1p-16445", + "0x1p-16446", + "0x1p-1074", + "0x1p-16494", + UNDERFLOW_NONE), + TEST ("-0x1p-149", + "-0x1p-1074", + "-0x1p-16445", + "-0x1p-16446", + 
"-0x1p-1074", + "-0x1p-16494", + UNDERFLOW_NONE), + TEST ("0x1.fffffcp-127", + "0x1.ffffffffffffep-1023", + "0x1.fffffffffffffffcp-16383", + "0x1.fffffffffffffffcp-16384", + "0x1.ffffffffffffffffffffffffffp-970", + "0x1.fffffffffffffffffffffffffffep-16383", + UNDERFLOW_NONE), + TEST ("-0x1.fffffcp-127", + "-0x1.ffffffffffffep-1023", + "-0x1.fffffffffffffffcp-16383", + "-0x1.fffffffffffffffcp-16384", + "-0x1.ffffffffffffffffffffffffffp-970", + "-0x1.fffffffffffffffffffffffffffep-16383", + UNDERFLOW_NONE), + TEST ("0x1.fffffep-127", + "0x1.fffffffffffffp-1023", + "0x1.fffffffffffffffep-16383", + "0x1.fffffffffffffffep-16384", + "0x1.ffffffffffffffffffffffffff8p-970", + "0x1.ffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ALWAYS), + TEST ("-0x1.fffffep-127", + "-0x1.fffffffffffffp-1023", + "-0x1.fffffffffffffffep-16383", + "-0x1.fffffffffffffffep-16384", + "-0x1.ffffffffffffffffffffffffff8p-970", + "-0x1.ffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ALWAYS), + TEST ("0x1.fffffe0001p-127", + "0x1.fffffffffffff0001p-1023", + "0x1.fffffffffffffffe0001p-16383", + "0x1.fffffffffffffffe0001p-16384", + "0x1.ffffffffffffffffffffffffff80001p-970", + "0x1.ffffffffffffffffffffffffffff0001p-16383", + UNDERFLOW_EXCEPT_UPWARD), + TEST ("-0x1.fffffe0001p-127", + "-0x1.fffffffffffff0001p-1023", + "-0x1.fffffffffffffffe0001p-16383", + "-0x1.fffffffffffffffe0001p-16384", + "-0x1.ffffffffffffffffffffffffff80001p-970", + "-0x1.ffffffffffffffffffffffffffff0001p-16383", + UNDERFLOW_EXCEPT_DOWNWARD), + TEST ("0x1.fffffeffffp-127", + "0x1.fffffffffffff7fffp-1023", + "0x1.fffffffffffffffeffffp-16383", + "0x1.fffffffffffffffeffffp-16384", + "0x1.ffffffffffffffffffffffffffbffffp-970", + "0x1.ffffffffffffffffffffffffffff7fffp-16383", + UNDERFLOW_EXCEPT_UPWARD), + TEST ("-0x1.fffffeffffp-127", + "-0x1.fffffffffffff7fffp-1023", + "-0x1.fffffffffffffffeffffp-16383", + "-0x1.fffffffffffffffeffffp-16384", + "-0x1.ffffffffffffffffffffffffffbffffp-970", + 
"-0x1.ffffffffffffffffffffffffffff7fffp-16383", + UNDERFLOW_EXCEPT_DOWNWARD), + TEST ("0x1.ffffffp-127", + "0x1.fffffffffffff8p-1023", + "0x1.ffffffffffffffffp-16383", + "0x1.ffffffffffffffffp-16384", + "0x1.ffffffffffffffffffffffffffcp-970", + "0x1.ffffffffffffffffffffffffffff8p-16383", + UNDERFLOW_ONLY_DOWNWARD_ZERO), + TEST ("-0x1.ffffffp-127", + "-0x1.fffffffffffff8p-1023", + "-0x1.ffffffffffffffffp-16383", + "-0x1.ffffffffffffffffp-16384", + "-0x1.ffffffffffffffffffffffffffcp-970", + "-0x1.ffffffffffffffffffffffffffff8p-16383", + UNDERFLOW_ONLY_UPWARD_ZERO), + TEST ("0x1.ffffffffffp-127", + "0x1.fffffffffffffffffp-1023", + "0x1.ffffffffffffffffffffp-16383", + "0x1.ffffffffffffffffffffp-16384", + "0x1.ffffffffffffffffffffffffffffffp-970", + "0x1.ffffffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ONLY_DOWNWARD_ZERO), + TEST ("-0x1.ffffffffffp-127", + "-0x1.fffffffffffffffffp-1023", + "-0x1.ffffffffffffffffffffp-16383", + "-0x1.ffffffffffffffffffffp-16384", + "-0x1.ffffffffffffffffffffffffffffffp-970", + "-0x1.ffffffffffffffffffffffffffffffffp-16383", + UNDERFLOW_ONLY_UPWARD_ZERO), }; /* Return whether to expect underflow from a particular testcase, in a @@ -133,39 +347,62 @@ static bool support_underflow_exception = false; volatile double d = DBL_MIN; volatile double dd; -static int -test_in_one_mode (const char *s, enum underflow_case c, int rm, - const char *mode_name) +static bool +test_got_fe_underflow (void) { - int result = 0; - feclearexcept (FE_ALL_EXCEPT); - errno = 0; - double d = strtod (s, NULL); - int got_errno = errno; #ifdef FE_UNDERFLOW - bool got_fe_underflow = fetestexcept (FE_UNDERFLOW) != 0; + return fetestexcept (FE_UNDERFLOW) != 0; #else - bool got_fe_underflow = false; + return false; #endif - printf ("strtod (%s) (%s) returned %a, errno = %d, %sunderflow exception\n", - s, mode_name, d, got_errno, got_fe_underflow ? 
"" : "no "); - bool this_expect_underflow = expect_underflow (c, rm); - if (got_errno != 0 && got_errno != ERANGE) - { - puts ("FAIL: errno neither 0 nor ERANGE"); - result = 1; - } - else if (this_expect_underflow != (errno == ERANGE)) - { - puts ("FAIL: underflow from errno differs from expectations"); - result = 1; - } - if (support_underflow_exception && got_fe_underflow != this_expect_underflow) - { - puts ("FAIL: underflow from exceptions differs from expectations"); - result = 1; - } - return result; +} + +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static int \ +test_strto ## FSUF (int i, int rm, const char *mode_name) \ +{ \ + const char *s = tests[i].s_ ## FSUF; \ + enum underflow_case c = tests[i].c; \ + int result = 0; \ + feclearexcept (FE_ALL_EXCEPT); \ + errno = 0; \ + FTYPE d = strto ## FSUF (s, NULL); \ + int got_errno = errno; \ + bool got_fe_underflow = test_got_fe_underflow (); \ + char buf[FSTRLENMAX]; \ + FTOSTR (buf, sizeof (buf), "%a", d); \ + printf ("strto" #FSUF \ + " (%s) (%s) returned %s, errno = %d, " \ + "%sunderflow exception\n", \ + s, mode_name, buf, got_errno, \ + got_fe_underflow ? 
"" : "no "); \ + bool this_expect_underflow = expect_underflow (c, rm); \ + if (got_errno != 0 && got_errno != ERANGE) \ + { \ + puts ("FAIL: errno neither 0 nor ERANGE"); \ + result = 1; \ + } \ + else if (this_expect_underflow != (errno == ERANGE)) \ + { \ + puts ("FAIL: underflow from errno differs from expectations"); \ + result = 1; \ + } \ + if (support_underflow_exception \ + && got_fe_underflow != this_expect_underflow) \ + { \ + puts ("FAIL: underflow from exceptions " \ + "differs from expectations"); \ + result = 1; \ + } \ + return result; \ +} + +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) + +static int +test_in_one_mode (size_t i, int rm, const char *mode_name) +{ + return STRTOD_TEST_FOREACH (test_strto, i, rm, mode_name); } static int @@ -191,12 +428,12 @@ do_test (void) #endif for (size_t i = 0; i < sizeof (tests) / sizeof (tests[0]); i++) { - result |= test_in_one_mode (tests[i].s, tests[i].c, fe_tonearest, + result |= test_in_one_mode (i, fe_tonearest, "default rounding mode"); #ifdef FE_DOWNWARD if (!fesetround (FE_DOWNWARD)) { - result |= test_in_one_mode (tests[i].s, tests[i].c, FE_DOWNWARD, + result |= test_in_one_mode (i, FE_DOWNWARD, "FE_DOWNWARD"); fesetround (save_round_mode); } @@ -204,7 +441,7 @@ do_test (void) #ifdef FE_TOWARDZERO if (!fesetround (FE_TOWARDZERO)) { - result |= test_in_one_mode (tests[i].s, tests[i].c, FE_TOWARDZERO, + result |= test_in_one_mode (i, FE_TOWARDZERO, "FE_TOWARDZERO"); fesetround (save_round_mode); } @@ -212,7 +449,7 @@ do_test (void) #ifdef FE_UPWARD if (!fesetround (FE_UPWARD)) { - result |= test_in_one_mode (tests[i].s, tests[i].c, FE_UPWARD, + result |= test_in_one_mode (i, FE_UPWARD, "FE_UPWARD"); fesetround (save_round_mode); } diff --git a/stdlib/tst-strtod1i.c b/stdlib/tst-strtod1i.c index 9d6bb760fb..44ae0264f4 100644 --- a/stdlib/tst-strtod1i.c +++ b/stdlib/tst-strtod1i.c @@ -25,60 +25,91 @@ #include #include -/* Perform a few tests in a locale with thousands separators. 
*/ -static int -do_test (void) -{ - static const struct - { - const char *loc; - const char *str; - double exp; - ptrdiff_t nread; - } tests[] = - { - { "de_DE.UTF-8", "1,5", 1.5, 3 }, - { "de_DE.UTF-8", "1.5", 1.0, 1 }, - { "de_DE.UTF-8", "1.500", 1500.0, 5 }, - { "de_DE.UTF-8", "36.893.488.147.419.103.232", 0x1.0p65, 26 } - }; -#define ntests (sizeof (tests) / sizeof (tests[0])) - size_t n; - int result = 0; - - puts ("\nLocale tests"); +#include "tst-strtod.h" - for (n = 0; n < ntests; ++n) - { - double d; - char *endp; +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. */ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif - if (setlocale (LC_ALL, tests[n].loc) == NULL) - { - printf ("cannot set locale %s\n", tests[n].loc); - result = 1; - continue; - } +#define ntests (sizeof (tests) / sizeof (tests[0])) - d = __strtod_internal (tests[n].str, &endp, 1); - if (d != tests[n].exp) - { - printf ("strtod(\"%s\") returns %g and not %g\n", - tests[n].str, d, tests[n].exp); - result = 1; - } - else if (endp - tests[n].str != tests[n].nread) - { - printf ("strtod(\"%s\") read %td bytes and not %td\n", - tests[n].str, endp - tests[n].str, tests[n].nread); - result = 1; - } - } +/* Perform a few tests in a locale with thousands separators. 
*/ +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static int \ +test_strto ## FSUF (void) \ +{ \ + static const struct \ + { \ + const char *loc; \ + const char *str; \ + FTYPE exp; \ + ptrdiff_t nread; \ + } tests[] = \ + { \ + { "de_DE.UTF-8", "1,5", 1.5 ## LSUF, 3 }, \ + { "de_DE.UTF-8", "1.5", 1.0 ## LSUF, 1 }, \ + { "de_DE.UTF-8", "1.500", 1500.0 ## LSUF, 5 }, \ + { "de_DE.UTF-8", "36.893.488.147.419.103.232", 0x1.0p65 ## LSUF, 26 } \ + }; \ + size_t n; \ + int result = 0; \ + \ + puts ("\nLocale tests"); \ + \ + for (n = 0; n < ntests; ++n) \ + { \ + FTYPE d; \ + char *endp; \ + \ + if (setlocale (LC_ALL, tests[n].loc) == NULL) \ + { \ + printf ("cannot set locale %s\n", tests[n].loc); \ + result = 1; \ + continue; \ + } \ + \ + d = __strto ## FSUF ## _internal (tests[n].str, &endp, 1); \ + if (d != tests[n].exp) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", d); \ + FTOSTR (buf2, sizeof (buf2), "%g", tests[n].exp); \ + printf ("strto" # FSUF "(\"%s\") returns %s and not %s\n", \ + tests[n].str, buf1, buf2); \ + result = 1; \ + } \ + else if (endp - tests[n].str != tests[n].nread) \ + { \ + printf ("strto" # FSUF "(\"%s\") read %td bytes and not %td\n", \ + tests[n].str, endp - tests[n].str, tests[n].nread); \ + result = 1; \ + } \ + } \ + \ + if (result == 0) \ + puts ("all OK"); \ + \ + return result ? EXIT_FAILURE : EXIT_SUCCESS; \ +} - if (result == 0) - puts ("all OK"); +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) - return result ? 
EXIT_FAILURE : EXIT_SUCCESS; +static int +do_test (void) +{ + return STRTOD_TEST_FOREACH (test_strto); } #include diff --git a/stdlib/tst-strtod2.c b/stdlib/tst-strtod2.c index a7df82ebbd..d00bc13323 100644 --- a/stdlib/tst-strtod2.c +++ b/stdlib/tst-strtod2.c @@ -1,43 +1,112 @@ +#include #include #include -struct test -{ - const char *str; - double result; - size_t offset; -} tests[] = -{ - { "0xy", 0.0, 1 }, - { "0x.y", 0.0, 1 }, - { "0x0.y", 0.0, 4 }, - { "0x.0y", 0.0, 4 }, - { ".y", 0.0, 0 }, - { "0.y", 0.0, 2 }, - { ".0y", 0.0, 2 } -}; +#include "tst-strtod.h" + +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +struct test_strto ## FSUF \ +{ \ + const char *str; \ + FTYPE result; \ + size_t offset; \ +} tests_strto ## FSUF[] = \ +{ \ + { "0xy", 0.0 ## LSUF, 1 }, \ + { "0x.y", 0.0 ## LSUF, 1 }, \ + { "0x0.y", 0.0 ## LSUF, 4 }, \ + { "0x.0y", 0.0 ## LSUF, 4 }, \ + { ".y", 0.0 ## LSUF, 0 }, \ + { "0.y", 0.0 ## LSUF, 2 }, \ + { ".0y", 0.0 ## LSUF, 2 }, \ + { "1.0e", 1.0 ## LSUF, 3 }, \ + { "1.0e+", 1.0 ## LSUF, 3 }, \ + { "1.0e-", 1.0 ## LSUF, 3 }, \ + { "1.0ex", 1.0 ## LSUF, 3 }, \ + { "1.0e+x", 1.0 ## LSUF, 3 }, \ + { "1.0e-x", 1.0 ## LSUF, 3 }, \ + { "0x1p", 1.0 ## LSUF, 3 }, \ + { "0x1p+", 1.0 ## LSUF, 3 }, \ + { "0x1p-", 1.0 ## LSUF, 3 }, \ + { "0x1px", 1.0 ## LSUF, 3 }, \ + { "0x1p+x", 1.0 ## LSUF, 3 }, \ + { "0x1p-x", 1.0 ## LSUF, 3 }, \ + { "", 0.0 ## LSUF, 0 }, \ + { ".", 0.0 ## LSUF, 0 }, \ + { "-", 0.0 ## LSUF, 0 }, \ + { "-.", 0.0 ## LSUF, 0 }, \ + { ".e", 0.0 ## LSUF, 0 }, \ + { "-.e", 0.0 ## LSUF, 0 }, \ + { " \t", 0.0 ## LSUF, 0 }, \ + { " \t.", 0.0 ## LSUF, 0 }, \ + { " \t-", 0.0 ## LSUF, 0 }, \ + { " \t-.", 0.0 ## LSUF, 0 }, \ + { " \t.e", 0.0 ## LSUF, 0 }, \ + { " \t-.e", 0.0 ## LSUF, 0 }, \ + { " \t\f\r\n\v1", 1.0 ## LSUF, 7 }, \ + { " \t\f\r\n\v-1.5e2", -150.0 ## LSUF, 12 }, \ + { "INFx", INFINITY, 3 }, \ + { "infx", INFINITY, 3 }, \ + { "INFINITx", INFINITY, 3 }, \ + { "infinitx", INFINITY, 3 }, \ + { "INFINITYY", INFINITY, 8 }, \ + { 
"infinityy", INFINITY, 8 }, \ + { "NANx", NAN, 3 }, \ + { "nanx", NAN, 3 }, \ + { "NAN(", NAN, 3 }, \ + { "nan(", NAN, 3 }, \ + { "NAN(x", NAN, 3 }, \ + { "nan(x", NAN, 3 }, \ + { "NAN(x)y", NAN, 6 }, \ + { "nan(x)y", NAN, 6 }, \ + { "NAN(*)y", NAN, 3 }, \ + { "nan(*)y", NAN, 3 } \ +}; \ + \ +static int \ +compare_strto ## FSUF (FTYPE x, FTYPE y) \ +{ \ + if (isnan (x) && isnan (y)) \ + return 1; \ + return x == y; \ +} \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + for (size_t i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = strto ## FSUF (tests_strto ## FSUF[i].str, &ep); \ + if (!compare_strto ## FSUF (r, tests_strto ## FSUF[i].result)) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", tests_strto ## FSUF[i].result); \ + printf ("test %zu r = %s, expect %s\n", i, buf1, buf2); \ + status = 1; \ + } \ + if (ep != tests_strto ## FSUF[i].str + tests_strto ## FSUF[i].offset) \ + { \ + printf ("test %zu strto" #FSUF \ + " parsed %tu characters, expected %zu\n", \ + i, ep - tests_strto ## FSUF[i].str, \ + tests_strto ## FSUF[i].offset); \ + status = 1; \ + } \ + } \ + return status; \ +} + +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) { - int status = 0; - for (size_t i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) - { - char *ep; - double r = strtod (tests[i].str, &ep); - if (r != tests[i].result) - { - printf ("test %zu r = %g, expect %g\n", i, r, tests[i].result); - status = 1; - } - if (ep != tests[i].str + tests[i].offset) - { - printf ("test %zu strtod parsed %tu characters, expected %zu\n", - i, ep - tests[i].str, tests[i].offset); - status = 1; - } - } - return status; + return STRTOD_TEST_FOREACH (test_strto); } #define TEST_FUNCTION do_test () diff --git a/stdlib/tst-strtod3.c b/stdlib/tst-strtod3.c index 23abec1896..0d662d8be8 100644 --- 
a/stdlib/tst-strtod3.c +++ b/stdlib/tst-strtod3.c @@ -3,19 +3,73 @@ #include #include -static const struct -{ - const char *in; - const char *out; - double expected; -} tests[] = - { - { "000,,,e1", ",,,e1", 0.0 }, - { "000e1", "", 0.0 }, - { "000,1e1", ",1e1", 0.0 } - }; -#define NTESTS (sizeof (tests) / sizeof (tests[0])) +#include "tst-strtod.h" + +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. */ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static const struct \ +{ \ + const char *in; \ + const char *out; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "000,,,e1", ",,,e1", 0.0 ## LSUF }, \ + { "000e1", "", 0.0 ## LSUF }, \ + { "000,1e1", ",1e1", 0.0 ## LSUF } \ + }; \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = __strto ## FSUF ## _internal (tests_strto ## FSUF[i].in, \ + &ep, 1); \ + \ + if (strcmp (ep, tests_strto ## FSUF[i].out) != 0) \ + { \ + printf ("%d: got rest string \"%s\", expected \"%s\"\n", \ + i, ep, tests_strto ## FSUF[i].out); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + return status; \ +} + +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -26,29 +80,7 @@ do_test (void) return 1; } - int 
status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = __strtod_internal (tests[i].in, &ep, 1); - - if (strcmp (ep, tests[i].out) != 0) - { - printf ("%d: got rest string \"%s\", expected \"%s\"\n", - i, ep, tests[i].out); - status = 1; - } - - if (r != tests[i].expected) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #define TEST_FUNCTION do_test () diff --git a/stdlib/tst-strtod4.c b/stdlib/tst-strtod4.c index 6cc4e843c7..dfd3f05027 100644 --- a/stdlib/tst-strtod4.c +++ b/stdlib/tst-strtod4.c @@ -3,22 +3,76 @@ #include #include +#include "tst-strtod.h" + +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. */ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif + #define NNBSP "\xe2\x80\xaf" -static const struct -{ - const char *in; - const char *out; - double expected; -} tests[] = - { - { "000"NNBSP"000"NNBSP"000", "", 0.0 }, - { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 }, - /* Bug 30964 */ - { "10"NNBSP NNBSP"200", NNBSP NNBSP"200", 10.0 } - }; -#define NTESTS (sizeof (tests) / sizeof (tests[0])) +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static const struct \ +{ \ + const char *in; \ + const char *out; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "000"NNBSP"000"NNBSP"000", "", 0.0 ## LSUF }, \ + { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 ## LSUF }, \ + /* Bug 30964 */ \ + { "10"NNBSP NNBSP"200", NNBSP NNBSP"200", 10.0 ## LSUF } \ + }; \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## 
FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = __strto ## FSUF ## _internal (tests_strto ## FSUF[i].in, \ + &ep, 1); \ + \ + if (strcmp (ep, tests_strto ## FSUF[i].out) != 0) \ + { \ + printf ("%d: got rest string \"%s\", expected \"%s\"\n", \ + i, ep, tests_strto ## FSUF[i].out); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + return status; \ +} +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -29,29 +83,7 @@ do_test (void) return 1; } - int status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = __strtod_internal (tests[i].in, &ep, 1); - - if (strcmp (ep, tests[i].out) != 0) - { - printf ("%d: got rest string \"%s\", expected \"%s\"\n", - i, ep, tests[i].out); - status = 1; - } - - if (r != tests[i].expected) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #define TEST_FUNCTION do_test () diff --git a/stdlib/tst-strtod5.c b/stdlib/tst-strtod5.c index 29153ec005..7eb9b3a2d7 100644 --- a/stdlib/tst-strtod5.c +++ b/stdlib/tst-strtod5.c @@ -22,35 +22,75 @@ #include #include +#include "tst-strtod.h" + #define NBSP "\xc2\xa0" -static const struct -{ - const char *in; - double expected; -} tests[] = - { - { "0", 0.0 }, - { "000", 0.0 }, - { "-0", -0.0 }, - { "-000", -0.0 }, - { "0,", 0.0 }, - { "-0,", -0.0 }, - { "0,0", 0.0 }, - { "-0,0", -0.0 }, - { "0e-10", 0.0 }, - { "-0e-10", -0.0 }, - { "0,e-10", 0.0 }, - { "-0,e-10", -0.0 }, - { "0,0e-10", 0.0 }, - { "-0,0e-10", -0.0 }, - { "0e-1000000", 0.0 }, - { "-0e-1000000", -0.0 }, - { "0,0e-1000000", 0.0 }, - { "-0,0e-1000000", -0.0 }, - }; -#define NTESTS 
(sizeof (tests) / sizeof (tests[0])) +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ +static const struct \ +{ \ + const char *in; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "0", 0.0 ## LSUF }, \ + { "000", 0.0 ## LSUF }, \ + { "-0", -0.0 ## LSUF }, \ + { "-000", -0.0 ## LSUF }, \ + { "0,", 0.0 ## LSUF }, \ + { "-0,", -0.0 ## LSUF }, \ + { "0,0", 0.0 ## LSUF }, \ + { "-0,0", -0.0 ## LSUF }, \ + { "0e-10", 0.0 ## LSUF }, \ + { "-0e-10", -0.0 ## LSUF }, \ + { "0,e-10", 0.0 ## LSUF }, \ + { "-0,e-10", -0.0 ## LSUF }, \ + { "0,0e-10", 0.0 ## LSUF }, \ + { "-0,0e-10", -0.0 ## LSUF }, \ + { "0e-1000000", 0.0 ## LSUF }, \ + { "-0e-1000000", -0.0 ## LSUF }, \ + { "0,0e-1000000", 0.0 ## LSUF }, \ + { "-0,0e-1000000", -0.0 ## LSUF }, \ + }; \ + \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = strto ## FSUF (tests_strto ## FSUF[i].in, &ep); \ + \ + if (*ep != '\0') \ + { \ + printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected \ + || (copysign ## CSUF (10.0 ## LSUF, r) \ + != copysign ## CSUF (10.0 ## LSUF, \ + tests_strto ## FSUF[i].expected))) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + return status; \ +} +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -61,29 +101,7 @@ do_test (void) return 1; } - int status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = strtod (tests[i].in, &ep); - - if (*ep != '\0') - { - printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); - status = 1; - } - - if (r != tests[i].expected - || copysign 
(10.0, r) != copysign (10.0, tests[i].expected)) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #include diff --git a/stdlib/tst-strtod5i.c b/stdlib/tst-strtod5i.c index ee54e3404c..136aedea68 100644 --- a/stdlib/tst-strtod5i.c +++ b/stdlib/tst-strtod5i.c @@ -16,52 +16,112 @@ License along with the GNU C Library; if not, see . */ +/* Defining _LIBC_TEST ensures long double math functions are + declared in the headers. */ +#define _LIBC_TEST 1 #include #include #include #include #include +#include "tst-strtod.h" + +/* This tests internal interfaces, which are only defined for types + with distinct ABIs, so disable testing for types without distinct + ABIs. */ +#undef IF_FLOAT32 +#define IF_FLOAT32(x) +#undef IF_FLOAT64 +#define IF_FLOAT64(x) +#undef IF_FLOAT32X +#define IF_FLOAT32X(x) +#undef IF_FLOAT64X +#define IF_FLOAT64X(x) +#if !__HAVE_DISTINCT_FLOAT128 +# undef IF_FLOAT128 +# define IF_FLOAT128(x) +#endif + #define NNBSP "\xe2\x80\xaf" -static const struct -{ - const char *in; - int group; - double expected; -} tests[] = - { - { "0", 0, 0.0 }, - { "000", 0, 0.0 }, - { "-0", 0, -0.0 }, - { "-000", 0, -0.0 }, - { "0,", 0, 0.0 }, - { "-0,", 0, -0.0 }, - { "0,0", 0, 0.0 }, - { "-0,0", 0, -0.0 }, - { "0e-10", 0, 0.0 }, - { "-0e-10", 0, -0.0 }, - { "0,e-10", 0, 0.0 }, - { "-0,e-10", 0, -0.0 }, - { "0,0e-10", 0, 0.0 }, - { "-0,0e-10", 0, -0.0 }, - { "0e-1000000", 0, 0.0 }, - { "-0e-1000000", 0, -0.0 }, - { "0,0e-1000000", 0, 0.0 }, - { "-0,0e-1000000", 0, -0.0 }, - { "0", 1, 0.0 }, - { "000", 1, 0.0 }, - { "-0", 1, -0.0 }, - { "-000", 1, -0.0 }, - { "0e-10", 1, 0.0 }, - { "-0e-10", 1, -0.0 }, - { "0e-1000000", 1, 0.0 }, - { "-0e-1000000", 1, -0.0 }, - { "000"NNBSP"000"NNBSP"000", 1, 0.0 }, - { "-000"NNBSP"000"NNBSP"000", 1, -0.0 } - }; -#define NTESTS (sizeof (tests) / sizeof (tests[0])) +#define TEST_STRTOD(FSUF, FTYPE, FTOSTR, LSUF, CSUF) \ 
+static const struct \ +{ \ + const char *in; \ + int group; \ + FTYPE expected; \ +} tests_strto ## FSUF[] = \ + { \ + { "0", 0, 0.0 ## LSUF }, \ + { "000", 0, 0.0 ## LSUF }, \ + { "-0", 0, -0.0 ## LSUF }, \ + { "-000", 0, -0.0 ## LSUF }, \ + { "0,", 0, 0.0 ## LSUF }, \ + { "-0,", 0, -0.0 ## LSUF }, \ + { "0,0", 0, 0.0 ## LSUF }, \ + { "-0,0", 0, -0.0 ## LSUF }, \ + { "0e-10", 0, 0.0 ## LSUF }, \ + { "-0e-10", 0, -0.0 ## LSUF }, \ + { "0,e-10", 0, 0.0 ## LSUF }, \ + { "-0,e-10", 0, -0.0 ## LSUF }, \ + { "0,0e-10", 0, 0.0 ## LSUF }, \ + { "-0,0e-10", 0, -0.0 ## LSUF }, \ + { "0e-1000000", 0, 0.0 ## LSUF }, \ + { "-0e-1000000", 0, -0.0 ## LSUF }, \ + { "0,0e-1000000", 0, 0.0 ## LSUF }, \ + { "-0,0e-1000000", 0, -0.0 ## LSUF }, \ + { "0", 1, 0.0 ## LSUF }, \ + { "000", 1, 0.0 ## LSUF }, \ + { "-0", 1, -0.0 ## LSUF }, \ + { "-000", 1, -0.0 ## LSUF }, \ + { "0e-10", 1, 0.0 ## LSUF }, \ + { "-0e-10", 1, -0.0 ## LSUF }, \ + { "0e-1000000", 1, 0.0 ## LSUF }, \ + { "-0e-1000000", 1, -0.0 ## LSUF }, \ + { "000"NNBSP"000"NNBSP"000", 1, 0.0 ## LSUF }, \ + { "-000"NNBSP"000"NNBSP"000", 1, -0.0 ## LSUF } \ + }; \ + \ +static int \ +test_strto ## FSUF (void) \ +{ \ + int status = 0; \ + \ + for (int i = 0; \ + i < sizeof (tests_strto ## FSUF) / sizeof (tests_strto ## FSUF[0]); \ + ++i) \ + { \ + char *ep; \ + FTYPE r = __strto ## FSUF ## _internal (tests_strto ## FSUF[i].in, \ + &ep, \ + tests_strto ## FSUF[i].group); \ + \ + if (*ep != '\0') \ + { \ + printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); \ + status = 1; \ + } \ + \ + if (r != tests_strto ## FSUF[i].expected \ + || (copysign ## CSUF (10.0 ## LSUF, r) \ + != copysign ## CSUF (10.0 ## LSUF, \ + tests_strto ## FSUF[i].expected))) \ + { \ + char buf1[FSTRLENMAX], buf2[FSTRLENMAX]; \ + FTOSTR (buf1, sizeof (buf1), "%g", r); \ + FTOSTR (buf2, sizeof (buf2), "%g", \ + tests_strto ## FSUF[i].expected); \ + printf ("%d: got wrong results %s, expected %s\n", \ + i, buf1, buf2); \ + status = 1; \ + } \ + } \ + \ + 
return status; \ +} +GEN_TEST_STRTOD_FOREACH (TEST_STRTOD) static int do_test (void) @@ -72,29 +132,7 @@ do_test (void) return 1; } - int status = 0; - - for (int i = 0; i < NTESTS; ++i) - { - char *ep; - double r = __strtod_internal (tests[i].in, &ep, tests[i].group); - - if (*ep != '\0') - { - printf ("%d: got rest string \"%s\", expected \"\"\n", i, ep); - status = 1; - } - - if (r != tests[i].expected - || copysign (10.0, r) != copysign (10.0, tests[i].expected)) - { - printf ("%d: got wrong results %g, expected %g\n", - i, r, tests[i].expected); - status = 1; - } - } - - return status; + return STRTOD_TEST_FOREACH (test_strto); } #include diff --git a/string/Makefile b/string/Makefile index 8f31fa49e6..1dff405c27 100644 --- a/string/Makefile +++ b/string/Makefile @@ -184,6 +184,7 @@ tests := \ test-strncpy \ test-strndup \ test-strnlen \ + test-strnlen-nonarray \ test-strpbrk \ test-strrchr \ test-strspn \ @@ -235,7 +236,10 @@ tests-unsupported += $(tests-translation) endif # This test allocates a lot of memory and can run for a long time. -xtests = tst-strcoll-overflow +xtests += tst-strcoll-overflow + +# This test runs for a long time. +xtests += test-strncmp-nonarray # This test needs libdl. 
ifeq (yes,$(build-shared)) diff --git a/string/strerror.c b/string/strerror.c index 107d9d39c2..efa4e903ea 100644 --- a/string/strerror.c +++ b/string/strerror.c @@ -21,5 +21,5 @@ char * strerror (int errnum) { - return __strerror_l (errnum, __libc_tsd_get (locale_t, LOCALE)); + return __strerror_l (errnum, __libc_tsd_LOCALE); } diff --git a/string/strerror_l.c b/string/strerror_l.c index 15cce261e6..70456e5bb4 100644 --- a/string/strerror_l.c +++ b/string/strerror_l.c @@ -20,7 +20,7 @@ #include #include #include - +#include static const char * translate (const char *str, locale_t loc) @@ -31,6 +31,12 @@ translate (const char *str, locale_t loc) return res; } +static char * +unknown_error (locale_t loc) +{ + return (char *) translate ("Unknown error", loc); +} + /* Return a string describing the errno code in ERRNUM. */ char * @@ -40,18 +46,25 @@ __strerror_l (int errnum, locale_t loc) char *err = (char *) __get_errlist (errnum); if (__glibc_unlikely (err == NULL)) { - struct tls_internal_t *tls_internal = __glibc_tls_internal (); - free (tls_internal->strerror_l_buf); - if (__asprintf (&tls_internal->strerror_l_buf, "%s%d", - translate ("Unknown error ", loc), errnum) > 0) - err = tls_internal->strerror_l_buf; - else + if (__libc_initial) { - /* The memory was freed above. */ - tls_internal->strerror_l_buf = NULL; - /* Provide a fallback translation. */ - err = (char *) translate ("Unknown error", loc); + struct tls_internal_t *tls_internal = __glibc_tls_internal (); + free (tls_internal->strerror_l_buf); + if (__asprintf (&tls_internal->strerror_l_buf, "%s%d", + translate ("Unknown error ", loc), errnum) > 0) + err = tls_internal->strerror_l_buf; + else + { + /* The memory was freed above. */ + tls_internal->strerror_l_buf = NULL; + /* Provide a fallback translation. */ + err = unknown_error (loc); + } } + else + /* Secondary namespaces use a different malloc, so cannot + participate in the buffer management. 
*/ + err = unknown_error (loc); } else err = (char *) translate (err, loc); diff --git a/string/strsignal.c b/string/strsignal.c index 3114601564..d9b0365468 100644 --- a/string/strsignal.c +++ b/string/strsignal.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Return a string describing the meaning of the signal number SIGNUM. */ char * @@ -30,21 +31,28 @@ strsignal (int signum) if (desc != NULL) return _(desc); - struct tls_internal_t *tls_internal = __glibc_tls_internal (); - free (tls_internal->strsignal_buf); + if (__libc_initial) + { + struct tls_internal_t *tls_internal = __glibc_tls_internal (); + free (tls_internal->strsignal_buf); - int r; + int r; #ifdef SIGRTMIN - if (signum >= SIGRTMIN && signum <= SIGRTMAX) - r = __asprintf (&tls_internal->strsignal_buf, _("Real-time signal %d"), - signum - SIGRTMIN); - else + if (signum >= SIGRTMIN && signum <= SIGRTMAX) + r = __asprintf (&tls_internal->strsignal_buf, _("Real-time signal %d"), + signum - SIGRTMIN); + else #endif - r = __asprintf (&tls_internal->strsignal_buf, _("Unknown signal %d"), - signum); - - if (r == -1) - tls_internal->strsignal_buf = NULL; - - return tls_internal->strsignal_buf; + r = __asprintf (&tls_internal->strsignal_buf, _("Unknown signal %d"), + signum); + + if (r >= 0) + return tls_internal->strsignal_buf; + else + tls_internal->strsignal_buf = NULL; + } + /* Fall through on asprintf error, and for !__libc_initial: + secondary namespaces use a different malloc and cannot + participate in the buffer management. */ + return _("Unknown signal"); } diff --git a/string/test-Xncmp-nonarray.c b/string/test-Xncmp-nonarray.c new file mode 100644 index 0000000000..9f3a3ca75d --- /dev/null +++ b/string/test-Xncmp-nonarray.c @@ -0,0 +1,183 @@ +/* Test non-array inputs to string comparison functions. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This skeleton file is included from string/test-strncmp-nonarray.c and + wcsmbs/test-wcsncmp-nonarray.c to test that reading of the arrays stops + at the first null character. + + TEST_IDENTIFIER must be the test function identifier. TEST_NAME is + the same as a string. + + CHAR must be defined as the character type. */ + +#include +#include +#include +#include +#include +#include +#include + +/* Much shorter than test-Xnlen-nonarray.c because of deeply nested loops. */ +enum { buffer_length = 80 }; + +/* The test buffer layout follows what is described in test-Xnlen-nonarray.c, + except that there are two buffers, left and right. The variables + a_count, zero_count, start_offset are all duplicated. */ + +/* Return the maximum string length for a string that starts at + start_offset. */ +static int +string_length (int a_count, int start_offset) +{ + if (start_offset == buffer_length || start_offset >= a_count) + return 0; + else + return a_count - start_offset; +} + +/* This is the valid maximum length argument computation for + strnlen/wcsnlen. See test-Xnlen-nonarray.c. */ +static int +maximum_length (int start_offset, int zero_count) +{ + if (start_offset == buffer_length) + return 0; + else if (zero_count > 0) + /* Effectively unbounded, but we need to stop fairly low, + otherwise testing takes too long. 
*/ + return buffer_length + 32; + else + return buffer_length - start_offset; +} + +typedef __typeof (TEST_IDENTIFIER) *proto_t; + +#define TEST_MAIN +#include "test-string.h" + +IMPL (TEST_IDENTIFIER, 1) + +static int +test_main (void) +{ + TEST_VERIFY_EXIT (sysconf (_SC_PAGESIZE) >= buffer_length); + test_init (); + + struct support_next_to_fault left_ntf + = support_next_to_fault_allocate (buffer_length * sizeof (CHAR)); + CHAR *left_buffer = (CHAR *) left_ntf.buffer; + struct support_next_to_fault right_ntf + = support_next_to_fault_allocate (buffer_length * sizeof (CHAR)); + CHAR *right_buffer = (CHAR *) right_ntf.buffer; + + FOR_EACH_IMPL (impl, 0) + { + printf ("info: testing %s\n", impl->name); + for (size_t i = 0; i < buffer_length; ++i) + left_buffer[i] = 'A'; + + for (int left_zero_count = 0; left_zero_count <= buffer_length; + ++left_zero_count) + { + if (left_zero_count > 0) + left_buffer[buffer_length - left_zero_count] = 0; + int left_a_count = buffer_length - left_zero_count; + for (size_t i = 0; i < buffer_length; ++i) + right_buffer[i] = 'A'; + for (int right_zero_count = 0; right_zero_count <= buffer_length; + ++right_zero_count) + { + if (right_zero_count > 0) + right_buffer[buffer_length - right_zero_count] = 0; + int right_a_count = buffer_length - right_zero_count; + for (int left_start_offset = 0; + left_start_offset <= buffer_length; + ++left_start_offset) + { + CHAR *left_start_pointer = left_buffer + left_start_offset; + int left_maxlen + = maximum_length (left_start_offset, left_zero_count); + int left_length + = string_length (left_a_count, left_start_offset); + for (int right_start_offset = 0; + right_start_offset <= buffer_length; + ++right_start_offset) + { + CHAR *right_start_pointer + = right_buffer + right_start_offset; + int right_maxlen + = maximum_length (right_start_offset, right_zero_count); + int right_length + = string_length (right_a_count, right_start_offset); + + /* Maximum length is modelled after strnlen/wcsnlen, + and 
must be valid for both pointer arguments at + the same time. */ + int maxlen = MIN (left_maxlen, right_maxlen); + + for (int length_argument = 0; length_argument <= maxlen; + ++length_argument) + { + if (test_verbose) + { + printf ("left: zero_count=%d" + " a_count=%d start_offset=%d\n", + left_zero_count, left_a_count, + left_start_offset); + printf ("right: zero_count=%d" + " a_count=%d start_offset=%d\n", + right_zero_count, right_a_count, + right_start_offset); + printf ("length argument: %d\n", + length_argument); + } + + /* Effective lengths bounded by length argument. + The effective length determines the + outcome of the comparison. */ + int left_effective + = MIN (left_length, length_argument); + int right_effective + = MIN (right_length, length_argument); + if (left_effective == right_effective) + TEST_COMPARE (CALL (impl, + left_start_pointer, + right_start_pointer, + length_argument), 0); + else if (left_effective < right_effective) + TEST_COMPARE (CALL (impl, + left_start_pointer, + right_start_pointer, + length_argument) < 0, 1); + else + TEST_COMPARE (CALL (impl, + left_start_pointer, + right_start_pointer, + length_argument) > 0, 1); + } + } + } + } + } + } + + return 0; +} + +#include diff --git a/string/test-Xnlen-nonarray.c b/string/test-Xnlen-nonarray.c new file mode 100644 index 0000000000..499bef2041 --- /dev/null +++ b/string/test-Xnlen-nonarray.c @@ -0,0 +1,133 @@ +/* Test non-array inputs to string length functions. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This skeleton file is included from string/test-strnlen-nonarray.c + and wcsmbs/test-wcsnlen-nonarray.c to test that reading of the array + stops at the first null character. + + TEST_IDENTIFIER must be the test function identifier. TEST_NAME is + the same as a string. + + CHAR must be defined as the character type. */ + +#include +#include +#include +#include +#include +#include +#include + +typedef __typeof (TEST_IDENTIFIER) *proto_t; + +#define TEST_MAIN +#include "test-string.h" + +IMPL (TEST_IDENTIFIER, 1) + +static int +test_main (void) +{ + enum { buffer_length = 256 }; + TEST_VERIFY_EXIT (sysconf (_SC_PAGESIZE) >= buffer_length); + + test_init (); + + /* Buffer layout: There are a_count 'A' characters followed by + zero_count null characters, for a total of buffer_length + characters: + + AAAAA...AAAAA 00000 ... 00000 (unmapped page follows) + \ / \ / + (a_count) (zero_count) + \___ (buffer_length) ___/ + ^ + | + start_offset + + The buffer length does not change, but a_count (and thus zero_count) + and start_offset vary. + + If start_offset == buffer_length, only 0 is a valid length + argument. The result is 0. + + Otherwise, if zero_count > 0 (if there are null characters in the + buffer), then any length argument is valid. If start_offset < + a_count (i.e., there is a non-null character at start_offset), the + result is the minimum of a_count - start_offset and the length + argument. Otherwise the result is 0. + + Otherwise, there are no null characters before the unmapped page. 
+ The length argument must not be greater than buffer_length - + start_offset, and the result is the length argument. */ + + struct support_next_to_fault ntf + = support_next_to_fault_allocate (buffer_length * sizeof (CHAR)); + CHAR *buffer = (CHAR *) ntf.buffer; + + FOR_EACH_IMPL (impl, 0) + { + printf ("info: testing %s\n", impl->name); + for (size_t i = 0; i < buffer_length; ++i) + buffer[i] = 'A'; + + for (int zero_count = 0; zero_count <= buffer_length; ++zero_count) + { + if (zero_count > 0) + buffer[buffer_length - zero_count] = 0; + int a_count = buffer_length - zero_count; + for (int start_offset = 0; start_offset <= buffer_length; + ++start_offset) + { + CHAR *start_pointer = buffer + start_offset; + if (start_offset == buffer_length) + TEST_COMPARE (CALL (impl, buffer + start_offset, 0), 0); + else if (zero_count > 0) + for (int length_argument = 0; + length_argument <= 2 * buffer_length; + ++length_argument) + { + if (test_verbose) + printf ("zero_count=%d a_count=%d start_offset=%d" + " length_argument=%d\n", + zero_count, a_count, start_offset, + length_argument); + if (start_offset < a_count) + TEST_COMPARE (CALL (impl, start_pointer, length_argument), + MIN (a_count - start_offset, + length_argument)); + else + TEST_COMPARE (CALL (impl, start_pointer, length_argument), + 0); + } + else + for (int length_argument = 0; + length_argument <= buffer_length - start_offset; + ++length_argument) + TEST_COMPARE (CALL (impl, start_pointer, length_argument), + length_argument); + } + } + } + + support_next_to_fault_free (&ntf); + + return 0; +} + +#include diff --git a/string/test-strchr.c b/string/test-strchr.c index c795eac6fa..72b17af687 100644 --- a/string/test-strchr.c +++ b/string/test-strchr.c @@ -255,6 +255,69 @@ check1 (void) check_result (impl, s, c, exp_result); } +static void +check2 (void) +{ + CHAR *s = (CHAR *) (buf1 + getpagesize () - 4 * sizeof (CHAR)); + CHAR *s_begin = (CHAR *) (buf1 + getpagesize () - 64); +#ifndef USE_FOR_STRCHRNUL + CHAR 
*exp_result = NULL; +#else + CHAR *exp_result = s + 1; +#endif + CHAR val = 0x12; + for (; s_begin != s; ++s_begin) + *s_begin = val; + + s[0] = val + 1; + s[1] = 0; + s[2] = val + 1; + s[3] = val + 1; + + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + s[3] = val; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + exp_result = s; + s[0] = val; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + + s[3] = val + 1; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + + s[0] = val + 1; + s[1] = val + 1; + s[2] = val + 1; + s[3] = val + 1; + s[4] = val; + exp_result = s + 4; + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } + s[4] = 0; +#ifndef USE_FOR_STRCHRNUL + exp_result = NULL; +#else + exp_result = s + 4; +#endif + { + FOR_EACH_IMPL (impl, 0) + check_result (impl, s, val, exp_result); + } +} + int test_main (void) { @@ -263,7 +326,7 @@ test_main (void) test_init (); check1 (); - + check2 (); printf ("%20s", ""); FOR_EACH_IMPL (impl, 0) printf ("\t%s", impl->name); diff --git a/string/test-strncmp-nonarray.c b/string/test-strncmp-nonarray.c new file mode 100644 index 0000000000..581e52d01b --- /dev/null +++ b/string/test-strncmp-nonarray.c @@ -0,0 +1,4 @@ +#define TEST_IDENTIFIER strncmp +#define TEST_NAME "strncmp" +typedef char CHAR; +#include "test-Xncmp-nonarray.c" diff --git a/string/test-strnlen-nonarray.c b/string/test-strnlen-nonarray.c new file mode 100644 index 0000000000..0ad05756d9 --- /dev/null +++ b/string/test-strnlen-nonarray.c @@ -0,0 +1,4 @@ +#define TEST_IDENTIFIER strnlen +#define TEST_NAME "strnlen" +typedef char CHAR; +#include "test-Xnlen-nonarray.c" diff --git a/sunrpc/rpc_thread.c b/sunrpc/rpc_thread.c index a04b7ec47f..e20f0a6230 100644 --- a/sunrpc/rpc_thread.c +++ b/sunrpc/rpc_thread.c @@ -3,7 +3,6 @@ #include #include -#include #include #include diff --git a/support/capture_subprocess.h 
b/support/capture_subprocess.h index 93b7245d2a..57bb941e7d 100644 --- a/support/capture_subprocess.h +++ b/support/capture_subprocess.h @@ -42,11 +42,12 @@ struct support_capture_subprocess support_capture_subprocess struct support_capture_subprocess support_capture_subprogram (const char *file, char *const argv[], char *const envp[]); -/* Copy the running program into a setgid binary and run it with CHILD_ID - argument. If execution is successful, return the exit status of the child - program, otherwise return a non-zero failure exit code. */ -int support_capture_subprogram_self_sgid - (char *child_id); +/* Copy the running program into a setgid binary and run it with + CHILD_ID argument. If the program exits with a non-zero status, + exit with that exit status (or status 1 if the program did not exit + normally). If the test cannot be performed, exit with + EXIT_UNSUPPORTED. */ +void support_capture_subprogram_self_sgid (const char *child_id); /* Deallocate the subprocess data captured by support_capture_subprocess. */ diff --git a/support/check.h b/support/check.h index 711f34b83b..8f41e5b99f 100644 --- a/support/check.h +++ b/support/check.h @@ -24,6 +24,11 @@ __BEGIN_DECLS +/* Record a test failure, print the failure message to standard output + and pass the result of 1 through. */ +#define FAIL(...) \ + support_print_failure_impl (__FILE__, __LINE__, __VA_ARGS__) + /* Record a test failure, print the failure message to standard output and return 1. */ #define FAIL_RET(...) \ @@ -202,6 +207,9 @@ void support_record_failure_reset (void); failures or not. */ int support_record_failure_is_failed (void); +/* Terminate the process if any failures have been encountered so far. 
*/ +void support_record_failure_barrier (void); + __END_DECLS #endif /* SUPPORT_CHECK_H */ diff --git a/support/process_state.h b/support/process_state.h index 1cf902e91b..9541d8c343 100644 --- a/support/process_state.h +++ b/support/process_state.h @@ -31,13 +31,16 @@ enum support_process_state support_process_state_dead = 0x20, /* X (dead). */ support_process_state_zombie = 0x40, /* Z (zombie). */ support_process_state_parked = 0x80, /* P (parked). */ + support_process_state_invalid = 0x100 /* Invalid state. */ }; /* Wait for process PID to reach state STATE. It can be a combination of multiple possible states ('process_state_running | process_state_sleeping') where the function return when any of these state are observed. For an invalid state not represented by SUPPORT_PROCESS_STATE, it fallbacks - to a 2 second sleep. */ -void support_process_state_wait (pid_t pid, enum support_process_state state); + to a 2 second sleep. + Return the found process state. */ +enum support_process_state +support_process_state_wait (pid_t pid, enum support_process_state state); #endif diff --git a/support/resolv_test.c b/support/resolv_test.c index f1613bd255..d4cc26b4aa 100644 --- a/support/resolv_test.c +++ b/support/resolv_test.c @@ -326,7 +326,7 @@ resolv_response_add_name (struct resolv_response_builder *b, crname_target = *ptr; else crname_target = NULL; - TEST_VERIFY (crname_target != crname); + TEST_VERIFY_EXIT (crname_target != crname); /* Not added to the tree. */ free (crname); } diff --git a/support/support.h b/support/support.h index ba21ec9b5a..1a77f79793 100644 --- a/support/support.h +++ b/support/support.h @@ -113,7 +113,7 @@ void *xposix_memalign (size_t alignment, size_t n) __attribute_malloc__ __attribute_alloc_align__ ((1)) __attribute_alloc_size__ ((2)) __attr_dealloc_free __returns_nonnull; char *xasprintf (const char *format, ...) 
- __attribute__ ((format (printf, 1, 2), malloc)) __attr_dealloc_free + __attribute__ ((format (printf, 1, 2), __malloc__)) __attr_dealloc_free __returns_nonnull; char *xstrdup (const char *) __attr_dealloc_free __returns_nonnull; char *xstrndup (const char *, size_t) __attr_dealloc_free __returns_nonnull; diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c index 53847194cb..cbc6951064 100644 --- a/support/support_capture_subprocess.c +++ b/support/support_capture_subprocess.c @@ -21,12 +21,17 @@ #include #include +#include +#include +#include #include +#include #include #include #include #include #include +#include #include static void @@ -109,111 +114,88 @@ support_capture_subprogram (const char *file, char *const argv[], /* Copies the executable into a restricted directory, so that we can safely make it SGID with the TARGET group ID. Then runs the executable. */ -static int -copy_and_spawn_sgid (char *child_id, gid_t gid) +static void +copy_and_spawn_sgid (const char *child_id, gid_t gid) { - char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", - test_dir, (intmax_t) getpid ()); + char *dirname = support_create_temp_directory ("tst-glibc-sgid-"); char *execname = xasprintf ("%s/bin", dirname); - int infd = -1; - int outfd = -1; - int ret = 1, status = 1; - - TEST_VERIFY (mkdir (dirname, 0700) == 0); - if (support_record_failure_is_failed ()) - goto err; + add_temp_file (execname); - infd = open ("/proc/self/exe", O_RDONLY); - if (infd < 0) + if (access ("/proc/self/exe", R_OK) != 0) FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n"); - outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); - TEST_VERIFY (outfd >= 0); - if (support_record_failure_is_failed ()) - goto err; - - char buf[4096]; - for (;;) - { - ssize_t rdcount = read (infd, buf, sizeof (buf)); - TEST_VERIFY (rdcount >= 0); - if (support_record_failure_is_failed ()) - goto err; - if (rdcount == 0) - break; - char *p = buf; - char *end = 
buf + rdcount; - while (p != end) - { - ssize_t wrcount = write (outfd, buf, end - p); - if (wrcount == 0) - errno = ENOSPC; - TEST_VERIFY (wrcount > 0); - if (support_record_failure_is_failed ()) - goto err; - p += wrcount; - } - } + support_copy_file ("/proc/self/exe", execname); - bool chowned = false; - TEST_VERIFY ((chowned = fchown (outfd, getuid (), gid) == 0) - || errno == EPERM); - if (support_record_failure_is_failed ()) - goto err; - else if (!chowned) - { - ret = 77; - goto err; - } + if (chown (execname, getuid (), gid) != 0) + FAIL_UNSUPPORTED ("cannot change group of \"%s\" to %jd: %m", + execname, (intmax_t) gid); - TEST_VERIFY (fchmod (outfd, 02750) == 0); - if (support_record_failure_is_failed ()) - goto err; - TEST_VERIFY (close (outfd) == 0); - if (support_record_failure_is_failed ()) - goto err; - TEST_VERIFY (close (infd) == 0); - if (support_record_failure_is_failed ()) - goto err; + if (chmod (execname, 02750) != 0) + FAIL_UNSUPPORTED ("cannot make \"%s\" SGID: %m ", execname); /* We have the binary, now spawn the subprocess. Avoid using support_subprogram because we only want the program exit status, not the contents. */ - ret = 0; - infd = outfd = -1; - char * const args[] = {execname, child_id, NULL}; + char * const args[] = {execname, (char *) child_id, NULL}; + int status = support_subprogram_wait (args[0], args); - status = support_subprogram_wait (args[0], args); + free (execname); + free (dirname); -err: - if (outfd >= 0) - close (outfd); - if (infd >= 0) - close (infd); - if (execname != NULL) + if (WIFEXITED (status)) { - unlink (execname); - free (execname); + if (WEXITSTATUS (status) == 0) + return; + else + exit (WEXITSTATUS (status)); } - if (dirname != NULL) + else + FAIL_EXIT1 ("subprogram failed with status %d", status); +} + +/* Returns true if a group with NAME has been found, and writes its + GID to *TARGET. 
*/ +static bool +find_sgid_group (gid_t *target, const char *name) +{ + /* Do not use getgrnam_r because it does not work in statically + linked binaries if the system libc is different. */ + FILE *fp = fopen ("/etc/group", "rce"); + if (fp == NULL) + return false; + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + bool ok = false; + struct scratch_buffer buf; + scratch_buffer_init (&buf); + while (true) { - rmdir (dirname); - free (dirname); + struct group grp; + struct group *result = NULL; + int status = fgetgrent_r (fp, &grp, buf.data, buf.length, &result); + if (status == 0 && result != NULL) + { + if (strcmp (result->gr_name, name) == 0) + { + *target = result->gr_gid; + ok = true; + break; + } + } + else if (errno != ERANGE) + break; + else if (!scratch_buffer_grow (&buf)) + break; } - - if (ret == 77) - FAIL_UNSUPPORTED ("Failed to make sgid executable for test\n"); - if (ret != 0) - FAIL_EXIT1 ("Failed to make sgid executable for test\n"); - - return status; + scratch_buffer_free (&buf); + fclose (fp); + return ok; } -int -support_capture_subprogram_self_sgid (char *child_id) +void +support_capture_subprogram_self_sgid (const char *child_id) { - gid_t target = 0; const int count = 64; gid_t groups[count]; @@ -225,6 +207,7 @@ support_capture_subprogram_self_sgid (char *child_id) (intmax_t) getuid ()); gid_t current = getgid (); + gid_t target = current; for (int i = 0; i < ret; ++i) { if (groups[i] != current) @@ -234,11 +217,18 @@ support_capture_subprogram_self_sgid (char *child_id) } } - if (target == 0) - FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", - (intmax_t) getuid ()); + if (target == current) + { + /* If running as root, try to find a harmless group for SGID. 
*/ + if (getuid () != 0 + || (!find_sgid_group (&target, "nogroup") + && !find_sgid_group (&target, "bin") + && !find_sgid_group (&target, "daemon"))) + FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", + (intmax_t) getuid ()); + } - return copy_and_spawn_sgid (child_id, target); + copy_and_spawn_sgid (child_id, target); } void diff --git a/support/support_process_state.c b/support/support_process_state.c index 062335234f..ae8e0a531c 100644 --- a/support/support_process_state.c +++ b/support/support_process_state.c @@ -27,7 +27,7 @@ #include #include -void +enum support_process_state support_process_state_wait (pid_t pid, enum support_process_state state) { #ifdef __linux__ @@ -75,7 +75,7 @@ support_process_state_wait (pid_t pid, enum support_process_state state) { free (line); xfclose (fstatus); - return; + return process_states[i].s; } rewind (fstatus); @@ -90,4 +90,6 @@ support_process_state_wait (pid_t pid, enum support_process_state state) /* Fallback to nanosleep if an invalid state is found. */ #endif nanosleep (&(struct timespec) { 1, 0 }, NULL); + + return support_process_state_invalid; } diff --git a/support/support_record_failure.c b/support/support_record_failure.c index 978123701d..72ee2b232f 100644 --- a/support/support_record_failure.c +++ b/support/support_record_failure.c @@ -112,3 +112,13 @@ support_record_failure_is_failed (void) synchronization for reliable test error reporting anyway. */ return __atomic_load_n (&state->failed, __ATOMIC_RELAXED); } + +void +support_record_failure_barrier (void) +{ + if (__atomic_load_n (&state->failed, __ATOMIC_RELAXED)) + { + puts ("error: exiting due to previous errors"); + exit (1); + } +} diff --git a/support/test-container.c b/support/test-container.c index ebcc722da5..23fe31071c 100644 --- a/support/test-container.c +++ b/support/test-container.c @@ -705,6 +705,7 @@ check_for_unshare_hints (int require_pidns) val = -1; /* Sentinel. 
*/ int cnt = fscanf (f, "%d", &val); + fclose (f); if (cnt == 1 && val != files[i].bad_value) continue; diff --git a/support/test-driver.c b/support/test-driver.c index f4c3e4d666..04ceebc08f 100644 --- a/support/test-driver.c +++ b/support/test-driver.c @@ -155,6 +155,7 @@ main (int argc, char **argv) { CMDLINE_OPTIONS TEST_DEFAULT_OPTIONS + { 0, } }; test_config.options = &options; #endif diff --git a/support/tst-support-process_state.c b/support/tst-support-process_state.c index d73269320f..4a88eae3a7 100644 --- a/support/tst-support-process_state.c +++ b/support/tst-support-process_state.c @@ -68,28 +68,39 @@ do_test (void) if (test_verbose) printf ("info: waiting pid %d, state_stopped/state_tracing_stop\n", (int) pid); - support_process_state_wait (pid, stop_state); + { + enum support_process_state state = + support_process_state_wait (pid, stop_state); + TEST_VERIFY (state == support_process_state_stopped + || state == support_process_state_tracing_stop); + } if (kill (pid, SIGCONT) != 0) FAIL_RET ("kill (%d, SIGCONT): %m\n", pid); if (test_verbose) printf ("info: waiting pid %d, state_sleeping\n", (int) pid); - support_process_state_wait (pid, support_process_state_sleeping); + TEST_COMPARE (support_process_state_wait (pid, + support_process_state_sleeping), + support_process_state_sleeping); if (kill (pid, SIGUSR1) != 0) FAIL_RET ("kill (%d, SIGUSR1): %m\n", pid); if (test_verbose) printf ("info: waiting pid %d, state_running\n", (int) pid); - support_process_state_wait (pid, support_process_state_running); + TEST_COMPARE (support_process_state_wait (pid, + support_process_state_running), + support_process_state_running); if (kill (pid, SIGKILL) != 0) FAIL_RET ("kill (%d, SIGKILL): %m\n", pid); if (test_verbose) printf ("info: waiting pid %d, state_zombie\n", (int) pid); - support_process_state_wait (pid, support_process_state_zombie); + TEST_COMPARE (support_process_state_wait (pid, + support_process_state_zombie), + support_process_state_zombie);; 
siginfo_t info; int r = waitid (P_PID, pid, &info, WEXITED); diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile index 141d7d9cc2..f205884fe1 100644 --- a/sysdeps/aarch64/Makefile +++ b/sysdeps/aarch64/Makefile @@ -2,11 +2,15 @@ long-double-fcts = yes ifeq (yes,$(aarch64-bti)) # Mark linker output BTI compatible, it warns on non-BTI inputs. +# Do not do this for conform tests because they may not be compiled +# with the appropriate compiler flags. +ifneq ($(subdir),conform) sysdep-LDFLAGS += -Wl,-z,force-bti # Make warnings fatal outside the test system. LDFLAGS-lib.so += -Wl,--fatal-warnings LDFLAGS-rtld += -Wl,-z,force-bti,--fatal-warnings -endif +endif # $(subdir) != conform +endif # $(aarch64-bit) ifeq ($(subdir),elf) sysdep-dl-routines += dl-bti @@ -70,7 +74,19 @@ sysdep_routines += \ __arm_za_disable tests += \ - tst-sme-jmp + tst-sme-jmp \ + tst-sme-signal \ + tst-sme-za-state \ + # tests +tests-internal += \ + tst-sme-clone \ + tst-sme-clone3 \ + tst-sme-fork \ + tst-sme-vfork \ + # tests-internal + +$(objpfx)tst-sme-clone3: $(objpfx)clone3.o $(objpfx)__arm_za_disable.o + endif ifeq ($(subdir),malloc) diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S index 7b6add751e..84156ee6ef 100644 --- a/sysdeps/aarch64/__longjmp.S +++ b/sysdeps/aarch64/__longjmp.S @@ -51,24 +51,7 @@ ENTRY (__longjmp) #if IS_IN(libc) /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */ -# if HAVE_AARCH64_PAC_RET - PACIASP - cfi_window_save -# endif - stp x29, x30, [sp, -16]! 
- cfi_adjust_cfa_offset (16) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - mov x29, sp - bl __libc_arm_za_disable - ldp x29, x30, [sp], 16 - cfi_adjust_cfa_offset (-16) - cfi_restore (x29) - cfi_restore (x30) -# if HAVE_AARCH64_PAC_RET - AUTIASP - cfi_window_save -# endif + CALL_LIBC_ARM_ZA_DISABLE #endif ldp x19, x20, [x0, #JB_X19<<3] diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index 234a6c457c..be8541f649 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -41,8 +41,6 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \ v_log10_data \ erf_data \ erff_data \ - sv_erf_data \ - sv_erff_data \ v_exp_tail_data \ erfc_data \ erfcf_data \ diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c index 0a86c9823a..aaf874f4ea 100644 --- a/sysdeps/aarch64/fpu/acos_advsimd.c +++ b/sysdeps/aarch64/fpu/acos_advsimd.c @@ -18,24 +18,23 @@ . */ #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { - float64x2_t poly[12]; - float64x2_t pi, pi_over_2; + double c1, c3, c5, c7, c9, c11; + float64x2_t c0, c2, c4, c6, c8, c10; uint64x2_t abs_mask; + float64x2_t pi, pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
*/ - .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), - V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), - V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), - V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), - V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), - V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, - .pi = V2 (0x1.921fb54442d18p+1), - .pi_over_2 = V2 (0x1.921fb54442d18p+0), + .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, + .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, + .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, + .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, + .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, + .pi = V2 (0x1.921fb54442d18p+1), .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), }; @@ -63,7 +62,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) acos(x) ~ pi/2 - (x + x^3 P(x^2)). - The largest observed error in this region is 1.18 ulps, + The largest observed error in this region is 1.18 ulp: _ZGVnN2v_acos (0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0 want 0x1.0d54d1985c069p+0. @@ -71,9 +70,9 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). - The largest observed error in this region is 1.52 ulps, - _ZGVnN2v_acos (0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1 - want 0x1.edbbedf8a7d6cp-1. */ + The largest observed error in this region is 1.50 ulp: + _ZGVnN2v_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1 + want 0x1.ec1a46aa829p-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) { const struct data *d = ptr_barrier (&data); @@ -99,13 +98,32 @@ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2)); /* Use a single polynomial approximation P for both intervals. 
*/ + float64x2_t z3 = vmulq_f64 (z2, z); float64x2_t z4 = vmulq_f64 (z2, z2); float64x2_t z8 = vmulq_f64 (z4, z4); - float64x2_t z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); - /* Finalize polynomial: z + z * z2 * P(z2). */ - p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); + /* Order-11 Estrin. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p411 = vfmaq_f64 (p47, z8, p811); + float64x2_t p = vfmaq_f64 (p03, z8, p411); + + /* Finalize polynomial: z + z3 * P(z2). */ + p = vfmaq_f64 (z, z3, p); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 diff --git a/sysdeps/aarch64/fpu/acos_sve.c b/sysdeps/aarch64/fpu/acos_sve.c index 99dbfac3e6..870d8062cf 100644 --- a/sysdeps/aarch64/fpu/acos_sve.c +++ b/sysdeps/aarch64/fpu/acos_sve.c @@ -18,20 +18,21 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[12]; - float64_t pi, pi_over_2; + float64_t c1, c3, c5, c7, c9, c11; + float64_t c0, c2, c4, c6, c8, c10; + float64_t pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
*/ - .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5, - 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, - 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8, - 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, - .pi = 0x1.921fb54442d18p+1, + .c0 = 0x1.555555555554ep-3, .c1 = 0x1.3333333337233p-4, + .c2 = 0x1.6db6db67f6d9fp-5, .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = 0x1.6e8b264d467d6p-6, .c5 = 0x1.1c5997c357e9dp-6, + .c6 = 0x1.c86a22cd9389dp-7, .c7 = 0x1.856073c22ebbep-7, + .c8 = 0x1.fd1151acb6bedp-8, .c9 = 0x1.087182f799c1dp-6, + .c10 = -0x1.6602748120927p-7, .c11 = 0x1.cfa0dd1f9478p-6, .pi_over_2 = 0x1.921fb54442d18p+0, }; @@ -42,20 +43,21 @@ static const struct data acos(x) ~ pi/2 - (x + x^3 P(x^2)). - The largest observed error in this region is 1.18 ulps, - _ZGVsMxv_acos (0x1.fbc5fe28ee9e3p-2) got 0x1.0d4d0f55667f6p+0 - want 0x1.0d4d0f55667f7p+0. + The largest observed error in this region is 1.18 ulp: + _ZGVsMxv_acos (0x1.fbb7c9079b429p-2) got 0x1.0d51266607582p+0 + want 0x1.0d51266607583p+0. For |x| in [0.5, 1.0], use same approximation with a change of variable acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). - The largest observed error in this region is 1.52 ulps, - _ZGVsMxv_acos (0x1.24024271a500ap-1) got 0x1.ed82df4243f0dp-1 - want 0x1.ed82df4243f0bp-1. */ + The largest observed error in this region is 1.50 ulp: + _ZGVsMxv_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1 + want 0x1.ec1a46aa829p-1. */ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b64 (); svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); svfloat64_t ax = svabs_x (pg, x); @@ -70,24 +72,41 @@ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg) svfloat64_t z = svsqrt_m (ax, a_gt_half, z2); /* Use a single polynomial approximation P for both intervals. 
*/ - svfloat64_t z4 = svmul_x (pg, z2, z2); - svfloat64_t z8 = svmul_x (pg, z4, z4); - svfloat64_t z16 = svmul_x (pg, z8, z8); - svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); + svfloat64_t z3 = svmul_x (ptrue, z2, z); + svfloat64_t z4 = svmul_x (ptrue, z2, z2); + svfloat64_t z8 = svmul_x (ptrue, z4, z4); + + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p411 = svmla_x (pg, p47, z8, p811); + svfloat64_t p = svmad_x (pg, p411, z8, p03); /* Finalize polynomial: z + z * z2 * P(z2). */ - p = svmla_x (pg, z, svmul_x (pg, z, z2), p); + p = svmad_x (pg, p, z3, z); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 = pi - 2 Q(|x|) , for -1.0 < x < -0.5. 
*/ - svfloat64_t y - = svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (p), sign)); - - svbool_t is_neg = svcmplt (pg, x, 0.0); - svfloat64_t off = svdup_f64_z (is_neg, d->pi); - svfloat64_t mul = svsel (a_gt_half, sv_f64 (2.0), sv_f64 (-1.0)); - svfloat64_t add = svsel (a_gt_half, off, sv_f64 (d->pi_over_2)); - - return svmla_x (pg, add, mul, y); + svfloat64_t mul = svreinterpret_f64 ( + svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10)); + mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign)); + svfloat64_t add = svreinterpret_f64 ( + svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->pi_over_2)))); + add = svsub_m (a_gt_half, sv_f64 (d->pi_over_2), add); + + return svmsb_x (pg, p, mul, add); } diff --git a/sysdeps/aarch64/fpu/acosh_advsimd.c b/sysdeps/aarch64/fpu/acosh_advsimd.c index c88283cf11..a98f4a2e4d 100644 --- a/sysdeps/aarch64/fpu/acosh_advsimd.c +++ b/sysdeps/aarch64/fpu/acosh_advsimd.c @@ -54,9 +54,8 @@ VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x) x = vbslq_f64 (special, vreinterpretq_f64_u64 (d->one), x); #endif - float64x2_t xm1 = vsubq_f64 (x, v_f64 (1)); - float64x2_t y; - y = vaddq_f64 (x, v_f64 (1)); + float64x2_t xm1 = vsubq_f64 (x, v_f64 (1.0)); + float64x2_t y = vaddq_f64 (x, v_f64 (1.0)); y = vmulq_f64 (y, xm1); y = vsqrtq_f64 (y); y = vaddq_f64 (xm1, y); diff --git a/sysdeps/aarch64/fpu/acosh_sve.c b/sysdeps/aarch64/fpu/acosh_sve.c index 3e4faaa5ca..78ebcffbb5 100644 --- a/sysdeps/aarch64/fpu/acosh_sve.c +++ b/sysdeps/aarch64/fpu/acosh_sve.c @@ -30,10 +30,10 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special) } /* SVE approximation for double-precision acosh, based on log1p. - The largest observed error is 3.19 ULP in the region where the + The largest observed error is 3.14 ULP in the region where the argument to log1p falls in the k=0 interval, i.e. x close to 1: - SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2 - want 0x1.ed23399f51373p-2. 
*/ + SV_NAME_D1 (acosh)(0x1.1e80ed12f0ad1p+0) got 0x1.ef0cee7c33ce1p-2 + want 0x1.ef0cee7c33ce4p-2. */ svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg) { /* (ix - One) >= (BigBound - One). */ diff --git a/sysdeps/aarch64/fpu/acoshf_advsimd.c b/sysdeps/aarch64/fpu/acoshf_advsimd.c index 8916dcbf40..004474acf9 100644 --- a/sysdeps/aarch64/fpu/acoshf_advsimd.c +++ b/sysdeps/aarch64/fpu/acoshf_advsimd.c @@ -25,35 +25,32 @@ const static struct data { struct v_log1pf_data log1pf_consts; uint32x4_t one; - uint16x4_t thresh; -} data = { - .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, - .one = V4 (0x3f800000), - .thresh = V4 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ -}; +} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) }; + +#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */ static float32x4_t NOINLINE VPCS_ATTR special_case (float32x4_t x, float32x4_t y, uint16x4_t special, - const struct v_log1pf_data d) + const struct v_log1pf_data *d) { return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special)); } /* Vector approximation for single-precision acosh, based on log1p. Maximum error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it - is 2.78 ULP: - __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 - want 0x1.ef9ea2p-3. + is 3.00 ULP: + _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4 + want 0x1.ef0a7cp-4. With exceptions disabled, we can compute u with a shorter dependency chain, - which gives maximum error of 3.07 ULP: - __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 - want 0x1.fbc7f4p-4. */ + which gives maximum error of 3.22 ULP: + _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5 + want 0x1.fdcdd2p-5. 
*/ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh); + uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh); #if WANT_SIMD_EXCEPT /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use @@ -64,15 +61,16 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x) float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p); float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1); #else - float32x4_t xm1 = vsubq_f32 (x, v_f32 (1)); - float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f))); + float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one)); + float32x4_t u + = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one))); #endif float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u)); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, special, d->log1pf_consts); - return log1pf_inline (y, d->log1pf_consts); + return special_case (x, y, special, &d->log1pf_consts); + return log1pf_inline (y, &d->log1pf_consts); } libmvec_hidden_def (V_NAME_F1 (acosh)) HALF_WIDTH_ALIAS_F1 (acosh) diff --git a/sysdeps/aarch64/fpu/acoshf_sve.c b/sysdeps/aarch64/fpu/acoshf_sve.c index 2110894e62..491365e24d 100644 --- a/sysdeps/aarch64/fpu/acoshf_sve.c +++ b/sysdeps/aarch64/fpu/acoshf_sve.c @@ -17,23 +17,26 @@ License along with the GNU C Library; if not, see . */ +#include "sv_math.h" +#include "sv_log1pf_inline.h" + #define One 0x3f800000 #define Thres 0x20000000 /* asuint(0x1p64) - One. 
*/ -#include "sv_log1pf_inline.h" - static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special) { + svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f); + svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ()); return sv_call_f32 (acoshf, x, y, special); } /* Single-precision SVE acosh(x) routine. Implements the same algorithm as vector acoshf and log1p. - Maximum error is 2.78 ULPs: - SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4 - want 0x1.f45b3cp-4. */ + Maximum error is 2.47 ULPs: + SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4 + want 0x1.e435a2p-4. */ svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg) { svuint32_t ix = svreinterpret_u32 (x); @@ -41,9 +44,9 @@ svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg) svfloat32_t xm1 = svsub_x (pg, x, 1.0f); svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f)); - svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg); + svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u)); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, y, special); - return y; + return special_case (xm1, tmp, special); + return sv_log1pf_inline (tmp, pg); } diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c index 2de6eff407..f4884227a5 100644 --- a/sysdeps/aarch64/fpu/asin_advsimd.c +++ b/sysdeps/aarch64/fpu/asin_advsimd.c @@ -18,24 +18,23 @@ . */ #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { - float64x2_t poly[12]; + float64x2_t c0, c2, c4, c6, c8, c10; float64x2_t pi_over_2; uint64x2_t abs_mask; + double c1, c3, c5, c7, c9, c11; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
*/ - .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), - V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), - V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), - V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), - V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), - V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, - .pi_over_2 = V2 (0x1.921fb54442d18p+0), - .abs_mask = V2 (0x7fffffffffffffff), + .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, + .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, + .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, + .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, + .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), }; #define AllMask v_u64 (0xffffffffffffffff) @@ -68,8 +67,8 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 2.69 ulps, - _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 - want 0x1.110d7e85fdd53p-1. */ + _ZGVnN2v_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1 + want 0x1.1111dd54ddf99p-1. 
*/ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) { const struct data *d = ptr_barrier (&data); @@ -86,7 +85,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) return special_case (x, x, AllMask); #endif - uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5)); + uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5)); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 @@ -99,7 +98,26 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) float64x2_t z4 = vmulq_f64 (z2, z2); float64x2_t z8 = vmulq_f64 (z4, z4); float64x2_t z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); + + /* order-11 estrin. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); + float64x2_t p = vfmaq_f64 (p07, z16, p811); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); diff --git a/sysdeps/aarch64/fpu/asin_sve.c b/sysdeps/aarch64/fpu/asin_sve.c index 9daa382e34..acbf1b3bdd 100644 --- a/sysdeps/aarch64/fpu/asin_sve.c +++ b/sysdeps/aarch64/fpu/asin_sve.c @@ -18,45 +18,43 @@ . 
*/ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[12]; - float64_t pi_over_2f; + float64_t c1, c3, c5, c7, c9, c11; + float64_t c0, c2, c4, c6, c8, c10; + float64_t pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ - .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, - 0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6, - 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, - 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, - 0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6, - -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, - .pi_over_2f = 0x1.921fb54442d18p+0, + .c0 = 0x1.555555555554ep-3, .c1 = 0x1.3333333337233p-4, + .c2 = 0x1.6db6db67f6d9fp-5, .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = 0x1.6e8b264d467d6p-6, .c5 = 0x1.1c5997c357e9dp-6, + .c6 = 0x1.c86a22cd9389dp-7, .c7 = 0x1.856073c22ebbep-7, + .c8 = 0x1.fd1151acb6bedp-8, .c9 = 0x1.087182f799c1dp-6, + .c10 = -0x1.6602748120927p-7, .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = 0x1.921fb54442d18p+0, }; -#define P(i) sv_f64 (d->poly[i]) - /* Double-precision SVE implementation of vector asin(x). For |x| in [0, 0.5], use an order 11 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). - The largest observed error in this region is 0.52 ulps, - _ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2 - want 0x1.ec13757305f26p-2. - - For |x| in [0.5, 1.0], use same approximation with a change of variable + The largest observed error in this region is 0.98 ulp: + _ZGVsMxv_asin (0x1.d98f6a748ed8ap-2) got 0x1.ec4eb661a73d3p-2 + want 0x1.ec4eb661a73d2p-2. - asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + For |x| in [0.5, 1.0], use same approximation with a change of variable: + asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). 
- The largest observed error in this region is 2.69 ulps, - _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 - want 0x1.110d7e85fdd53p-1. */ + The largest observed error in this region is 2.66 ulp: + _ZGVsMxv_asin (0x1.04024f6e2a2fbp-1) got 0x1.10b9586f087a8p-1 + want 0x1.10b9586f087abp-1. */ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b64 (); svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); svfloat64_t ax = svabs_x (pg, x); @@ -70,17 +68,37 @@ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg) svfloat64_t z = svsqrt_m (ax, a_ge_half, z2); /* Use a single polynomial approximation P for both intervals. */ + svfloat64_t z3 = svmul_x (pg, z2, z); svfloat64_t z4 = svmul_x (pg, z2, z2); svfloat64_t z8 = svmul_x (pg, z4, z4); - svfloat64_t z16 = svmul_x (pg, z8, z8); - svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); + + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + + /* Order-11 Estrin scheme. */ + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p411 = svmla_x (pg, p47, z8, p811); + svfloat64_t p = svmla_x (pg, p03, z8, p411); + /* Finalize polynomial: z + z * z2 * P(z2). */ - p = svmla_x (pg, z, svmul_x (pg, z, z2), p); + p = svmla_x (pg, z, z3, p); - /* asin(|x|) = Q(|x|) , for |x| < 0.5 - = pi/2 - 2 Q(|x|), for |x| >= 0.5. 
*/ - svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2f); + /* asin(|x|) = Q(|x|), for |x| < 0.5 + = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ + svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2); - /* Copy sign. */ + /* Reinsert the sign from the argument. */ return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)); } diff --git a/sysdeps/aarch64/fpu/asinf_advsimd.c b/sysdeps/aarch64/fpu/asinf_advsimd.c index 59d870ca36..88437f85ab 100644 --- a/sysdeps/aarch64/fpu/asinf_advsimd.c +++ b/sysdeps/aarch64/fpu/asinf_advsimd.c @@ -18,22 +18,21 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { - float32x4_t poly[5]; + float32x4_t c0, c2, c4; + float c1, c3; float32x4_t pi_over_2f; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ - .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5), - V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) }, - .pi_over_2f = V4 (0x1.921fb6p+0f), + .c0 = V4 (0x1.55555ep-3f), .c1 = 0x1.33261ap-4f, + .c2 = V4 (0x1.70d7dcp-5f), .c3 = 0x1.b059dp-6f, + .c4 = V4 (0x1.3af7d8p-5f), .pi_over_2f = V4 (0x1.921fb6p+0f), }; #define AbsMask 0x7fffffff -#define Half 0x3f000000 #define One 0x3f800000 #define Small 0x39800000 /* 2^-12. */ @@ -47,11 +46,8 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special) /* Single-precision implementation of vector asin(x). - For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct - rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the - following approximation. - For |x| in [Small, 0.5], use order 4 polynomial P such that the final + For |x| <0.5, use order 4 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). 
The largest observed error in this region is 0.83 ulps, @@ -80,24 +76,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asin) (float32x4_t x) #endif float32x4_t ax = vreinterpretq_f32_u32 (ia); - uint32x4_t a_lt_half = vcltq_u32 (ia, v_u32 (Half)); + uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f)); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x), - vfmsq_n_f32 (v_f32 (0.5), ax, 0.5)); + vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f)); float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2)); /* Use a single polynomial approximation P for both intervals. */ - float32x4_t p = v_horner_4_f32 (z2, d->poly); + + /* PW Horner 3 evaluation scheme. */ + float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t c13 = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c13, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c13, 1); + float32x4_t p = vfmaq_f32 (p23, d->c4, z4); + p = vfmaq_f32 (p01, p, z4); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f32 (z, vmulq_f32 (z, z2), p); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ float32x4_t y - = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0)); + = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0f)); /* Copy sign. 
*/ return vbslq_f32 (v_u32 (AbsMask), y, x); diff --git a/sysdeps/aarch64/fpu/asinh_advsimd.c b/sysdeps/aarch64/fpu/asinh_advsimd.c index 6207e7da95..2739f98b39 100644 --- a/sysdeps/aarch64/fpu/asinh_advsimd.c +++ b/sysdeps/aarch64/fpu/asinh_advsimd.c @@ -20,41 +20,71 @@ #include "v_math.h" #include "poly_advsimd_f64.h" -#define A(i) v_f64 (__v_log_data.poly[i]) -#define N (1 << V_LOG_TABLE_BITS) -#define IndexMask (N - 1) - const static struct data { - float64x2_t poly[18]; - uint64x2_t off, huge_bound, abs_mask; - float64x2_t ln2, tiny_bound; + uint64x2_t huge_bound, abs_mask, off, mask; +#if WANT_SIMD_EXCEPT + float64x2_t tiny_bound; +#endif + float64x2_t lc0, lc2; + double lc1, lc3, ln2, lc4; + + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c17; + double c1, c3, c5, c7, c9, c11, c13, c15; + } data = { - .off = V2 (0x3fe6900900000000), - .ln2 = V2 (0x1.62e42fefa39efp-1), - .huge_bound = V2 (0x5fe0000000000000), + +#if WANT_SIMD_EXCEPT .tiny_bound = V2 (0x1p-26), - .abs_mask = V2 (0x7fffffffffffffff), +#endif /* Even terms of polynomial s.t. asinh(x) is approximated by asinh(x) ~= x + x^3 * (C0 + C1 * x + C2 * x^2 + C3 * x^3 + ...). Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2). 
*/ - .poly = { V2 (-0x1.55555555554a7p-3), V2 (0x1.3333333326c7p-4), - V2 (-0x1.6db6db68332e6p-5), V2 (0x1.f1c71b26fb40dp-6), - V2 (-0x1.6e8b8b654a621p-6), V2 (0x1.1c4daa9e67871p-6), - V2 (-0x1.c9871d10885afp-7), V2 (0x1.7a16e8d9d2ecfp-7), - V2 (-0x1.3ddca533e9f54p-7), V2 (0x1.0becef748dafcp-7), - V2 (-0x1.b90c7099dd397p-8), V2 (0x1.541f2bb1ffe51p-8), - V2 (-0x1.d217026a669ecp-9), V2 (0x1.0b5c7977aaf7p-9), - V2 (-0x1.e0f37daef9127p-11), V2 (0x1.388b5fe542a6p-12), - V2 (-0x1.021a48685e287p-14), V2 (0x1.93d4ba83d34dap-18) }, + + .c0 = V2 (-0x1.55555555554a7p-3), + .c1 = 0x1.3333333326c7p-4, + .c2 = V2 (-0x1.6db6db68332e6p-5), + .c3 = 0x1.f1c71b26fb40dp-6, + .c4 = V2 (-0x1.6e8b8b654a621p-6), + .c5 = 0x1.1c4daa9e67871p-6, + .c6 = V2 (-0x1.c9871d10885afp-7), + .c7 = 0x1.7a16e8d9d2ecfp-7, + .c8 = V2 (-0x1.3ddca533e9f54p-7), + .c9 = 0x1.0becef748dafcp-7, + .c10 = V2 (-0x1.b90c7099dd397p-8), + .c11 = 0x1.541f2bb1ffe51p-8, + .c12 = V2 (-0x1.d217026a669ecp-9), + .c13 = 0x1.0b5c7977aaf7p-9, + .c14 = V2 (-0x1.e0f37daef9127p-11), + .c15 = 0x1.388b5fe542a6p-12, + .c16 = V2 (-0x1.021a48685e287p-14), + .c17 = V2 (0x1.93d4ba83d34dap-18), + + .lc0 = V2 (-0x1.ffffffffffff7p-2), + .lc1 = 0x1.55555555170d4p-2, + .lc2 = V2 (-0x1.0000000399c27p-2), + .lc3 = 0x1.999b2e90e94cap-3, + .lc4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + + .off = V2 (0x3fe6900900000000), + .huge_bound = V2 (0x5fe0000000000000), + .abs_mask = V2 (0x7fffffffffffffff), + .mask = V2 (0xfffULL << 52), }; static float64x2_t NOINLINE VPCS_ATTR -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) +special_case (float64x2_t x, float64x2_t y, uint64x2_t abs_mask, + uint64x2_t special) { + /* Copy sign. 
*/ + y = vbslq_f64 (abs_mask, y, x); return v_call_f64 (asinh, x, y, special); } +#define N (1 << V_LOG_TABLE_BITS) +#define IndexMask (N - 1) + struct entry { float64x2_t invc; @@ -76,27 +106,34 @@ lookup (uint64x2_t i) } static inline float64x2_t -log_inline (float64x2_t x, const struct data *d) +log_inline (float64x2_t xm, const struct data *d) { - /* Double-precision vector log, copied from ordinary vector log with some - cosmetic modification and special-cases removed. */ - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint64x2_t tmp = vsubq_u64 (ix, d->off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz - = vsubq_u64 (ix, vandq_u64 (tmp, vdupq_n_u64 (0xfffULL << 52))); + + uint64x2_t u = vreinterpretq_u64_f64 (xm); + uint64x2_t u_off = vsubq_u64 (u, d->off); + + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); - struct entry e = lookup (tmp); + + struct entry e = lookup (u_off); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); float64x2_t kd = vcvtq_f64_s64 (k); - float64x2_t hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); + + /* hi = r + log(c) + k*Ln2. */ + float64x2_t ln2_and_lc4 = vld1q_f64 (&d->ln2); + float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_lc4, 0); + + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. 
*/ + float64x2_t odd_coeffs = vld1q_f64 (&d->lc1); float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = vfmaq_f64 (A (2), A (3), r); - float64x2_t p = vfmaq_f64 (A (0), A (1), r); - y = vfmaq_f64 (y, A (4), r2); - y = vfmaq_f64 (p, y, r2); - y = vfmaq_f64 (hi, y, r2); - return y; + float64x2_t y = vfmaq_laneq_f64 (d->lc2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->lc0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, ln2_and_lc4, 1); + y = vfmaq_f64 (p, r2, y); + return vfmaq_f64 (hi, y, r2); } /* Double-precision implementation of vector asinh(x). @@ -106,23 +143,24 @@ log_inline (float64x2_t x, const struct data *d) asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1) if |x| >= 1 = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise where log(x) is an optimized log approximation, and P(x) is a polynomial - shared with the scalar routine. The greatest observed error 3.29 ULP, in + shared with the scalar routine. The greatest observed error 2.79 ULP, in |x| >= 1: - __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1 - want 0x1.ffffcfd0e2352p-1. */ + _ZGVnN2v_asinh(0x1.2cd9d73ea76a6p+0) got 0x1.ffffd003219dap-1 + want 0x1.ffffd003219ddp-1. */ VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - float64x2_t ax = vabsq_f64 (x); - uint64x2_t iax = vreinterpretq_u64_f64 (ax); uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1)); - uint64x2_t special = vcgeq_u64 (iax, d->huge_bound); #if WANT_SIMD_EXCEPT + uint64x2_t iax = vreinterpretq_u64_f64 (ax); + uint64x2_t special = vcgeq_u64 (iax, (d->huge_bound)); uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound); special = vorrq_u64 (special, tiny); +#else + uint64x2_t special = vcgeq_f64 (ax, vreinterpretq_f64_u64 (d->huge_bound)); #endif /* Option 1: |x| >= 1. @@ -147,19 +185,45 @@ VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x) overflow, and tiny lanes, which will underflow, by setting them to 0. 
They will be fixed later, either by selecting x or falling back to the scalar special-case. The largest observed error in this region is 1.47 ULPs: - __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1 - want 0x1.c1d6bf874019cp-1. */ + _ZGVnN2v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1 + want 0x1.c1d6bf874019cp-1. */ float64x2_t option_2 = v_f64 (0); + if (__glibc_likely (v_any_u64 (vceqzq_u64 (gt1)))) { + #if WANT_SIMD_EXCEPT ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1)); #endif - float64x2_t x2 = vmulq_f64 (ax, ax), x3 = vmulq_f64 (ax, x2), - z2 = vmulq_f64 (x2, x2), z4 = vmulq_f64 (z2, z2), - z8 = vmulq_f64 (z4, z4), z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_17_f64 (x2, z2, z4, z8, z16, d->poly); - option_2 = vfmaq_f64 (ax, p, x3); + float64x2_t x2 = vmulq_f64 (ax, ax), z2 = vmulq_f64 (x2, x2); + /* Order-17 Pairwise Horner scheme. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, x2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, x2, c13, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, x2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, x2, c57, 1); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, x2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, x2, c911, 1); + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, x2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, x2, c1315, 1); + float64x2_t p1617 = vfmaq_f64 (d->c16, x2, d->c17); + + float64x2_t p = vfmaq_f64 (p1415, z2, p1617); + p = vfmaq_f64 (p1213, z2, p); + p = vfmaq_f64 (p1011, z2, p); + p = vfmaq_f64 (p89, z2, p); + + p = vfmaq_f64 (p67, z2, p); + p = vfmaq_f64 (p45, z2, p); + + p = vfmaq_f64 (p23, z2, p); + + p = vfmaq_f64 (p01, z2, p); + option_2 = vfmaq_f64 (ax, p, vmulq_f64 (ax, x2)); #if WANT_SIMD_EXCEPT option_2 = vbslq_f64 (tiny, x, option_2); #endif @@ -167,10 +231,10 @@ VPCS_ATTR 
float64x2_t V_NAME_D1 (asinh) (float64x2_t x) /* Choose the right option for each lane. */ float64x2_t y = vbslq_f64 (gt1, option_1, option_2); - /* Copy sign. */ - y = vbslq_f64 (d->abs_mask, y, x); - if (__glibc_unlikely (v_any_u64 (special))) - return special_case (x, y, special); - return y; + { + return special_case (x, y, d->abs_mask, special); + } + /* Copy sign. */ + return vbslq_f64 (d->abs_mask, y, x); } diff --git a/sysdeps/aarch64/fpu/asinh_sve.c b/sysdeps/aarch64/fpu/asinh_sve.c index 28dc5c4587..fe8715e06c 100644 --- a/sysdeps/aarch64/fpu/asinh_sve.c +++ b/sysdeps/aarch64/fpu/asinh_sve.c @@ -18,36 +18,49 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" #define SignMask (0x8000000000000000) #define One (0x3ff0000000000000) #define Thres (0x5fe0000000000000) /* asuint64 (0x1p511). */ +#define IndexMask (((1 << V_LOG_TABLE_BITS) - 1) << 1) static const struct data { - double poly[18]; - double ln2, p3, p1, p4, p0, p2; - uint64_t n; - uint64_t off; + double even_coeffs[9]; + double ln2, p3, p1, p4, p0, p2, c1, c3, c5, c7, c9, c11, c13, c15, c17; + uint64_t off, mask; } data = { - /* Polynomial generated using Remez on [2^-26, 1]. */ - .poly - = { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5, - 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6, - -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7, - 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8, - -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11, - 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18 }, + /* Polynomial generated using Remez on [2^-26, 1]. 
*/ + .even_coeffs ={ + -0x1.55555555554a7p-3, + -0x1.6db6db68332e6p-5, + -0x1.6e8b8b654a621p-6, + -0x1.c9871d10885afp-7, + -0x1.3ddca533e9f54p-7, + -0x1.b90c7099dd397p-8, + -0x1.d217026a669ecp-9, + -0x1.e0f37daef9127p-11, + -0x1.021a48685e287p-14, }, + + .c1 = 0x1.3333333326c7p-4, + .c3 = 0x1.f1c71b26fb40dp-6, + .c5 = 0x1.1c4daa9e67871p-6, + .c7 = 0x1.7a16e8d9d2ecfp-7, + .c9 = 0x1.0becef748dafcp-7, + .c11 = 0x1.541f2bb1ffe51p-8, + .c13 = 0x1.0b5c7977aaf7p-9, + .c15 = 0x1.388b5fe542a6p-12, + .c17 = 0x1.93d4ba83d34dap-18, + .ln2 = 0x1.62e42fefa39efp-1, .p0 = -0x1.ffffffffffff7p-2, .p1 = 0x1.55555555170d4p-2, .p2 = -0x1.0000000399c27p-2, .p3 = 0x1.999b2e90e94cap-3, .p4 = -0x1.554e550bd501ep-3, - .n = 1 << V_LOG_TABLE_BITS, - .off = 0x3fe6900900000000 + .off = 0x3fe6900900000000, + .mask = 0xfffULL << 52, }; static svfloat64_t NOINLINE @@ -64,11 +77,10 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg) of the algorithm used. */ svuint64_t ix = svreinterpret_u64 (x); - svuint64_t tmp = svsub_x (pg, ix, d->off); - svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), - (d->n - 1) << 1); - svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); + svuint64_t i_off = svsub_x (pg, ix, d->off); + svuint64_t i + = svand_x (pg, svlsr_x (pg, i_off, (51 - V_LOG_TABLE_BITS)), IndexMask); + svuint64_t iz = svsub_x (pg, ix, svand_x (pg, i_off, d->mask)); svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); @@ -78,14 +90,14 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg) svfloat64_t p1_p4 = svld1rq (svptrue_b64 (), &d->p1); svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z); - svfloat64_t kd = svcvt_f64_x (pg, k); + svfloat64_t kd + = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (i_off), 52)); svfloat64_t hi = svmla_lane (svadd_x (pg, logc, r), kd, ln2_p3, 0); - svfloat64_t r2 
= svmul_x (pg, r, r); - + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); svfloat64_t y = svmla_lane (sv_f64 (d->p2), r, ln2_p3, 1); - svfloat64_t p = svmla_lane (sv_f64 (d->p0), r, p1_p4, 0); + y = svmla_lane (y, r2, p1_p4, 1); y = svmla_x (pg, p, r2, y); y = svmla_x (pg, hi, r2, y); @@ -111,7 +123,6 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) svuint64_t iax = svbic_x (pg, ix, SignMask); svuint64_t sign = svand_x (pg, ix, SignMask); svfloat64_t ax = svreinterpret_f64 (iax); - svbool_t ge1 = svcmpge (pg, iax, One); svbool_t special = svcmpge (pg, iax, Thres); @@ -120,7 +131,7 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) svfloat64_t option_1 = sv_f64 (0); if (__glibc_likely (svptest_any (pg, ge1))) { - svfloat64_t x2 = svmul_x (pg, ax, ax); + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); option_1 = __sv_log_inline ( svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, x2, 1))), d, pg); } @@ -130,21 +141,53 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) The largest observed error in this region is 1.51 ULPs: _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1 want 0x1.c1e649ee2681dp-1. */ + svfloat64_t option_2 = sv_f64 (0); if (__glibc_likely (svptest_any (pg, svnot_z (pg, ge1)))) { - svfloat64_t x2 = svmul_x (pg, ax, ax); - svfloat64_t x4 = svmul_x (pg, x2, x2); - svfloat64_t p = sv_pw_horner_17_f64_x (pg, x2, x4, d->poly); - option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax)); + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); + svfloat64_t x4 = svmul_x (svptrue_b64 (), x2, x2); + /* Order-17 Pairwise Horner scheme. 
*/ + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + + svfloat64_t p01 = svmla_lane (sv_f64 (d->even_coeffs[0]), x2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->even_coeffs[1]), x2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->even_coeffs[2]), x2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->even_coeffs[3]), x2, c57, 1); + svfloat64_t p89 = svmla_lane (sv_f64 (d->even_coeffs[4]), x2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->even_coeffs[5]), x2, c911, 1); + svfloat64_t p1213 + = svmla_lane (sv_f64 (d->even_coeffs[6]), x2, c1315, 0); + svfloat64_t p1415 + = svmla_lane (sv_f64 (d->even_coeffs[7]), x2, c1315, 1); + svfloat64_t p1617 = svmla_x (pg, sv_f64 (d->even_coeffs[8]), x2, d->c17); + + svfloat64_t p = svmla_x (pg, p1415, x4, p1617); + p = svmla_x (pg, p1213, x4, p); + p = svmla_x (pg, p1011, x4, p); + p = svmla_x (pg, p89, x4, p); + + p = svmla_x (pg, p67, x4, p); + p = svmla_x (pg, p45, x4, p); + + p = svmla_x (pg, p23, x4, p); + + p = svmla_x (pg, p01, x4, p); + + option_2 = svmla_x (pg, ax, p, svmul_x (svptrue_b64 (), x2, ax)); } - /* Choose the right option for each lane. */ - svfloat64_t y = svsel (ge1, option_1, option_2); - if (__glibc_unlikely (svptest_any (pg, special))) return special_case ( - x, svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)), + x, + svreinterpret_f64 (sveor_x ( + pg, svreinterpret_u64 (svsel (ge1, option_1, option_2)), sign)), special); + + /* Choose the right option for each lane. 
*/ + svfloat64_t y = svsel (ge1, option_1, option_2); return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); } diff --git a/sysdeps/aarch64/fpu/asinhf_advsimd.c b/sysdeps/aarch64/fpu/asinhf_advsimd.c index 09fd8a6143..eb789b91b6 100644 --- a/sysdeps/aarch64/fpu/asinhf_advsimd.c +++ b/sysdeps/aarch64/fpu/asinhf_advsimd.c @@ -20,16 +20,16 @@ #include "v_math.h" #include "v_log1pf_inline.h" -#define SignMask v_u32 (0x80000000) - const static struct data { struct v_log1pf_data log1pf_consts; + float32x4_t one; uint32x4_t big_bound; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound; #endif } data = { + .one = V4 (1), .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .big_bound = V4 (0x5f800000), /* asuint(0x1p64). */ #if WANT_SIMD_EXCEPT @@ -38,20 +38,27 @@ const static struct data }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t sign, float32x4_t y, + uint32x4_t special, const struct data *d) { - return v_call_f32 (asinhf, x, y, special); + return v_call_f32 ( + asinhf, x, + vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))), + special); } /* Single-precision implementation of vector asinh(x), using vector log1p. - Worst-case error is 2.66 ULP, at roughly +/-0.25: - __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ + Worst-case error is 2.59 ULP: + _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3 + want 0x1.d449c4p-3. 
*/ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) { const struct data *dat = ptr_barrier (&data); - uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask); - float32x4_t ax = vreinterpretq_f32_u32 (iax); + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t special = vcgeq_u32 (iax, dat->big_bound); + uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax); float32x4_t special_arg = x; #if WANT_SIMD_EXCEPT @@ -68,13 +75,13 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x) /* asinh(x) = log(x + sqrt(x * x + 1)). For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */ float32x4_t d - = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x))); - float32x4_t y = log1pf_inline ( - vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts); + = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax))); + float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)); if (__glibc_unlikely (v_any_u32 (special))) - return special_case (special_arg, vbslq_f32 (SignMask, x, y), special); - return vbslq_f32 (SignMask, x, y); + return special_case (special_arg, sign, y, special, dat); + return vreinterpretq_f32_u32 (veorq_u32 ( + sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts)))); } libmvec_hidden_def (V_NAME_F1 (asinh)) HALF_WIDTH_ALIAS_F1 (asinh) diff --git a/sysdeps/aarch64/fpu/asinhf_sve.c b/sysdeps/aarch64/fpu/asinhf_sve.c index d85c3a685c..b7f253bf32 100644 --- a/sysdeps/aarch64/fpu/asinhf_sve.c +++ b/sysdeps/aarch64/fpu/asinhf_sve.c @@ -20,20 +20,23 @@ #include "sv_math.h" #include "sv_log1pf_inline.h" -#define BigBound (0x5f800000) /* asuint(0x1p64). */ +#define BigBound 0x5f800000 /* asuint(0x1p64). 
*/ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special) { + svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign)); + y = svreinterpret_f32 ( + svorr_x (svptrue_b32 (), sign, svreinterpret_u32 (y))); return sv_call_f32 (asinhf, x, y, special); } /* Single-precision SVE asinh(x) routine. Implements the same algorithm as vector asinhf and log1p. - Maximum error is 2.48 ULPs: - SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4 - want 0x1.ffbbb8p-4. */ + Maximum error is 1.92 ULPs: + SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2 + want -0x1.fd0bc8p-2. */ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg) { svfloat32_t ax = svabs_x (pg, x); @@ -49,8 +52,6 @@ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg) = sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case ( - x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))), - special); + return special_case (iax, sign, y, special); return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))); } diff --git a/sysdeps/aarch64/fpu/atan2_advsimd.c b/sysdeps/aarch64/fpu/atan2_advsimd.c index b1e7a9b8fc..1e5d20602e 100644 --- a/sysdeps/aarch64/fpu/atan2_advsimd.c +++ b/sysdeps/aarch64/fpu/atan2_advsimd.c @@ -19,59 +19,74 @@ #include "math_config.h" #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; float64x2_t pi_over_2; - float64x2_t poly[20]; + uint64x2_t zeroinfnan; } data = { - /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on - the interval [2**-1022, 1.0]. 
*/ - .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3), - V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4), - V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4), - V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5), - V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5), - V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5), - V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6), - V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7), - V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10), - V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), }, + /* Coefficients of polynomial P such that + atan(x)~x+x*P(x^2) on [2^-1022, 1.0]. */ + .c0 = V2 (-0x1.555555555552ap-2), + .c1 = 0x1.9999999995aebp-3, + .c2 = V2 (-0x1.24924923923f6p-3), + .c3 = 0x1.c71c7184288a2p-4, + .c4 = V2 (-0x1.745d11fb3d32bp-4), + .c5 = 0x1.3b136a18051b9p-4, + .c6 = V2 (-0x1.110e6d985f496p-4), + .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = V2 (-0x1.ae644e28058c3p-5), + .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = V2 (-0x1.59d7f901566cbp-5), + .c11 = 0x1.2c982855ab069p-5, + .c12 = V2 (-0x1.eb49592998177p-6), + .c13 = 0x1.69d8b396e3d38p-6, + .c14 = V2 (-0x1.ca980345c4204p-7), + .c15 = 0x1.dc050eafde0b3p-8, + .c16 = V2 (-0x1.7ea70755b8eccp-9), + .c17 = 0x1.ba3da3de903e8p-11, + .c18 = V2 (-0x1.44a4b059b6f67p-13), + .c19 = 0x1.c4a45029e5a91p-17, .pi_over_2 = V2 (0x1.921fb54442d18p+0), + .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1), }; #define SignMask v_u64 (0x8000000000000000) /* Special cases i.e. 0, infinity, NaN (fall back to scalar calls). */ static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t y, float64x2_t x, float64x2_t ret, uint64x2_t cmp) +special_case (float64x2_t y, float64x2_t x, float64x2_t ret, + uint64x2_t sign_xy, uint64x2_t cmp) { + /* Account for the sign of x and y. 
*/ + ret = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); return v_call2_f64 (atan2, y, x, ret, cmp); } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline uint64x2_t -zeroinfnan (uint64x2_t i) +zeroinfnan (uint64x2_t i, const struct data *d) { /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1). */ - return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), - v_u64 (2 * asuint64 (INFINITY) - 1)); + return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), d->zeroinfnan); } /* Fast implementation of vector atan2. - Maximum observed error is 2.8 ulps: - _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5) - got 0x1.92d628ab678ccp-1 - want 0x1.92d628ab678cfp-1. */ + Maximum observed error is 1.97 ulps: + _ZGVnN2vv_atan2 (0x1.42337dba73768p+5, 0x1.422d748cd3e29p+5) + got 0x1.9224810264efcp-1 want 0x1.9224810264efep-1. */ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); uint64x2_t ix = vreinterpretq_u64_f64 (x); uint64x2_t iy = vreinterpretq_u64_f64 (y); - uint64x2_t special_cases = vorrq_u64 (zeroinfnan (ix), zeroinfnan (iy)); + uint64x2_t special_cases + = vorrq_u64 (zeroinfnan (ix, d), zeroinfnan (iy, d)); uint64x2_t sign_x = vandq_u64 (ix, SignMask); uint64x2_t sign_y = vandq_u64 (iy, SignMask); @@ -81,42 +96,75 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) float64x2_t ay = vabsq_f64 (y); uint64x2_t pred_xlt0 = vcltzq_f64 (x); - uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax); - - /* Set up z for call to atan. */ - float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); - float64x2_t d = vbslq_f64 (pred_aygtax, ay, ax); - float64x2_t z = vdivq_f64 (n, d); - - /* Work out the correct shift. */ + uint64x2_t pred_aygtax = vcagtq_f64 (y, x); + + /* Set up z for evaluation of atan. 
*/ + float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); + float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax); + float64x2_t z = vdivq_f64 (num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi is done later. + -pi when x < 0 and ax < ay + -pi/2 when x < 0 and ax > ay + 0 when x >= 0 and ax < ay + pi/2 when x >= 0 and ax > ay. */ float64x2_t shift = vreinterpretq_f64_u64 ( vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0)))); - shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift); - shift = vmulq_f64 (shift, data_ptr->pi_over_2); - - /* Calculate the polynomial approximation. - Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of - full scheme to avoid underflow in x^16. - The order 19 polynomial P approximates - (atan(sqrt(x))-sqrt(x))/x^(3/2). */ + float64x2_t shift2 = vreinterpretq_f64_u64 ( + vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (1.0)))); + shift = vaddq_f64 (shift, shift2); + + /* Calculate the polynomial approximation. */ float64x2_t z2 = vmulq_f64 (z, z); - float64x2_t x2 = vmulq_f64 (z2, z2); - float64x2_t x4 = vmulq_f64 (x2, x2); - float64x2_t x8 = vmulq_f64 (x4, x4); - float64x2_t ret - = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, data_ptr->poly), - v_estrin_11_f64 (z2, x2, x4, x8, data_ptr->poly + 8), x8); + float64x2_t z3 = vmulq_f64 (z2, z); + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); - /* Finalize. y = shift + z + z^3 * P(z^2). */ - ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z)); - ret = vaddq_f64 (ret, shift); + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1719 = vld1q_f64 (&d->c17); - /* Account for the sign of x and y. */ - ret = vreinterpretq_f64_u64 ( - veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); + /* Order-7 Estrin. 
*/ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); + + /* Order-11 Estrin. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); + float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); + float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819); + + float64x2_t p815 = vfmaq_f64 (p811, z8, p1215); + float64x2_t p819 = vfmaq_f64 (p815, z16, p1619); + + float64x2_t poly = vfmaq_f64 (p07, p819, z16); + + /* Finalize. y = shift + z + z^3 * P(z^2). */ + float64x2_t ret = vfmaq_f64 (z, shift, d->pi_over_2); + ret = vfmaq_f64 (ret, z3, poly); if (__glibc_unlikely (v_any_u64 (special_cases))) - return special_case (y, x, ret, special_cases); + return special_case (y, x, ret, sign_xy, special_cases); - return ret; + /* Account for the sign of x and y. 
*/ + return vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); } diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c index ed9f683436..0337ea57b7 100644 --- a/sysdeps/aarch64/fpu/atan2_sve.c +++ b/sysdeps/aarch64/fpu/atan2_sve.c @@ -19,25 +19,25 @@ #include "math_config.h" #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[20]; - float64_t pi_over_2; + float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; + float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. */ - .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, - 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, - -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, - 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, - -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, - 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, - -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, - .pi_over_2 = 0x1.921fb54442d18p+0, + .c0 = -0x1.555555555552ap-2, .c1 = 0x1.9999999995aebp-3, + .c2 = -0x1.24924923923f6p-3, .c3 = 0x1.c71c7184288a2p-4, + .c4 = -0x1.745d11fb3d32bp-4, .c5 = 0x1.3b136a18051b9p-4, + .c6 = -0x1.110e6d985f496p-4, .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = -0x1.ae644e28058c3p-5, .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = -0x1.59d7f901566cbp-5, .c11 = 0x1.2c982855ab069p-5, + .c12 = -0x1.eb49592998177p-6, .c13 = 0x1.69d8b396e3d38p-6, + .c14 = -0x1.ca980345c4204p-7, .c15 = 0x1.dc050eafde0b3p-8, + .c16 = -0x1.7ea70755b8eccp-9, .c17 = 0x1.ba3da3de903e8p-11, + .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17, }; - /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). 
*/ static svfloat64_t NOINLINE special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, @@ -56,15 +56,17 @@ zeroinfnan (svuint64_t i, const svbool_t pg) } /* Fast implementation of SVE atan2. Errors are greatest when y and - x are reasonably close together. The greatest observed error is 2.28 ULP: - _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732) - got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */ -svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + x are reasonably close together. The greatest observed error is 1.94 ULP: + _ZGVsMxvv_atan2 (0x1.8a4bf7167228ap+5, 0x1.84971226bb57bp+5) + got 0x1.95db19dfef9ccp-1 want 0x1.95db19dfef9cep-1. */ +svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, + const svbool_t pg) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); svuint64_t ix = svreinterpret_u64 (x); svuint64_t iy = svreinterpret_u64 (y); + svbool_t ptrue = svptrue_b64 (); svbool_t cmp_x = zeroinfnan (ix, pg); svbool_t cmp_y = zeroinfnan (iy, pg); @@ -81,32 +83,67 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) svbool_t pred_aygtax = svcmpgt (pg, ay, ax); - /* Set up z for call to atan. */ - svfloat64_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay); - svfloat64_t d = svsel (pred_aygtax, ay, ax); - svfloat64_t z = svdiv_x (pg, n, d); - - /* Work out the correct shift. */ + /* Set up z for evaluation of atan. */ + svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); + svfloat64_t den = svsel (pred_aygtax, ay, ax); + svfloat64_t z = svdiv_x (pg, num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi is done later. + -pi when x < 0 and ax < ay + -pi/2 when x < 0 and ax > ay + 0 when x >= 0 and ax < ay + pi/2 when x >= 0 and ax > ay. 
*/ svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1)); + svfloat64_t shift_mul = svreinterpret_f64 ( + svorr_x (pg, sign_x, svreinterpret_u64 (sv_f64 (0x1.921fb54442d18p+0)))); shift = svsel (pred_aygtax, sv_f64 (1.0), shift); - shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift))); - shift = svmul_x (pg, shift, data_ptr->pi_over_2); + shift = svmla_x (pg, z, shift, shift_mul); /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ svfloat64_t z2 = svmul_x (pg, z, z); - svfloat64_t x2 = svmul_x (pg, z2, z2); - svfloat64_t x4 = svmul_x (pg, x2, x2); - svfloat64_t x8 = svmul_x (pg, x4, x4); + svfloat64_t z3 = svmul_x (pg, z2, z); + svfloat64_t z4 = svmul_x (pg, z2, z2); + svfloat64_t z8 = svmul_x (pg, z4, z4); + svfloat64_t z16 = svmul_x (pg, z8, z8); - svfloat64_t ret = svmla_x ( - pg, sv_estrin_7_f64_x (pg, z2, x2, x4, data_ptr->poly), - sv_estrin_11_f64_x (pg, z2, x2, x4, x8, data_ptr->poly + 8), x8); + /* Order-7 Estrin. */ + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); - /* y = shift + z + z^3 * P(z^2). */ - svfloat64_t z3 = svmul_x (pg, z2, z); - ret = svmla_x (pg, z, z3, ret); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + svfloat64_t p07 = svmla_x (pg, p03, z8, p47); + + /* Order-11 Estrin. 
*/ + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + svfloat64_t c1315 = svld1rq (ptrue, &d->c13); + svfloat64_t c1719 = svld1rq (ptrue, &d->c17); - ret = svadd_m (pg, ret, shift); + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1); + svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415); + + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0); + svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1); + svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819); + + svfloat64_t p815 = svmla_x (pg, p811, z8, p1215); + svfloat64_t p819 = svmla_x (pg, p815, z16, p1619); + + svfloat64_t poly = svmla_x (pg, p07, z16, p819); + + /* y = shift + z + z^3 * P(z^2). */ + svfloat64_t ret = svmla_x (pg, shift, z3, poly); /* Account for the sign of x and y. */ if (__glibc_unlikely (svptest_any (pg, cmp_xy))) diff --git a/sysdeps/aarch64/fpu/atan2f_advsimd.c b/sysdeps/aarch64/fpu/atan2f_advsimd.c index 56e610caf1..23a825e63b 100644 --- a/sysdeps/aarch64/fpu/atan2f_advsimd.c +++ b/sysdeps/aarch64/fpu/atan2f_advsimd.c @@ -18,52 +18,58 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { - float32x4_t poly[8]; - float32x4_t pi_over_2; + float32x4_t c0, c4, c6, c2; + float c1, c3, c5, c7; + uint32x4_t comp_const; + float32x4_t pi; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. Generated using fpminimax between FLT_MIN and 1. 
*/ - .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f), - V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f), - V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) }, - .pi_over_2 = V4 (0x1.921fb6p+0f), + .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3, + .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4, + .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5, + .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9, + .pi = V4 (0x1.921fb6p+1f), .comp_const = V4 (2 * 0x7f800000lu - 1), }; #define SignMask v_u32 (0x80000000) /* Special cases i.e. 0, infinity and nan (fall back to scalar calls). */ static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t y, float32x4_t x, float32x4_t ret, uint32x4_t cmp) +special_case (float32x4_t y, float32x4_t x, float32x4_t ret, + uint32x4_t sign_xy, uint32x4_t cmp) { + /* Account for the sign of y. */ + ret = vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); return v_call2_f32 (atan2f, y, x, ret, cmp); } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ static inline uint32x4_t -zeroinfnan (uint32x4_t i) +zeroinfnan (uint32x4_t i, const struct data *d) { /* 2 * i - 1 >= 2 * 0x7f800000lu - 1. */ - return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), - v_u32 (2 * 0x7f800000lu - 1)); + return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const); } /* Fast implementation of vector atan2f. Maximum observed error is - 2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: - _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 - want 0x1.967f00p-1. */ + 2.13 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: + _ZGVnN4vv_atan2f (0x1.14a9d4p-87, 0x1.0eb886p-87) got 0x1.97aea2p-1 + want 0x1.97ae9ep-1. 
*/ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t iy = vreinterpretq_u32_f32 (y); - uint32x4_t special_cases = vorrq_u32 (zeroinfnan (ix), zeroinfnan (iy)); + uint32x4_t special_cases + = vorrq_u32 (zeroinfnan (ix, d), zeroinfnan (iy, d)); uint32x4_t sign_x = vandq_u32 (ix, SignMask); uint32x4_t sign_y = vandq_u32 (iy, SignMask); @@ -75,44 +81,52 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) uint32x4_t pred_xlt0 = vcltzq_f32 (x); uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax); - /* Set up z for call to atanf. */ - float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); - float32x4_t d = vbslq_f32 (pred_aygtax, ay, ax); - float32x4_t z = vdivq_f32 (n, d); - - /* Work out the correct shift. */ + /* Set up z for evaluation of atanf. */ + float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); + float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax); + float32x4_t z = vdivq_f32 (num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi is done later. + -pi when x < 0 and ax < ay + -pi/2 when x < 0 and ax > ay + 0 when x >= 0 and ax < ay + pi/2 when x >= 0 and ax > ay. */ float32x4_t shift = vreinterpretq_f32_u32 ( - vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f)))); - shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift); - shift = vmulq_f32 (shift, data_ptr->pi_over_2); - - /* Calculate the polynomial approximation. - Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, - a standard implementation using z8 creates spurious underflow - in the very last fma (when z^8 is small enough). - Therefore, we split the last fma into a mul and an fma. - Horner and single-level Estrin have higher errors that exceed - threshold. 
*/ + vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f)))); + float32x4_t shift2 = vreinterpretq_f32_u32 ( + vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f)))); + shift = vaddq_f32 (shift, shift2); + + /* Calculate the polynomial approximation. */ float32x4_t z2 = vmulq_f32 (z, z); + float32x4_t z3 = vmulq_f32 (z2, z); float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t z8 = vmulq_f32 (z4, z4); - float32x4_t ret = vfmaq_f32 ( - v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly), z4, - vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly + 4))); + float32x4_t c1357 = vld1q_f32 (&d->c1); - /* y = shift + z * P(z^2). */ - ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, c1357, 3); + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p47 = vfmaq_f32 (p45, z4, p67); - /* Account for the sign of y. */ - ret = vreinterpretq_f32_u32 ( - veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); + float32x4_t poly = vfmaq_f32 (p03, z8, p47); + + /* y = shift + z * P(z^2). */ + float32x4_t ret = vfmaq_f32 (z, shift, d->pi); + ret = vfmaq_f32 (ret, z3, poly); if (__glibc_unlikely (v_any_u32 (special_cases))) { - return special_case (y, x, ret, special_cases); + return special_case (y, x, ret, sign_xy, special_cases); } - return ret; + /* Account for the sign of y. */ + return vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); } libmvec_hidden_def (V_NAME_F2 (atan2)) HALF_WIDTH_ALIAS_F2(atan2) diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c index 9ea197147c..f7f6d40ca6 100644 --- a/sysdeps/aarch64/fpu/atan2f_sve.c +++ b/sysdeps/aarch64/fpu/atan2f_sve.c @@ -18,18 +18,18 @@ . 
*/ #include "sv_math.h" -#include "poly_sve_f32.h" static const struct data { - float32_t poly[8]; + float32_t c0, c2, c4, c6; + float32_t c1, c3, c5, c7; float32_t pi_over_2; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. */ - .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, - -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, - .pi_over_2 = 0x1.921fb6p+0f, + .c0 = -0x1.5554dcp-2, .c1 = 0x1.9978ecp-3, .c2 = -0x1.230a94p-3, + .c3 = 0x1.b4debp-4, .c4 = -0x1.3550dap-4, .c5 = 0x1.61eebp-5, + .c6 = -0x1.0c17d4p-6, .c7 = 0x1.7ea694p-9, .pi_over_2 = 0x1.921fb6p+0f, }; /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ @@ -51,12 +51,14 @@ zeroinfnan (svuint32_t i, const svbool_t pg) /* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum - observed error is 2.95 ULP: - _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 - want 0x1.967f00p-1. */ -svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + observed error is 2.21 ULP: + _ZGVnN4vv_atan2f (0x1.a04aa8p+6, 0x1.9a274p+6) got 0x1.95ed3ap-1 + want 0x1.95ed36p-1. */ +svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, + const svbool_t pg) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b32 (); svuint32_t ix = svreinterpret_u32 (x); svuint32_t iy = svreinterpret_u32 (y); @@ -76,29 +78,42 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) svbool_t pred_aygtax = svcmpgt (pg, ay, ax); - /* Set up z for call to atan. */ - svfloat32_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay); - svfloat32_t d = svsel (pred_aygtax, ay, ax); - svfloat32_t z = svdiv_x (pg, n, d); - - /* Work out the correct shift. */ + /* Set up z for evaluation of atanf. 
*/ + svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); + svfloat32_t den = svsel (pred_aygtax, ay, ax); + svfloat32_t z = svdiv_x (ptrue, num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi/2 is done later. + -pi when x < 0 and ax >= ay + -pi/2 when x < 0 and ax < ay + 0 when x >= 0 and ax >= ay + pi/2 when x >= 0 and ax < ay. */ svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); shift = svsel (pred_aygtax, sv_f32 (1.0), shift); shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); - shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2)); /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */ - svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z2 = svmul_x (ptrue, z, z); + svfloat32_t z3 = svmul_x (pg, z2, z); svfloat32_t z4 = svmul_x (pg, z2, z2); svfloat32_t z8 = svmul_x (pg, z4, z4); - svfloat32_t ret = sv_estrin_7_f32_x (pg, z2, z4, z8, data_ptr->poly); + svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1); - /* ret = shift + z + z^3 * P(z^2). */ - svfloat32_t z3 = svmul_x (pg, z2, z); - ret = svmla_x (pg, z, z3, ret); + svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); + svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); + svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); + svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3); - ret = svadd_m (pg, ret, shift); + svfloat32_t p03 = svmla_x (pg, p01, z4, p23); + svfloat32_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat32_t poly = svmla_x (pg, p03, z8, p47); + + /* ret = shift + z + z^3 * P(z^2). */ + svfloat32_t ret = svmla_x (pg, z, shift, sv_f32 (d->pi_over_2)); + ret = svmla_x (pg, ret, z3, poly); /* Account for the sign of x and y. */ diff --git a/sysdeps/aarch64/fpu/atan_advsimd.c b/sysdeps/aarch64/fpu/atan_advsimd.c index a962be0f78..f835dae828 100644 --- a/sysdeps/aarch64/fpu/atan_advsimd.c +++ b/sysdeps/aarch64/fpu/atan_advsimd.c @@ -18,25 +18,25 @@ . 
*/ #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; float64x2_t pi_over_2; - float64x2_t poly[20]; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. */ - .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3), - V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4), - V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4), - V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5), - V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5), - V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5), - V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6), - V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7), - V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10), - V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), }, + .c0 = V2 (-0x1.555555555552ap-2), .c1 = 0x1.9999999995aebp-3, + .c2 = V2 (-0x1.24924923923f6p-3), .c3 = 0x1.c71c7184288a2p-4, + .c4 = V2 (-0x1.745d11fb3d32bp-4), .c5 = 0x1.3b136a18051b9p-4, + .c6 = V2 (-0x1.110e6d985f496p-4), .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = V2 (-0x1.ae644e28058c3p-5), .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = V2 (-0x1.59d7f901566cbp-5), .c11 = 0x1.2c982855ab069p-5, + .c12 = V2 (-0x1.eb49592998177p-6), .c13 = 0x1.69d8b396e3d38p-6, + .c14 = V2 (-0x1.ca980345c4204p-7), .c15 = 0x1.dc050eafde0b3p-8, + .c16 = V2 (-0x1.7ea70755b8eccp-9), .c17 = 0x1.ba3da3de903e8p-11, + .c18 = V2 (-0x1.44a4b059b6f67p-13), .c19 = 0x1.c4a45029e5a91p-17, .pi_over_2 = V2 (0x1.921fb54442d18p+0), }; @@ -46,12 +46,17 @@ static const struct data /* Fast implementation of vector atan. Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using - z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps: - _ZGVnN2v_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 - want 0x1.9225645bdd7c3p-1. */ + z=1/x and shift = pi/2. 
Maximum observed error is 2.45 ulps: + _ZGVnN2v_atan (0x1.0008d737eb3e6p+0) got 0x1.92288c551a4c1p-1 + want 0x1.92288c551a4c3p-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) { const struct data *d = ptr_barrier (&data); + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1719 = vld1q_f64 (&d->c17); /* Small cases, infs and nans are supported by our approximation technique, but do not set fenv flags correctly. Only trigger special case if we need @@ -72,33 +77,53 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ - uint64x2_t red = vcagtq_f64 (x, v_f64 (1.0)); + uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0)); /* Avoid dependency in abs(x) in division (and comparison). */ - float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x); + float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x); + float64x2_t shift = vreinterpretq_f64_u64 ( vandq_u64 (red, vreinterpretq_u64_f64 (d->pi_over_2))); - /* Use absolute value only when needed (odd powers of z). */ - float64x2_t az = vbslq_f64 ( - SignMask, vreinterpretq_f64_u64 (vandq_u64 (SignMask, red)), z); - - /* Calculate the polynomial approximation. - Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of - full scheme to avoid underflow in x^16. - The order 19 polynomial P approximates - (atan(sqrt(x))-sqrt(x))/x^(3/2). */ + + /* Reinsert sign bit from argument into the shift value. */ + shift = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (shift), sign)); + + /* Calculate polynomial approximation P(z^2) with deg(P)=19. 
*/ float64x2_t z2 = vmulq_f64 (z, z); - float64x2_t x2 = vmulq_f64 (z2, z2); - float64x2_t x4 = vmulq_f64 (x2, x2); - float64x2_t x8 = vmulq_f64 (x4, x4); - float64x2_t y - = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, d->poly), - v_estrin_11_f64 (z2, x2, x4, x8, d->poly + 8), x8); + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); - /* Finalize. y = shift + z + z^3 * P(z^2). */ - y = vfmaq_f64 (az, y, vmulq_f64 (z2, az)); - y = vaddq_f64 (y, shift); + /* Order-7 Estrin. */ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); - /* y = atan(x) if x>0, -atan(-x) otherwise. */ - y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), sign)); - return y; + /* Order-11 Estrin. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); + float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); + float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819); + + float64x2_t p815 = vfmaq_f64 (p811, z8, p1215); + float64x2_t p819 = vfmaq_f64 (p815, z16, p1619); + + float64x2_t y = vfmaq_f64 (p07, p819, z16); + + /* Finalize. y = shift + z + z^3 * P(z^2). 
*/ + y = vfmsq_f64 (v_f64 (-1.0), z2, y); + return vfmsq_f64 (shift, z, y); } diff --git a/sysdeps/aarch64/fpu/atan_sve.c b/sysdeps/aarch64/fpu/atan_sve.c index fa1630304c..046c04fbb1 100644 --- a/sysdeps/aarch64/fpu/atan_sve.c +++ b/sysdeps/aarch64/fpu/atan_sve.c @@ -18,23 +18,26 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[20]; - float64_t pi_over_2; + float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; + float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + float64_t shift_val, neg_one; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. */ - .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, - 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, - -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, - 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, - -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, - 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, - -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, - .pi_over_2 = 0x1.921fb54442d18p+0, + .c0 = -0x1.555555555552ap-2, .c1 = 0x1.9999999995aebp-3, + .c2 = -0x1.24924923923f6p-3, .c3 = 0x1.c71c7184288a2p-4, + .c4 = -0x1.745d11fb3d32bp-4, .c5 = 0x1.3b136a18051b9p-4, + .c6 = -0x1.110e6d985f496p-4, .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = -0x1.ae644e28058c3p-5, .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = -0x1.59d7f901566cbp-5, .c11 = 0x1.2c982855ab069p-5, + .c12 = -0x1.eb49592998177p-6, .c13 = 0x1.69d8b396e3d38p-6, + .c14 = -0x1.ca980345c4204p-7, .c15 = 0x1.dc050eafde0b3p-8, + .c16 = -0x1.7ea70755b8eccp-9, .c17 = 0x1.ba3da3de903e8p-11, + .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17, + .shift_val = 0x1.490fdaa22168cp+1, .neg_one = -1, }; /* Useful constants. */ @@ -43,15 +46,14 @@ static const struct data /* Fast implementation of SVE atan. 
Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Largest errors are close to 1. The maximum observed - error is 2.27 ulps: - _ZGVsMxv_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 - want 0x1.9225645bdd7c3p-1. */ + error is 2.08 ulps: + _ZGVsMxv_atan (0x1.000a7c56975e8p+0) got 0x1.922a3163e15c2p-1 + want 0x1.922a3163e15c4p-1. */ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - /* No need to trigger special case. Small cases, infs and nans - are supported by our approximation technique. */ + svbool_t ptrue = svptrue_b64 (); svuint64_t ix = svreinterpret_u64 (x); svuint64_t sign = svand_x (pg, ix, SignMask); @@ -59,32 +61,60 @@ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg) y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ - svbool_t red = svacgt (pg, x, 1.0); - /* Avoid dependency in abs(x) in division (and comparison). */ - svfloat64_t z = svsel (red, svdivr_x (pg, x, 1.0), x); - /* Use absolute value only when needed (odd powers of z). */ - svfloat64_t az = svabs_x (pg, z); - az = svneg_m (az, red, az); + svbool_t red = svacgt (pg, x, d->neg_one); + svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x); + + /* Reuse of -1.0 to reduce constant loads. + We need a shift value of pi/2, which is created via -1 + (1 + pi/2). */ + svfloat64_t shift + = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val)); + + /* Reinserts the sign bit of the argument to handle the case of x < -1. */ + shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign)); /* Use split Estrin scheme for P(z^2) with deg(P)=19. 
*/ - svfloat64_t z2 = svmul_x (pg, z, z); - svfloat64_t x2 = svmul_x (pg, z2, z2); - svfloat64_t x4 = svmul_x (pg, x2, x2); - svfloat64_t x8 = svmul_x (pg, x4, x4); + svfloat64_t z2 = svmul_x (ptrue, z, z); + svfloat64_t z4 = svmul_x (ptrue, z2, z2); + svfloat64_t z8 = svmul_x (ptrue, z4, z4); + svfloat64_t z16 = svmul_x (ptrue, z8, z8); - svfloat64_t y - = svmla_x (pg, sv_estrin_7_f64_x (pg, z2, x2, x4, d->poly), - sv_estrin_11_f64_x (pg, z2, x2, x4, x8, d->poly + 8), x8); + /* Order-7 Estrin. */ + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); - /* y = shift + z + z^3 * P(z^2). */ - svfloat64_t z3 = svmul_x (pg, z2, az); - y = svmla_x (pg, az, z3, y); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + svfloat64_t p07 = svmla_x (pg, p03, z8, p47); + + /* Order-11 Estrin. */ + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + svfloat64_t c1315 = svld1rq (ptrue, &d->c13); + svfloat64_t c1719 = svld1rq (ptrue, &d->c17); - /* Apply shift as indicated by `red` predicate. */ - y = svadd_m (red, y, d->pi_over_2); + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); - /* y = atan(x) if x>0, -atan(-x) otherwise. 
*/ - y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1); + svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415); - return y; + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0); + svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1); + svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819); + + svfloat64_t p815 = svmla_x (pg, p811, z8, p1215); + svfloat64_t p819 = svmla_x (pg, p815, z16, p1619); + + svfloat64_t y = svmla_x (pg, p07, z16, p819); + + /* y = shift + z + z^3 * P(z^2). */ + shift = svadd_m (red, z, shift); + y = svmul_x (pg, z2, y); + return svmla_x (pg, shift, z, y); } diff --git a/sysdeps/aarch64/fpu/atanf_advsimd.c b/sysdeps/aarch64/fpu/atanf_advsimd.c index d015cc70ad..e72074ec62 100644 --- a/sysdeps/aarch64/fpu/atanf_advsimd.c +++ b/sysdeps/aarch64/fpu/atanf_advsimd.c @@ -22,26 +22,35 @@ static const struct data { + uint32x4_t sign_mask, pi_over_2; + float32x4_t neg_one; +#if WANT_SIMD_EXCEPT float32x4_t poly[8]; - float32x4_t pi_over_2; +} data = { + .poly = { V4 (-0x1.5554dcp-2), V4 (0x1.9978ecp-3), V4 (-0x1.230a94p-3), + V4 (0x1.b4debp-4), V4 (-0x1.3550dap-4), V4 (0x1.61eebp-5), + V4 (-0x1.0c17d4p-6), V4 (0x1.7ea694p-9) }, +#else + float32x4_t c0, c2, c4, c6; + float c1, c3, c5, c7; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. Generated using fpminimax between FLT_MIN and 1. 
*/ - .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f), - V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f), - V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) }, - .pi_over_2 = V4 (0x1.921fb6p+0f), + .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3, + .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4, + .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5, + .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9, +#endif + .pi_over_2 = V4 (0x3fc90fdb), + .neg_one = V4 (-1.0f), + .sign_mask = V4 (0x80000000), }; -#define SignMask v_u32 (0x80000000) - -#define P(i) d->poly[i] - +#if WANT_SIMD_EXCEPT #define TinyBound 0x30800000 /* asuint(0x1p-30). */ #define BigBound 0x4e800000 /* asuint(0x1p30). */ -#if WANT_SIMD_EXCEPT static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t special) { @@ -51,19 +60,20 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special) /* Fast implementation of vector atanf based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] - using z=-1/x and shift = pi/2. Maximum observed error is 2.9ulps: - _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1. */ + using z=-1/x and shift = pi/2. Maximum observed error is 2.02 ulps: + _ZGVnN4v_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1 + want 0x1.95ed36p-1. */ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - /* Small cases, infs and nans are supported by our approximation technique, - but do not set fenv flags correctly. Only trigger special case if we need - fenv. */ uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint32x4_t sign = vandq_u32 (ix, SignMask); + uint32x4_t sign = vandq_u32 (ix, d->sign_mask); #if WANT_SIMD_EXCEPT + /* Small cases, infs and nans are supported by our approximation technique, + but do not set fenv flags correctly. Only trigger special case if we need + fenv. 
*/ uint32x4_t ia = vandq_u32 (ix, v_u32 (0x7ff00000)); uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (TinyBound)), v_u32 (BigBound - TinyBound)); @@ -71,41 +81,52 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) if (__glibc_unlikely (v_any_u32 (special))) return special_case (x, x, v_u32 (-1)); #endif - /* Argument reduction: - y := arctan(x) for x < 1 - y := pi/2 + arctan(-1/x) for x > 1 - Hence, use z=-1/a if x>=1, otherwise z=a. */ - uint32x4_t red = vcagtq_f32 (x, v_f32 (1.0)); - /* Avoid dependency in abs(x) in division (and comparison). */ - float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x); + y := arctan(x) for |x| < 1 + y := arctan(-1/x) + pi/2 for x > +1 + y := arctan(-1/x) - pi/2 for x < -1 + Hence, use z=-1/a if x>=|-1|, otherwise z=a. */ + uint32x4_t red = vcagtq_f32 (x, d->neg_one); + + float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x); + + /* Shift is calculated as +-pi/2 or 0, depending on the argument case. */ float32x4_t shift = vreinterpretq_f32_u32 ( - vandq_u32 (red, vreinterpretq_u32_f32 (d->pi_over_2))); - /* Use absolute value only when needed (odd powers of z). */ - float32x4_t az = vbslq_f32 ( - SignMask, vreinterpretq_f32_u32 (vandq_u32 (SignMask, red)), z); + vandq_u32 (red, veorq_u32 (d->pi_over_2, sign))); + + float32x4_t z2 = vmulq_f32 (z, z); + float32x4_t z3 = vmulq_f32 (z, z2); + float32x4_t z4 = vmulq_f32 (z2, z2); +#if WANT_SIMD_EXCEPT /* Calculate the polynomial approximation. Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, a standard implementation using z8 creates spurious underflow in the very last fma (when z^8 is small enough). - Therefore, we split the last fma into a mul and an fma. - Horner and single-level Estrin have higher errors that exceed - threshold. */ - float32x4_t z2 = vmulq_f32 (z, z); - float32x4_t z4 = vmulq_f32 (z2, z2); - + Therefore, we split the last fma into a mul and an fma. 
*/ float32x4_t y = vfmaq_f32 ( v_pairwise_poly_3_f32 (z2, z4, d->poly), z4, vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, d->poly + 4))); - /* y = shift + z * P(z^2). */ - y = vaddq_f32 (vfmaq_f32 (az, y, vmulq_f32 (z2, az)), shift); +#else + float32x4_t z8 = vmulq_f32 (z4, z4); + + /* Uses an Estrin scheme for polynomial approximation. */ + float32x4_t odd_coeffs = vld1q_f32 (&d->c1); + + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, odd_coeffs, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, odd_coeffs, 1); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, odd_coeffs, 2); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, odd_coeffs, 3); - /* y = atan(x) if x>0, -atan(-x) otherwise. */ - y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), sign)); + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p47 = vfmaq_f32 (p45, z4, p67); - return y; + float32x4_t y = vfmaq_f32 (p03, z8, p47); +#endif + + /* y = shift + z * P(z^2). */ + return vfmaq_f32 (vaddq_f32 (shift, z), z3, y); } libmvec_hidden_def (V_NAME_F1 (atan)) HALF_WIDTH_ALIAS_F1 (atan) diff --git a/sysdeps/aarch64/fpu/atanf_sve.c b/sysdeps/aarch64/fpu/atanf_sve.c index 7b54094586..0f5a054743 100644 --- a/sysdeps/aarch64/fpu/atanf_sve.c +++ b/sysdeps/aarch64/fpu/atanf_sve.c @@ -18,18 +18,26 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" static const struct data { - float32_t poly[8]; - float32_t pi_over_2; + float32_t c1, c3, c5, c7; + float32_t c0, c2, c4, c6; + float32_t shift_val, neg_one; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. 
*/ - .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, - -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, - .pi_over_2 = 0x1.921fb6p+0f, + .c0 = -0x1.5554dcp-2, + .c1 = 0x1.9978ecp-3, + .c2 = -0x1.230a94p-3, + .c3 = 0x1.b4debp-4, + .c4 = -0x1.3550dap-4, + .c5 = 0x1.61eebp-5, + .c6 = -0x1.0c17d4p-6, + .c7 = 0x1.7ea694p-9, + /* pi/2, used as a shift value after reduction. */ + .shift_val = 0x1.921fb54442d18p+0, + .neg_one = -1.0f, }; #define SignMask (0x80000000) @@ -37,43 +45,49 @@ static const struct data /* Fast implementation of SVE atanf based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=-1/x and shift = pi/2. - Largest observed error is 2.9 ULP, close to +/-1.0: - _ZGVsMxv_atanf (0x1.0468f6p+0) got -0x1.967f06p-1 - want -0x1.967fp-1. */ + Largest observed error is 2.12 ULP: + _ZGVsMxv_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1 + want 0x1.95ed36p-1. */ svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b32 (); /* No need to trigger special case. Small cases, infs and nans are supported by our approximation technique. */ svuint32_t ix = svreinterpret_u32 (x); - svuint32_t sign = svand_x (pg, ix, SignMask); + svuint32_t sign = svand_x (ptrue, ix, SignMask); /* Argument reduction: y := arctan(x) for x < 1 - y := pi/2 + arctan(-1/x) for x > 1 - Hence, use z=-1/a if x>=1, otherwise z=a. */ - svbool_t red = svacgt (pg, x, 1.0f); - /* Avoid dependency in abs(x) in division (and comparison). */ - svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (1.0f), x), x); - /* Use absolute value only when needed (odd powers of z). */ - svfloat32_t az = svabs_x (pg, z); - az = svneg_m (az, red, az); - - /* Use split Estrin scheme for P(z^2) with deg(P)=7. 
*/ - svfloat32_t z2 = svmul_x (pg, z, z); - svfloat32_t z4 = svmul_x (pg, z2, z2); - svfloat32_t z8 = svmul_x (pg, z4, z4); - - svfloat32_t y = sv_estrin_7_f32_x (pg, z2, z4, z8, d->poly); - - /* y = shift + z + z^3 * P(z^2). */ - svfloat32_t z3 = svmul_x (pg, z2, az); - y = svmla_x (pg, az, z3, y); - - /* Apply shift as indicated by 'red' predicate. */ - y = svadd_m (red, y, sv_f32 (d->pi_over_2)); - - /* y = atan(x) if x>0, -atan(-x) otherwise. */ - return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); + y := arctan(-1/x) + pi/2 for x > +1 + y := arctan(-1/x) - pi/2 for x < -1 + Hence, use z=-1/a if |x|>=|-1|, otherwise z=a. */ + svbool_t red = svacgt (pg, x, d->neg_one); + svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (d->neg_one), x), x); + + /* Reinserts the sign bit of the argument to handle the case of x < -1. */ + svfloat32_t shift = svreinterpret_f32 ( + sveor_x (red, svreinterpret_u32 (sv_f32 (d->shift_val)), sign)); + + svfloat32_t z2 = svmul_x (ptrue, z, z); + svfloat32_t z3 = svmul_x (ptrue, z2, z); + svfloat32_t z4 = svmul_x (ptrue, z2, z2); + svfloat32_t z8 = svmul_x (ptrue, z4, z4); + + svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1); + + svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); + svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); + svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); + svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3); + + svfloat32_t p03 = svmla_x (pg, p01, z4, p23); + svfloat32_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat32_t y = svmla_x (pg, p03, z8, p47); + + /* shift + z + z^3 * P(z^2). 
*/ + shift = svadd_m (red, z, shift); + return svmla_x (pg, shift, z3, y); } diff --git a/sysdeps/aarch64/fpu/atanh_advsimd.c b/sysdeps/aarch64/fpu/atanh_advsimd.c index 3c3d0bd6ad..eb9769aeac 100644 --- a/sysdeps/aarch64/fpu/atanh_advsimd.c +++ b/sysdeps/aarch64/fpu/atanh_advsimd.c @@ -23,15 +23,19 @@ const static struct data { struct v_log1p_data log1p_consts; - uint64x2_t one, half; + uint64x2_t one; + uint64x2_t sign_mask; } data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE, .one = V2 (0x3ff0000000000000), - .half = V2 (0x3fe0000000000000) }; + .sign_mask = V2 (0x8000000000000000) }; static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) +special_case (float64x2_t x, float64x2_t halfsign, float64x2_t y, + uint64x2_t special, const struct data *d) { - return v_call_f64 (atanh, x, y, special); + y = log1p_inline (y, &d->log1p_consts); + return v_call_f64 (atanh, vbslq_f64 (d->sign_mask, halfsign, x), + vmulq_f64 (halfsign, y), special); } /* Approximation for vector double-precision atanh(x) using modified log1p. 
@@ -43,11 +47,10 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x) { const struct data *d = ptr_barrier (&data); + float64x2_t halfsign = vbslq_f64 (d->sign_mask, x, v_f64 (0.5)); float64x2_t ax = vabsq_f64 (x); uint64x2_t ia = vreinterpretq_u64_f64 (ax); - uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia); uint64x2_t special = vcgeq_u64 (ia, d->one); - float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half)); #if WANT_SIMD_EXCEPT ax = v_zerofy_f64 (ax, special); @@ -55,10 +58,15 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x) float64x2_t y; y = vaddq_f64 (ax, ax); - y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax)); - y = log1p_inline (y, &d->log1p_consts); + y = vdivq_f64 (y, vsubq_f64 (vreinterpretq_f64_u64 (d->one), ax)); if (__glibc_unlikely (v_any_u64 (special))) - return special_case (x, vmulq_f64 (y, halfsign), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special, d); +#else + return special_case (ax, halfsign, y, special, d); +#endif + + y = log1p_inline (y, &d->log1p_consts); return vmulq_f64 (y, halfsign); } diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c index 7a52728d70..a4803e5c13 100644 --- a/sysdeps/aarch64/fpu/atanh_sve.c +++ b/sysdeps/aarch64/fpu/atanh_sve.c @@ -30,7 +30,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special) } /* SVE approximation for double-precision atanh, based on log1p. - The greatest observed error is 2.81 ULP: + The greatest observed error is 3.3 ULP: _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6 want 0x1.ffd8ff31b501cp-6. */ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg) @@ -42,7 +42,6 @@ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg) svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half)); /* It is special if iax >= 1. 
*/ -// svbool_t special = svcmpge (pg, iax, One); svbool_t special = svacge (pg, x, 1.0); /* Computation is performed based on the following sequence of equality: diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c index ae488f7b54..818b6c92ad 100644 --- a/sysdeps/aarch64/fpu/atanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c @@ -40,15 +40,17 @@ const static struct data #define Half v_u32 (0x3f000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y, + uint32x4_t special) { - return v_call_f32 (atanhf, x, y, special); + return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign), + vmulq_f32 (halfsign, y), special); } /* Approximation for vector single-precision atanh(x) using modified log1p. - The maximum error is 3.08 ULP: - __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5 - want 0x1.ffcb82p-5. */ + The maximum error is 2.93 ULP: + _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5 + want 0x1.f4dcf8p-5. */ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); @@ -68,11 +70,19 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x) uint32x4_t special = vcgeq_u32 (iax, d->one); #endif - float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax)); - y = log1pf_inline (y, d->log1pf_consts); + float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), + vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax)); + y = log1pf_inline (y, &d->log1pf_consts); + /* If exceptions not required, pass ax to special-case for shorter dependency + chain. If exceptions are required ax will have been zerofied, so have to + pass x. 
*/ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (halfsign, y), special); +#if WANT_SIMD_EXCEPT + return special_case (x, halfsign, y, special); +#else + return special_case (ax, halfsign, y, special); +#endif return vmulq_f32 (halfsign, y); } libmvec_hidden_def (V_NAME_F1 (atanh)) diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c index dae83041ef..2d3005bbc8 100644 --- a/sysdeps/aarch64/fpu/atanhf_sve.c +++ b/sysdeps/aarch64/fpu/atanhf_sve.c @@ -17,21 +17,25 @@ License along with the GNU C Library; if not, see . */ +#include "sv_math.h" #include "sv_log1pf_inline.h" #define One (0x3f800000) #define Half (0x3f000000) static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign, + svfloat32_t y, svbool_t special) { + svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign)); + y = svmul_x (svptrue_b32 (), halfsign, y); return sv_call_f32 (atanhf, x, y, special); } /* Approximation for vector single-precision atanh(x) using modified log1p. - The maximum error is 2.28 ULP: - _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5 - want 0x1.ffbbb6p-5. */ + The maximum error is 1.99 ULP: + _ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5 + want 0x1.f1f4f6p-5. 
*/ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg) { svfloat32_t ax = svabs_x (pg, x); @@ -48,7 +52,7 @@ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg) y = sv_log1pf_inline (y, pg); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmul_x (pg, halfsign, y), special); + return special_case (iax, sign, halfsign, y, special); return svmul_x (pg, halfsign, y); } diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c index 3924c9ce44..11a89b1530 100644 --- a/sysdeps/aarch64/fpu/cos_advsimd.c +++ b/sysdeps/aarch64/fpu/cos_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* Worst-case error is 3.3 ulp in [-pi/2, pi/2]. */ .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), @@ -30,11 +30,9 @@ static const struct data V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), V2 (-0x1.9e9540300a1p-41) }, .inv_pi = V2 (0x1.45f306dc9c883p-2), - .half_pi = V2 (0x1.921fb54442d18p+0), .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), .range_val = V2 (0x1p23) }; @@ -68,10 +66,9 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi)); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); - n = vsubq_f64 (n, v_f64 (0.5)); + n = vrndaq_f64 (vfmaq_f64 (v_f64 (0.5), r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); + n = vsubq_f64 (n, v_f64 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). 
*/ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c index d0c285b03a..85a1b37373 100644 --- a/sysdeps/aarch64/fpu/cosf_advsimd.c +++ b/sysdeps/aarch64/fpu/cosf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. */ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,8 +33,6 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), - .half_pi = V4 (0x1.921fb6p0f), .range_val = V4 (0x1p20f) }; @@ -69,9 +67,8 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) #endif /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi)); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + n = vrndaq_f32 (vfmaq_f32 (v_f32 (0.5), r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); n = vsubq_f32 (n, v_f32 (0.5f)); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c index 919f34604a..3561893ae6 100644 --- a/sysdeps/aarch64/fpu/cosh_sve.c +++ b/sysdeps/aarch64/fpu/cosh_sve.c @@ -21,69 +21,99 @@ static const struct data { - float64_t poly[3]; - float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres; - uint64_t index_mask, special_bound; + double c0, c2; + double c1, c3; + float64_t inv_ln2, ln2_hi, ln2_lo, shift; + uint64_t special_bound; } data = { - .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3, - 0x1.5555576a59599p-5, }, - - .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2. */ - /* -ln2/N. 
*/ - .ln2_hi = -0x1.62e42fefa39efp-9, - .ln2_lo = -0x1.abc9e3b39803f3p-64, - .shift = 0x1.8p+52, - .thres = 704.0, - - .index_mask = 0xff, - /* 0x1.6p9, above which exp overflows. */ - .special_bound = 0x4086000000000000, + /* Generated using Remez, in [-log(2)/128, log(2)/128]. */ + .c0 = 0x1.fffffffffdbcdp-2, + .c1 = 0x1.555555555444cp-3, + .c2 = 0x1.555573c6a9f7dp-5, + .c3 = 0x1.1111266d28935p-7, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + /* 1/ln2. */ + .inv_ln2 = 0x1.71547652b82fep+0, + .shift = 0x1.800000000ff80p+46, /* 1.5*2^46+1022. */ + + /* asuint(ln(2^(1024 - 1/128))), the value above which exp overflows. */ + .special_bound = 0x40862e37e7d8ba72, }; -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) -{ - return sv_call_f64 (cosh, x, y, special); -} - -/* Helper for approximating exp(x). Copied from sv_exp_tail, with no - special-case handling or tail. */ +/* Helper for approximating exp(x)/2. + Functionally identical to FEXPA exp(x), but an adjustment in + the shift value which leads to a reduction in the exponent of scale by 1, + thus halving the result at no cost. */ static inline svfloat64_t -exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d) +exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d) { /* Calculate exp(x). 
*/ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); + svuint64_t u = svreinterpret_u64 (z); svfloat64_t n = svsub_x (pg, z, d->shift); - svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi); - r = svmla_x (pg, r, n, d->ln2_lo); + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); - svuint64_t u = svreinterpret_u64 (z); - svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS); - svuint64_t i = svand_x (pg, u, d->index_mask); + svfloat64_t r = x; + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); - svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]); - y = svmla_x (pg, sv_f64 (d->poly[0]), r, y); - y = svmla_x (pg, sv_f64 (1.0), r, y); - y = svmul_x (pg, r, y); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); + svfloat64_t p04 = svmla_x (pg, p01, p23, r2); + svfloat64_t p = svmla_x (pg, r, p04, r2); - /* s = 2^(n/N). */ - u = svld1_gather_index (pg, __v_exp_tail_data, i); - svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e)); + svfloat64_t scale = svexpa (u); - return svmla_x (pg, s, s, y); + return svmla_x (pg, scale, scale, p); +} + +/* Vectorised special case to handle values past where exp_inline overflows. + Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double + the valid range of inputs, and returns inf for anything past that. */ +static svfloat64_t NOINLINE +special_case (svbool_t pg, svbool_t special, svfloat64_t ax, svfloat64_t t, + const struct data *d) +{ + /* Finish fast path to compute values for non-special cases. */ + svfloat64_t inv_twoexp = svdivr_x (pg, t, 0.25); + svfloat64_t y = svadd_x (pg, t, inv_twoexp); + + /* Halves input value, and then check if any cases + are still going to overflow. 
*/ + ax = svmul_x (special, ax, 0.5); + svbool_t is_safe + = svcmplt (special, svreinterpret_u64 (ax), d->special_bound); + + /* Computes exp(x/2), and sets any overflowing lanes to inf. */ + svfloat64_t half_exp = exp_over_two_inline (special, ax, d); + half_exp = svsel (is_safe, half_exp, sv_f64 (INFINITY)); + + /* Construct special case cosh(x) = (exp(x/2)^2)/2. */ + svfloat64_t exp = svmul_x (svptrue_b64 (), half_exp, 2); + svfloat64_t special_y = svmul_x (special, exp, half_exp); + + /* Select correct return values for special and non-special cases. */ + special_y = svsel (special, special_y, y); + + /* Ensure an input of nan is correctly propagated. */ + svbool_t is_nan + = svcmpgt (special, svreinterpret_u64 (ax), sv_u64 (0x7ff0000000000000)); + return svsel (is_nan, ax, svsel (special, special_y, y)); } /* Approximation for SVE double-precision cosh(x) using exp_inline. cosh(x) = (exp(x) + exp(-x)) / 2. - The greatest observed error is in the scalar fall-back region, so is the - same as the scalar routine, 1.93 ULP: - _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021 - want 0x1.fd774e958236fp+1021. - - The greatest observed error in the non-special region is 1.54 ULP: - _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8 - want 0x1.f5e2bb8d5c991p+8. */ + The greatest observed error in special case region is 2.66 + 0.5 ULP: + _ZGVsMxv_cosh (0x1.633b532ffbc1ap+9) got 0x1.f9b2d3d22399ep+1023 + want 0x1.f9b2d3d22399bp+1023 + + The greatest observed error in the non-special region is 1.01 + 0.5 ULP: + _ZGVsMxv_cosh (0x1.998ecbb3c1f81p+1) got 0x1.890b225657f84p+3 + want 0x1.890b225657f82p+3. 
*/ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); @@ -92,14 +122,13 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg) svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound); /* Up to the point that exp overflows, we can use it to calculate cosh by - exp(|x|) / 2 + 1 / (2 * exp(|x|)). */ - svfloat64_t t = exp_inline (ax, pg, d); - svfloat64_t half_t = svmul_x (pg, t, 0.5); - svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); + (exp(|x|)/2 + 1) / (2 * exp(|x|)). */ + svfloat64_t half_exp = exp_over_two_inline (pg, ax, d); - /* Fall back to scalar for any special cases. */ + /* Falls back to entirely standalone vectorized special case. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (pg, special, ax, half_exp, d); - return svadd_x (pg, half_t, half_over_t); + svfloat64_t inv_twoexp = svdivr_x (pg, half_exp, 0.25); + return svadd_x (pg, half_exp, inv_twoexp); } diff --git a/sysdeps/aarch64/fpu/coshf_advsimd.c b/sysdeps/aarch64/fpu/coshf_advsimd.c index c1ab4923b8..cd5c866521 100644 --- a/sysdeps/aarch64/fpu/coshf_advsimd.c +++ b/sysdeps/aarch64/fpu/coshf_advsimd.c @@ -23,19 +23,27 @@ static const struct data { struct v_expf_data expf_consts; - uint32x4_t tiny_bound, special_bound; + uint32x4_t tiny_bound; + float32x4_t bound; +#if WANT_SIMD_EXCEPT + uint32x4_t special_bound; +#endif } data = { .expf_consts = V_EXPF_DATA, .tiny_bound = V4 (0x20000000), /* 0x1p-63: Round to 1 below this. */ /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. 
*/ + .bound = V4 (0x1.5a92d8p+6), +#if WANT_SIMD_EXCEPT .special_bound = V4 (0x42ad496c), +#endif }; #if !WANT_SIMD_EXCEPT static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t half_t, float32x4_t half_over_t, + uint32x4_t special) { - return v_call_f32 (coshf, x, y, special); + return v_call_f32 (coshf, x, vaddq_f32 (half_t, half_over_t), special); } #endif @@ -47,14 +55,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t special = vcgeq_u32 (iax, d->special_bound); - #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered correctly, fall back to the scalar variant for all inputs if any input is a special value or above the bound at which expf overflows. */ + float32x4_t ax = vabsq_f32 (x); + uint32x4_t iax = vreinterpretq_u32_f32 (ax); + uint32x4_t special = vcgeq_u32 (iax, d->special_bound); if (__glibc_unlikely (v_any_u32 (special))) return v_call_f32 (coshf, x, x, v_u32 (-1)); @@ -63,10 +70,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x) input to 0, which will generate no exceptions. */ if (__glibc_unlikely (v_any_u32 (tiny))) ax = v_zerofy_f32 (ax, tiny); + float32x4_t t = v_expf_inline (ax, &d->expf_consts); +#else + uint32x4_t special = vcageq_f32 (x, d->bound); + float32x4_t t = v_expf_inline (x, &d->expf_consts); #endif /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. 
*/ - float32x4_t t = v_expf_inline (ax, &d->expf_consts); float32x4_t half_t = vmulq_n_f32 (t, 0.5); float32x4_t half_over_t = vdivq_f32 (v_f32 (0.5), t); @@ -75,7 +85,7 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x) return vbslq_f32 (tiny, v_f32 (1), vaddq_f32 (half_t, half_over_t)); #else if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vaddq_f32 (half_t, half_over_t), special); + return special_case (x, half_t, half_over_t, special); #endif return vaddq_f32 (half_t, half_over_t); diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c index e5d8a299c6..508c0790ee 100644 --- a/sysdeps/aarch64/fpu/coshf_sve.c +++ b/sysdeps/aarch64/fpu/coshf_sve.c @@ -23,37 +23,42 @@ static const struct data { struct sv_expf_data expf_consts; - uint32_t special_bound; + float special_bound; } data = { .expf_consts = SV_EXPF_DATA, /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */ - .special_bound = 0x42ad496c, + .special_bound = 0x1.5a92d8p+6, }; static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t pg) +special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e, + svbool_t pg) { - return sv_call_f32 (coshf, x, y, pg); + return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e), + pg); } /* Single-precision vector cosh, using vector expf. - Maximum error is 1.89 ULP: - _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127 - want 0x1.f00adcp+127. */ + Maximum error is 2.56 +0.5 ULP: + _ZGVsMxv_coshf(-0x1.5b40f4p+1) got 0x1.e47748p+2 + want 0x1.e4774ep+2. */ svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svfloat32_t ax = svabs_x (pg, x); - svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound); + svbool_t special = svacge (pg, x, d->special_bound); - /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. 
*/ - svfloat32_t t = expf_inline (ax, pg, &d->expf_consts); - svfloat32_t half_t = svmul_x (pg, t, 0.5); - svfloat32_t half_over_t = svdivr_x (pg, t, 0.5); + /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. + Note that x is passed to exp here, rather than |x|. This is to avoid using + destructive unary ABS for better register usage. However it means the + routine is not exactly symmetrical, as the exp helper is slightly less + accurate in the negative range. */ + svfloat32_t e = expf_inline (x, pg, &d->expf_consts); + svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5); + svfloat32_t half_over_e = svdivr_x (pg, e, 0.5); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (x, half_e, half_over_e, special); - return svadd_x (pg, half_t, half_over_t); + return svadd_x (svptrue_b32 (), half_e, half_over_e); } diff --git a/sysdeps/aarch64/fpu/erf_advsimd.c b/sysdeps/aarch64/fpu/erf_advsimd.c index 19cbb7d0f4..a48092e838 100644 --- a/sysdeps/aarch64/fpu/erf_advsimd.c +++ b/sysdeps/aarch64/fpu/erf_advsimd.c @@ -22,19 +22,21 @@ static const struct data { float64x2_t third; - float64x2_t tenth, two_over_five, two_over_fifteen; - float64x2_t two_over_nine, two_over_fortyfive; + float64x2_t tenth, two_over_five, two_over_nine; + double two_over_fifteen, two_over_fortyfive; float64x2_t max, shift; + uint64x2_t max_idx; #if WANT_SIMD_EXCEPT float64x2_t tiny_bound, huge_bound, scale_minus_one; #endif } data = { + .max_idx = V2 (768), .third = V2 (0x1.5555555555556p-2), /* used to compute 2/3 and 1/6 too. */ - .two_over_fifteen = V2 (0x1.1111111111111p-3), + .two_over_fifteen = 0x1.1111111111111p-3, .tenth = V2 (-0x1.999999999999ap-4), .two_over_five = V2 (-0x1.999999999999ap-2), .two_over_nine = V2 (-0x1.c71c71c71c71cp-3), - .two_over_fortyfive = V2 (0x1.6c16c16c16c17p-5), + .two_over_fortyfive = 0x1.6c16c16c16c17p-5, .max = V2 (5.9921875), /* 6 - 1/128. 
*/ .shift = V2 (0x1p45), #if WANT_SIMD_EXCEPT @@ -56,8 +58,8 @@ static inline struct entry lookup (uint64x2_t i) { struct entry e; - float64x2_t e1 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 0)].erf), - e2 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 1)].erf); + float64x2_t e1 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 0)].erf), + e2 = vld1q_f64 (&__v_erf_data.tab[vgetq_lane_u64 (i, 1)].erf); e.erf = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; @@ -87,8 +89,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) float64x2_t a = vabsq_f64 (x); /* Reciprocal conditions that do not catch NaNs so they can be used in BSLs to return expected results. */ - uint64x2_t a_le_max = vcleq_f64 (a, dat->max); - uint64x2_t a_gt_max = vcgtq_f64 (a, dat->max); + uint64x2_t a_le_max = vcaleq_f64 (x, dat->max); + uint64x2_t a_gt_max = vcagtq_f64 (x, dat->max); #if WANT_SIMD_EXCEPT /* |x| huge or tiny. */ @@ -115,7 +117,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) segfault. */ uint64x2_t i = vsubq_u64 (vreinterpretq_u64_f64 (z), vreinterpretq_u64_f64 (shift)); - i = vbslq_u64 (a_le_max, i, v_u64 (768)); + i = vbslq_u64 (a_le_max, i, dat->max_idx); struct entry e = lookup (i); float64x2_t r = vsubq_f64 (z, shift); @@ -125,14 +127,19 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x) float64x2_t d2 = vmulq_f64 (d, d); float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t two_over_fifteen_and_fortyfive + = vld1q_f64 (&dat->two_over_fifteen); + /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. 
*/ float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->third, r2, vaddq_f64 (dat->third, dat->third)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->third)); - float64x2_t p4 = vfmaq_f64 (dat->two_over_five, r2, dat->two_over_fifteen); + float64x2_t p4 = vfmaq_laneq_f64 (dat->two_over_five, r2, + two_over_fifteen_and_fortyfive, 0); p4 = vfmsq_f64 (dat->tenth, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->two_over_nine, r2, dat->two_over_fortyfive); + float64x2_t p5 = vfmaq_laneq_f64 (dat->two_over_nine, r2, + two_over_fifteen_and_fortyfive, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->third), r2, p5)); float64x2_t p34 = vfmaq_f64 (p3, d, p4); diff --git a/sysdeps/aarch64/fpu/erf_data.c b/sysdeps/aarch64/fpu/erf_data.c index 6d2dcd235c..ea01fad7ca 100644 --- a/sysdeps/aarch64/fpu/erf_data.c +++ b/sysdeps/aarch64/fpu/erf_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erf. +/* Lookup table used in vector erf. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 6.0 (769 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. 
*/ -const struct erf_data __erf_data = { +const struct v_erf_data __v_erf_data = { .tab = { { 0x0.0000000000000p+0, 0x1.20dd750429b6dp+0 }, { 0x1.20dbf3deb1340p-7, 0x1.20d8f1975c85dp+0 }, { 0x1.20d77083f17a0p-6, 0x1.20cb67bd452c7p+0 }, diff --git a/sysdeps/aarch64/fpu/erf_sve.c b/sysdeps/aarch64/fpu/erf_sve.c index 7d51417406..671d55a02b 100644 --- a/sysdeps/aarch64/fpu/erf_sve.c +++ b/sysdeps/aarch64/fpu/erf_sve.c @@ -67,14 +67,16 @@ svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg) svfloat64_t a = svabs_x (pg, x); svfloat64_t shift = sv_f64 (dat->shift); svfloat64_t z = svadd_x (pg, a, shift); - svuint64_t i - = svsub_x (pg, svreinterpret_u64 (z), svreinterpret_u64 (shift)); + svuint64_t i = svand_x (pg, svreinterpret_u64 (z), 0xfff); + i = svadd_x (pg, i, i); /* Lookup without shortcut for small values but with predicate to avoid segfault for large values and NaNs. */ svfloat64_t r = svsub_x (pg, z, shift); - svfloat64_t erfr = svld1_gather_index (a_lt_max, __sv_erf_data.erf, i); - svfloat64_t scale = svld1_gather_index (a_lt_max, __sv_erf_data.scale, i); + svfloat64_t erfr + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].erf, i); + svfloat64_t scale + = svld1_gather_index (a_lt_max, &__v_erf_data.tab[0].scale, i); /* erf(x) ~ erf(r) + scale * d * poly (r, d). */ svfloat64_t d = svsub_x (pg, a, r); diff --git a/sysdeps/aarch64/fpu/erfc_advsimd.c b/sysdeps/aarch64/fpu/erfc_advsimd.c index f1b3bfe830..d05eac61a2 100644 --- a/sysdeps/aarch64/fpu/erfc_advsimd.c +++ b/sysdeps/aarch64/fpu/erfc_advsimd.c @@ -24,8 +24,8 @@ static const struct data { uint64x2_t offset, table_scale; float64x2_t max, shift; - float64x2_t p20, p40, p41, p42; - float64x2_t p51, p52; + float64x2_t p20, p40, p41, p51; + double p42, p52; double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2]; #if WANT_SIMD_EXCEPT float64x2_t uflow_bound; @@ -41,9 +41,9 @@ static const struct data .p20 = V2 (0x1.5555555555555p-2), /* 1/3, used to compute 2/3 and 1/6. 
*/ .p40 = V2 (-0x1.999999999999ap-4), /* 1/10. */ .p41 = V2 (-0x1.999999999999ap-2), /* 2/5. */ - .p42 = V2 (0x1.1111111111111p-3), /* 2/15. */ + .p42 = 0x1.1111111111111p-3, /* 2/15. */ .p51 = V2 (-0x1.c71c71c71c71cp-3), /* 2/9. */ - .p52 = V2 (0x1.6c16c16c16c17p-5), /* 2/45. */ + .p52 = 0x1.6c16c16c16c17p-5, /* 2/45. */ /* Qi = (i+1) / i, Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */ .qr5 = { 0x1.3333333333333p0, -0x1.e79e79e79e79ep-3 }, .qr6 = { 0x1.2aaaaaaaaaaabp0, -0x1.b6db6db6db6dbp-3 }, @@ -69,9 +69,9 @@ lookup (uint64x2_t i) { struct entry e; float64x2_t e1 - = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc); float64x2_t e2 - = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); + = vld1q_f64 (&__v_erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc); e.erfc = vuzp1q_f64 (e1, e2); e.scale = vuzp2q_f64 (e1, e2); return e; @@ -157,9 +157,10 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x) float64x2_t p1 = r; float64x2_t p2 = vfmsq_f64 (dat->p20, r2, vaddq_f64 (dat->p20, dat->p20)); float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->p20)); - float64x2_t p4 = vfmaq_f64 (dat->p41, r2, dat->p42); + float64x2_t p42_p52 = vld1q_f64 (&dat->p42); + float64x2_t p4 = vfmaq_laneq_f64 (dat->p41, r2, p42_p52, 0); p4 = vfmsq_f64 (dat->p40, r2, p4); - float64x2_t p5 = vfmaq_f64 (dat->p51, r2, dat->p52); + float64x2_t p5 = vfmaq_laneq_f64 (dat->p51, r2, p42_p52, 1); p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5)); /* Compute p_i using recurrence relation: p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */ diff --git a/sysdeps/aarch64/fpu/erfc_data.c b/sysdeps/aarch64/fpu/erfc_data.c index 76a94e4681..8dc6a8c42c 100644 --- a/sysdeps/aarch64/fpu/erfc_data.c +++ b/sysdeps/aarch64/fpu/erfc_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erfc. +/* Lookup table used in vector erfc. 
For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = ~27.0 (3488 values): - - the first entry __erfc_data.tab.erfc contains the values of erfc(r), - - the second entry __erfc_data.tab.scale contains the values of + - the first entry __v_erfc_data.tab.erfc contains the values of erfc(r), + - the second entry __v_erfc_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore they are scaled by a large enough value 2^128 (fits in 8bit). */ -const struct erfc_data __erfc_data = { +const struct v_erfc_data __v_erfc_data = { .tab = { { 0x1p128, 0x1.20dd750429b6dp128 }, { 0x1.fb7c9030853b3p127, 0x1.20d8f1975c85dp128 }, { 0x1.f6f9447be0743p127, 0x1.20cb67bd452c7p128 }, diff --git a/sysdeps/aarch64/fpu/erfc_sve.c b/sysdeps/aarch64/fpu/erfc_sve.c index c17d3e4484..703926ee41 100644 --- a/sysdeps/aarch64/fpu/erfc_sve.c +++ b/sysdeps/aarch64/fpu/erfc_sve.c @@ -104,7 +104,7 @@ svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg) /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. 
*/ i = svadd_x (pg, i, i); - const float64_t *p = &__erfc_data.tab[0].erfc - 2 * dat->off_arr; + const float64_t *p = &__v_erfc_data.tab[0].erfc - 2 * dat->off_arr; svfloat64_t erfcr = svld1_gather_index (pg, p, i); svfloat64_t scale = svld1_gather_index (pg, p + 1, i); diff --git a/sysdeps/aarch64/fpu/erfcf_advsimd.c b/sysdeps/aarch64/fpu/erfcf_advsimd.c index ca5bc3ab33..59b0b0d64b 100644 --- a/sysdeps/aarch64/fpu/erfcf_advsimd.c +++ b/sysdeps/aarch64/fpu/erfcf_advsimd.c @@ -62,13 +62,13 @@ lookup (uint32x4_t i) { struct entry e; float32x2_t t0 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc); float32x2_t t1 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc); float32x2_t t2 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc); float32x2_t t3 - = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); + = vld1_f32 (&__v_erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc); float32x4_t e1 = vcombine_f32 (t0, t1); float32x4_t e2 = vcombine_f32 (t2, t3); e.erfc = vuzp1q_f32 (e1, e2); diff --git a/sysdeps/aarch64/fpu/erfcf_data.c b/sysdeps/aarch64/fpu/erfcf_data.c index 77fb889a78..d45087bbb9 100644 --- a/sysdeps/aarch64/fpu/erfcf_data.c +++ b/sysdeps/aarch64/fpu/erfcf_data.c @@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erfcf. +/* Lookup table used in vector erfcf. For each possible rounded input r (multiples of 1/64), between r = 0.0 and r = 10.0625 (645 values): - - the first entry __erfcf_data.tab.erfc contains the values of erfc(r), - - the second entry __erfcf_data.tab.scale contains the values of + - the first entry __v_erfcf_data.tab.erfc contains the values of erfc(r), + - the second entry __v_erfcf_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). 
Both values may go into subnormal range, therefore they are scaled by a large enough value 2^47 (fits in 8 bits). */ -const struct erfcf_data __erfcf_data = { +const struct v_erfcf_data __v_erfcf_data = { .tab = { { 0x1p47, 0x1.20dd76p47 }, { 0x1.f6f944p46, 0x1.20cb68p47 }, { 0x1.edf3aap46, 0x1.209546p47 }, diff --git a/sysdeps/aarch64/fpu/erfcf_sve.c b/sysdeps/aarch64/fpu/erfcf_sve.c index 48d1677eb4..e4869263e3 100644 --- a/sysdeps/aarch64/fpu/erfcf_sve.c +++ b/sysdeps/aarch64/fpu/erfcf_sve.c @@ -76,23 +76,23 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) svuint32_t i = svqadd (svreinterpret_u32 (z), dat->off_idx); /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */ - i = svmul_x (pg, i, 2); - const float32_t *p = &__erfcf_data.tab[0].erfc - 2 * dat->off_arr; + i = svlsl_x (svptrue_b32 (), i, 1); + const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr; svfloat32_t erfcr = svld1_gather_index (pg, p, i); svfloat32_t scale = svld1_gather_index (pg, p + 1, i); /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). 
*/ svfloat32_t r = svsub_x (pg, z, shift); svfloat32_t d = svsub_x (pg, a, r); - svfloat32_t d2 = svmul_x (pg, d, d); - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t d2 = svmul_x (svptrue_b32 (), d, d); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t coeffs = svld1rq (svptrue_b32 (), &dat->third); - svfloat32_t third = svdup_lane (coeffs, 0); svfloat32_t p1 = r; - svfloat32_t p2 = svmls_lane (third, r2, coeffs, 1); - svfloat32_t p3 = svmul_x (pg, r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); + svfloat32_t p2 = svmls_lane (sv_f32 (dat->third), r2, coeffs, 1); + svfloat32_t p3 + = svmul_x (svptrue_b32 (), r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); svfloat32_t p4 = svmla_lane (sv_f32 (dat->two_over_five), r2, coeffs, 2); p4 = svmls_x (pg, sv_f32 (dat->tenth), r2, p4); diff --git a/sysdeps/aarch64/fpu/erff_advsimd.c b/sysdeps/aarch64/fpu/erff_advsimd.c index f2fe6ff236..db39e789b6 100644 --- a/sysdeps/aarch64/fpu/erff_advsimd.c +++ b/sysdeps/aarch64/fpu/erff_advsimd.c @@ -47,10 +47,10 @@ static inline struct entry lookup (uint32x4_t i) { struct entry e; - float32x2_t t0 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 0)].erf); - float32x2_t t1 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 1)].erf); - float32x2_t t2 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 2)].erf); - float32x2_t t3 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 3)].erf); + float32x2_t t0 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 0)].erf); + float32x2_t t1 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 1)].erf); + float32x2_t t2 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 2)].erf); + float32x2_t t3 = vld1_f32 (&__v_erff_data.tab[vgetq_lane_u32 (i, 3)].erf); float32x4_t e1 = vcombine_f32 (t0, t1); float32x4_t e2 = vcombine_f32 (t2, t3); e.erf = vuzp1q_f32 (e1, e2); diff --git a/sysdeps/aarch64/fpu/erff_data.c b/sysdeps/aarch64/fpu/erff_data.c index 9a32940915..da38aed205 100644 --- a/sysdeps/aarch64/fpu/erff_data.c +++ b/sysdeps/aarch64/fpu/erff_data.c 
@@ -19,14 +19,14 @@ #include "vecmath_config.h" -/* Lookup table used in erff. +/* Lookup table used in vector erff. For each possible rounded input r (multiples of 1/128), between r = 0.0 and r = 4.0 (513 values): - - the first entry __erff_data.tab.erf contains the values of erf(r), - - the second entry __erff_data.tab.scale contains the values of + - the first entry __v_erff_data.tab.erf contains the values of erf(r), + - the second entry __v_erff_data.tab.scale contains the values of 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the algorithm, since lookup is performed only for x >= 1/64-1/512. */ -const struct erff_data __erff_data = { +const struct v_erff_data __v_erff_data = { .tab = { { 0x0.000000p+0, 0x1.20dd76p+0 }, { 0x1.20dbf4p-7, 0x1.20d8f2p+0 }, { 0x1.20d770p-6, 0x1.20cb68p+0 }, diff --git a/sysdeps/aarch64/fpu/erff_sve.c b/sysdeps/aarch64/fpu/erff_sve.c index 38f00db9be..0e382eb09a 100644 --- a/sysdeps/aarch64/fpu/erff_sve.c +++ b/sysdeps/aarch64/fpu/erff_sve.c @@ -62,18 +62,17 @@ svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg) svfloat32_t shift = sv_f32 (dat->shift); svfloat32_t z = svadd_x (pg, a, shift); - svuint32_t i - = svsub_x (pg, svreinterpret_u32 (z), svreinterpret_u32 (shift)); - - /* Saturate lookup index. */ - i = svsel (a_ge_max, sv_u32 (512), i); + svuint32_t i = svand_x (pg, svreinterpret_u32 (z), 0xfff); + i = svadd_x (pg, i, i); /* r and erf(r) set to 0 for |x| below min. */ svfloat32_t r = svsub_z (a_gt_min, z, shift); - svfloat32_t erfr = svld1_gather_index (a_gt_min, __sv_erff_data.erf, i); + svfloat32_t erfr + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].erf, i); /* scale set to 2/sqrt(pi) for |x| below min. */ - svfloat32_t scale = svld1_gather_index (a_gt_min, __sv_erff_data.scale, i); + svfloat32_t scale + = svld1_gather_index (a_gt_min, &__v_erff_data.tab[0].scale, i); scale = svsel (a_gt_min, scale, sv_f32 (dat->scale)); /* erf(x) ~ erf(r) + scale * d * (1 - r * d + 1/3 * d^2). 
*/ diff --git a/sysdeps/aarch64/fpu/exp10_sve.c b/sysdeps/aarch64/fpu/exp10_sve.c index ddf64708cb..bfd3fb9e19 100644 --- a/sysdeps/aarch64/fpu/exp10_sve.c +++ b/sysdeps/aarch64/fpu/exp10_sve.c @@ -18,21 +18,23 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" #define SpecialBound 307.0 /* floor (log10 (2^1023)). */ static const struct data { - double poly[5]; + double c1, c3, c2, c4, c0; double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound; } data = { /* Coefficients generated using Remez algorithm. rel error: 0x1.9fcb9b3p-60 abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ] max ulp err 0.52 +0.5. */ - .poly = { 0x1.26bb1bbb55516p1, 0x1.53524c73cd32ap1, 0x1.0470591daeafbp1, - 0x1.2bd77b1361ef6p0, 0x1.142b5d54e9621p-1 }, + .c0 = 0x1.26bb1bbb55516p1, + .c1 = 0x1.53524c73cd32ap1, + .c2 = 0x1.0470591daeafbp1, + .c3 = 0x1.2bd77b1361ef6p0, + .c4 = 0x1.142b5d54e9621p-1, /* 1.5*2^46+1023. This value is further explained below. */ .shift = 0x1.800000000ffc0p+46, .log10_2 = 0x1.a934f0979a371p1, /* 1/log2(10). */ @@ -70,9 +72,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, d->scale_thres); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -103,11 +105,14 @@ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg) comes at significant performance cost. */ svuint64_t u = svreinterpret_u64 (z); svfloat64_t scale = svexpa (u); - + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); /* Approximate exp10(r) using polynomial. 
*/ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, svmul_x (pg, r, d->poly[0]), r2, - sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly + 1)); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + svfloat64_t p14 = svmla_x (pg, p12, p34, r2); + + svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c0), r2, p14); /* Assemble result as exp10(x) = 2^n * exp10(r). If |x| > SpecialBound multiplication may overflow, so use special case routine. */ diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c index cf53e73290..55d9cd83f2 100644 --- a/sysdeps/aarch64/fpu/exp10f_advsimd.c +++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c @@ -18,16 +18,15 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" #define ScaleBound 192.0f static const struct data { - float32x4_t poly[5]; - float log10_2_and_inv[4]; - float32x4_t shift; - + float32x4_t c0, c1, c3; + float log10_2_high, log10_2_low, c2, c4; + float32x4_t inv_log10_2, special_bound; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT float32x4_t scale_thresh; #endif @@ -37,19 +36,24 @@ static const struct data rel error: 0x1.89dafa3p-24 abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] maxerr: 1.85943 +0.5 ulp. */ - .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f), - V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) }, - .shift = V4 (0x1.8p23f), - - /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */ - .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 }, + .c0 = V4 (0x1.26bb16p+1f), + .c1 = V4 (0x1.5350d2p+1f), + .c2 = 0x1.04744ap+1f, + .c3 = V4 (0x1.2d8176p+0f), + .c4 = 0x1.12b41ap-1f, + .inv_log10_2 = V4 (0x1.a934fp+1), + .log10_2_high = 0x1.344136p-2, + .log10_2_low = 0x1.ec10cp-27, + /* rint (log2 (2^127 / (1 + sqrt (2)))). 
*/ + .special_bound = V4 (126.0f), + .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .scale_thresh = V4 (ScaleBound) #endif }; -#define ExponentBias v_u32 (0x3f800000) - #if WANT_SIMD_EXCEPT # define SpecialBound 38.0f /* rint(log10(2^127)). */ @@ -67,17 +71,15 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) #else -# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))). */ -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) +# define SpecialBound 126.0f static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. */ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); @@ -112,23 +114,23 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x) /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. 
*/ - float32x4_t log10_2_and_inv = vld1q_f32 (d->log10_2_and_inv); - float32x4_t z = vfmaq_laneq_f32 (d->shift, x, log10_2_and_inv, 0); - float32x4_t n = vsubq_f32 (z, d->shift); - float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_and_inv, 1); - r = vfmsq_laneq_f32 (r, n, log10_2_and_inv, 2); - uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + float32x4_t log10_2_c24 = vld1q_f32 (&d->log10_2_high); + float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_log10_2)); + float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_c24, 0); + r = vfmaq_laneq_f32 (r, n, log10_2_c24, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (n)), 23); - float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - uint32x4_t cmp = vcagtq_f32 (n, v_f32 (SpecialBound)); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t poly - = vfmaq_f32 (vmulq_f32 (r, d->poly[0]), - v_pairwise_poly_3_f32 (r, r2, d->poly + 1), r2); + float32x4_t p12 = vfmaq_laneq_f32 (d->c1, r, log10_2_c24, 2); + float32x4_t p34 = vfmaq_laneq_f32 (d->c3, r, log10_2_c24, 3); + float32x4_t p14 = vfmaq_f32 (p12, r2, p34); + float32x4_t poly = vfmaq_f32 (vmulq_f32 (r, d->c0), p14, r2); if (__glibc_unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c index e09b2f3b27..0a4c264506 100644 --- a/sysdeps/aarch64/fpu/exp10f_sve.c +++ b/sysdeps/aarch64/fpu/exp10f_sve.c @@ -18,74 +18,70 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" -/* For x < -SpecialBound, the result is subnormal and not handled correctly by - FEXPA. */ -#define SpecialBound 37.9 +/* For x < -Thres (-log10(2^126)), the result is subnormal and not handled + correctly by FEXPA. 
*/ +#define Thres 0x1.2f702p+5 static const struct data { - float poly[5]; - float shift, log10_2, log2_10_hi, log2_10_lo, special_bound; + float log10_2, log2_10_hi, log2_10_lo, c1; + float c0, shift, thres; } data = { /* Coefficients generated using Remez algorithm with minimisation of relative - error. - rel error: 0x1.89dafa3p-24 - abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] - maxerr: 0.52 +0.5 ulp. */ - .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f, - 0x1.12b41ap-1f }, + error. */ + .c0 = 0x1.26bb62p1, + .c1 = 0x1.53524cp1, /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, .log10_2 = 0x1.a934fp+1, .log2_10_hi = 0x1.344136p-2, .log2_10_lo = -0x1.ec10cp-27, - .special_bound = SpecialBound, + .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (exp10f, x, y, special); -} - -/* Single-precision SVE exp10f routine. Implements the same algorithm - as AdvSIMD exp10f. - Worst case error is 1.02 ULPs. - _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 - want 0x1.ba5f9cp-1. */ -svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) +static inline svfloat32_t +sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - const struct data *d = ptr_barrier (&data); /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */ - - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log10_2); /* n = round(x/(log10(2)/N)). 
*/ svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0); + svfloat32_t z = svmla_lane (shift, x, lane_consts, 0); svfloat32_t n = svsub_x (pg, z, shift); /* r = x - n*log10(2)/N. */ - svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1); - r = svmls_lane (r, n, log10_2_and_inv, 2); + svfloat32_t r = x; + r = svmls_lane (r, n, lane_consts, 1); + r = svmls_lane (r, n, lane_consts, 2); - svbool_t special = svacgt (pg, x, d->special_bound); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp10(r)-1. */ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t poly - = svmla_x (pg, svmul_x (pg, r, d->poly[0]), - sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2); + svfloat32_t poly = svmla_lane (sv_f32 (d->c0), r, lane_consts, 3); + poly = svmul_x (pg, poly, r); + return svmla_x (pg, scale, scale, poly); +} - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d), + special); +} - return svmla_x (pg, scale, scale, poly); +/* Single-precision SVE exp10f routine. Based on the FEXPA instruction. + Worst case error is 1.10 ULP. + _ZGVsMxv_exp10f (0x1.cc76dep+3) got 0x1.be0172p+47 + want 0x1.be017p+47. */ +svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp10f_inline (x, pg, d); } diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c index 22848ebfa5..ed11423e45 100644 --- a/sysdeps/aarch64/fpu/exp2_sve.c +++ b/sysdeps/aarch64/fpu/exp2_sve.c @@ -18,25 +18,22 @@ . 
*/ #include "sv_math.h" -#include "poly_sve_f64.h" - -#define N (1 << V_EXP_TABLE_BITS) #define BigBound 1022 #define UOFlowBound 1280 static const struct data { - double poly[4]; + double c2, c4; + double c0, c1, c3; double shift, big_bound, uoflow_bound; } data = { /* Coefficients are computed using Remez algorithm with minimisation of the absolute error. */ - .poly = { 0x1.62e42fefa3686p-1, 0x1.ebfbdff82c241p-3, 0x1.c6b09b16de99ap-5, - 0x1.3b2abf5571ad8p-7 }, - .shift = 0x1.8p52 / N, - .uoflow_bound = UOFlowBound, - .big_bound = BigBound, + .c0 = 0x1.62e42fefa39efp-1, .c1 = 0x1.ebfbdff82a31bp-3, + .c2 = 0x1.c6b08d706c8a5p-5, .c3 = 0x1.3b2ad2ff7d2f3p-7, + .c4 = 0x1.5d8761184beb3p-10, .shift = 0x1.800000000ffc0p+46, + .uoflow_bound = UOFlowBound, .big_bound = BigBound, }; #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ @@ -65,47 +62,52 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. */ - svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound); + svbool_t p_cmp = svacle (pg, n, d->uoflow_bound); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); - return svsel (p_cmp, r1, r0); + return svsel (p_cmp, r0, r1); } /* Fast vector implementation of exp2. - Maximum measured error is 1.65 ulp. - _ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1 - want 0x1.f8db0d4df721dp-1. */ + Maximum measured error is 0.52 + 0.5 ulp. + _ZGVsMxv_exp2 (0x1.3b72ad5b701bfp-1) got 0x1.8861641b49e08p+0 + want 0x1.8861641b49e07p+0. 
*/ svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svbool_t no_big_scale = svacle (pg, x, d->big_bound); - svbool_t special = svnot_z (pg, no_big_scale); - - /* Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N]. */ - svfloat64_t shift = sv_f64 (d->shift); - svfloat64_t kd = svadd_x (pg, x, shift); - svuint64_t ki = svreinterpret_u64 (kd); - /* kd = k/N. */ - kd = svsub_x (pg, kd, shift); - svfloat64_t r = svsub_x (pg, x, kd); - - /* scale ~= 2^(k/N). */ - svuint64_t idx = svand_x (pg, ki, N - 1); - svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS); - svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top)); + svbool_t special = svacge (pg, x, d->big_bound); + + svfloat64_t z = svadd_x (svptrue_b64 (), x, d->shift); + svfloat64_t n = svsub_x (svptrue_b64 (), z, d->shift); + svfloat64_t r = svsub_x (svptrue_b64 (), x, n); + + svfloat64_t scale = svexpa (svreinterpret_u64 (z)); + + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); /* Approximate exp2(r) using polynomial. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly); - svfloat64_t y = svmul_x (pg, r, p); + /* y = exp2(r) - 1 ~= r * (C0 + C1 r + C2 r^2 + C3 r^3 + C4 r^4). */ + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + svfloat64_t p = svmla_x (pg, p12, p34, r2); + p = svmad_x (pg, p, r, d->c0); + svfloat64_t y = svmul_x (svptrue_b64 (), r, p); /* Assemble exp2(x) = exp2(r) * scale. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (pg, scale, y, kd, d); + { + /* FEXPA zeroes the sign bit, however the sign is meaningful to the + special case function so needs to be copied. + e = sign bit of u << 46. 
*/ + svuint64_t e = svand_x (pg, svlsl_x (pg, svreinterpret_u64 (z), 46), + 0x8000000000000000); + scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale))); + return special_case (pg, scale, y, n, d); + } + return svmla_x (pg, scale, scale, y); } diff --git a/sysdeps/aarch64/fpu/exp2f_advsimd.c b/sysdeps/aarch64/fpu/exp2f_advsimd.c index 69e0b193a1..a4220da63c 100644 --- a/sysdeps/aarch64/fpu/exp2f_advsimd.c +++ b/sysdeps/aarch64/fpu/exp2f_advsimd.c @@ -21,24 +21,28 @@ static const struct data { - float32x4_t poly[5]; - uint32x4_t exponent_bias; + float32x4_t c1, c3; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT - float32x4_t special_bound, scale_thresh; + float32x4_t scale_thresh, special_bound; #endif + float c0, c2, c4, zero; } data = { /* maxerr: 1.962 ulp. */ - .poly = { V4 (0x1.59977ap-10f), V4 (0x1.3ce9e4p-7f), V4 (0x1.c6bd32p-5f), - V4 (0x1.ebf9bcp-3f), V4 (0x1.62e422p-1f) }, + .c0 = 0x1.59977ap-10f, + .c1 = V4 (0x1.3ce9e4p-7f), + .c2 = 0x1.c6bd32p-5f, + .c3 = V4 (0x1.ebf9bcp-3f), + .c4 = 0x1.62e422p-1f, .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .special_bound = V4 (126.0f), .scale_thresh = V4 (192.0f), #endif }; -#define C(i) d->poly[i] - #if WANT_SIMD_EXCEPT # define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */ @@ -55,16 +59,13 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) #else -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) - static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. 
*/ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); @@ -80,13 +81,11 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly; - uint32x4_t cmp, e; #if WANT_SIMD_EXCEPT /* asuint(|x|) - TinyBound >= BigBound - TinyBound. */ uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); - cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound); + uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound); float32x4_t xm = x; /* If any lanes are special, mask them with 1 and retain a copy of x to allow special_case to fix special lanes later. This is only necessary if fenv @@ -95,23 +94,24 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x) x = vbslq_f32 (cmp, v_f32 (1), x); #endif - /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ - n = vrndaq_f32 (x); - r = vsubq_f32 (x, n); - e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23); - scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. 
*/ + float32x4_t n = vrndaq_f32 (x); + float32x4_t r = vsubq_f32 (x, n); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - cmp = vcagtq_f32 (n, d->special_bound); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif - r2 = vmulq_f32 (r, r); - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t c024 = vld1q_f32 (&d->c0); + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, c024, 0); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, c024, 1); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_laneq_f32 (r, c024, 2); + float32x4_t poly = vfmaq_f32 (p, q, r2); if (__glibc_unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c index 8a686e3e05..cf01820288 100644 --- a/sysdeps/aarch64/fpu/exp2f_sve.c +++ b/sysdeps/aarch64/fpu/exp2f_sve.c @@ -18,60 +18,57 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" #define Thres 0x1.5d5e2ap+6f static const struct data { - float poly[5]; - float shift, thres; + float c0, c1, shift, thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. */ - .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, - 0x1.59977ap-10f }, + /* Coefficients generated using Remez algorithm with minimisation of relative + error. */ + .c0 = 0x1.62e485p-1, + .c1 = 0x1.ebfbe0p-3, /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, + .shift = 0x1.803f8p17f, /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled correctly by FEXPA. */ .thres = Thres, }; -static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) -{ - return sv_call_f32 (exp2f, x, y, special); -} - -/* Single-precision SVE exp2f routine. 
Implements the same algorithm - as AdvSIMD exp2f. - Worst case error is 1.04 ULPs. - SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 - want 0x1.ba7ebp+0. */ -svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +static inline svfloat32_t +sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) { - const struct data *d = ptr_barrier (&data); /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = n + r, with r in [-1/2, 1/2]. */ - svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svadd_x (pg, x, shift); - svfloat32_t n = svsub_x (pg, z, shift); - svfloat32_t r = svsub_x (pg, x, n); + svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift); + svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift); + svfloat32_t r = svsub_x (svptrue_b32 (), x, n); - svbool_t special = svacgt (pg, x, d->thres); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - /* Polynomial evaluation: poly(r) ~ exp2(r)-1. - Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for - coefficients 1 to 4, and apply most significant coefficient directly. */ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1); - svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (pg, scale, scale, poly), special); + svfloat32_t poly = svmla_x (pg, sv_f32 (d->c0), r, sv_f32 (d->c1)); + poly = svmul_x (svptrue_b32 (), poly, r); return svmla_x (pg, scale, scale, poly); } + +static svfloat32_t NOINLINE +special_case (svfloat32_t x, svbool_t special, const struct data *d) +{ + return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d), + special); +} + +/* Single-precision SVE exp2f routine, based on the FEXPA instruction. + Worst case error is 1.09 ULPs. + _ZGVsMxv_exp2f (0x1.9a2a94p-1) got 0x1.be1054p+0 + want 0x1.be1052p+0. 
*/ +svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t special = svacgt (pg, x, d->thres); + if (__glibc_unlikely (svptest_any (special, special))) + return special_case (x, special, d); + return sv_exp2f_inline (x, pg, d); +} diff --git a/sysdeps/aarch64/fpu/exp_sve.c b/sysdeps/aarch64/fpu/exp_sve.c index aabaaa1d61..b2421d493f 100644 --- a/sysdeps/aarch64/fpu/exp_sve.c +++ b/sysdeps/aarch64/fpu/exp_sve.c @@ -21,12 +21,15 @@ static const struct data { - double poly[4]; + double c0, c2; + double c1, c3; double ln2_hi, ln2_lo, inv_ln2, shift, thres; + } data = { - .poly = { /* ulp error: 0.53. */ - 0x1.fffffffffdbcdp-2, 0x1.555555555444cp-3, 0x1.555573c6a9f7dp-5, - 0x1.1111266d28935p-7 }, + .c0 = 0x1.fffffffffdbcdp-2, + .c1 = 0x1.555555555444cp-3, + .c2 = 0x1.555573c6a9f7dp-5, + .c3 = 0x1.1111266d28935p-7, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, /* 1/ln2. */ @@ -36,7 +39,6 @@ static const struct data .thres = 704.0, }; -#define C(i) sv_f64 (d->poly[i]) #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ /* SpecialBias1 + SpecialBias1 = asuint(1.0). */ #define SpecialBias1 0x7000000000000000 /* 0x1p769. */ @@ -56,20 +58,20 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n) svuint64_t b = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */ - /* Set s1 to generate overflow depending on sign of exponent n. */ - svfloat64_t s1 = svreinterpret_f64 ( - svsubr_x (pg, b, SpecialBias1)); /* 0x70...0 - b. */ - /* Offset s to avoid overflow in final result if n is below threshold. */ + /* Set s1 to generate overflow depending on sign of exponent n, + ie. s1 = 0x70...0 - b. */ + svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); + /* Offset s to avoid overflow in final result if n is below threshold. + ie. s2 = as_u64 (s) - 0x3010...0 + b. 
*/ svfloat64_t s2 = svreinterpret_f64 ( - svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), - b)); /* as_u64 (s) - 0x3010...0 + b. */ + svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, 1280.0); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -103,16 +105,16 @@ svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg) svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); svuint64_t u = svreinterpret_u64 (z); svfloat64_t n = svsub_x (pg, z, d->shift); - + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); /* r = x - n * ln2, r is in [-ln2/(2N), ln2/(2N)]. */ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); svfloat64_t r = svmls_lane (x, n, ln2, 0); r = svmls_lane (r, n, ln2, 1); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. 
*/ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p01 = svmla_x (pg, C (0), C (1), r); - svfloat64_t p23 = svmla_x (pg, C (2), C (3), r); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); svfloat64_t p04 = svmla_x (pg, p01, p23, r2); svfloat64_t y = svmla_x (pg, r, p04, r2); diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c index 99d2e647aa..70f137e2e5 100644 --- a/sysdeps/aarch64/fpu/expf_advsimd.c +++ b/sysdeps/aarch64/fpu/expf_advsimd.c @@ -21,21 +21,25 @@ static const struct data { - float32x4_t poly[5]; - float32x4_t shift, inv_ln2, ln2_hi, ln2_lo; - uint32x4_t exponent_bias; + float32x4_t c1, c3, c4, inv_ln2; + float ln2_hi, ln2_lo, c0, c2; + uint32x4_t exponent_bias, special_offset, special_bias; #if !WANT_SIMD_EXCEPT float32x4_t special_bound, scale_thresh; #endif } data = { /* maxerr: 1.45358 +0.5 ulp. */ - .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), - V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, - .shift = V4 (0x1.8p23f), + .c0 = 0x1.0e4020p-7f, + .c1 = V4 (0x1.573e2ep-5f), + .c2 = 0x1.555e66p-3f, + .c3 = V4 (0x1.fffdb6p-2f), + .c4 = V4 (0x1.ffffecp-1f), .inv_ln2 = V4 (0x1.715476p+0f), - .ln2_hi = V4 (0x1.62e4p-1f), - .ln2_lo = V4 (0x1.7f7d1cp-20f), + .ln2_hi = 0x1.62e4p-1f, + .ln2_lo = 0x1.7f7d1cp-20f, .exponent_bias = V4 (0x3f800000), + .special_offset = V4 (0x82000000), + .special_bias = V4 (0x7f000000), #if !WANT_SIMD_EXCEPT .special_bound = V4 (126.0f), .scale_thresh = V4 (192.0f), @@ -60,19 +64,17 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) #else -# define SpecialOffset v_u32 (0x82000000) -# define SpecialBias v_u32 (0x7f000000) - static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t scale, const struct data *d) { /* 2^n may overflow, break it up into s1*s2. 
*/ - uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); - float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias)); float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); float32x4_t r2 = vmulq_f32 (s1, s1); + // (s2 + p*s2)*s1 = s2(p+1)s1 float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1); /* Similar to r1 but avoids double rounding in the subnormal range. */ float32x4_t r0 = vfmaq_f32 (scale, poly, scale); @@ -85,12 +87,11 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, r, r2, scale, p, q, poly, z; - uint32x4_t cmp, e; + float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); #if WANT_SIMD_EXCEPT /* asuint(x) - TinyBound >= BigBound - TinyBound. */ - cmp = vcgeq_u32 ( + uint32x4_t cmp = vcgeq_u32 ( vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)), TinyBound), SpecialBound); @@ -104,23 +105,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. 
*/ - z = vfmaq_f32 (d->shift, x, d->inv_ln2); - n = vsubq_f32 (z, d->shift); - r = vfmsq_f32 (x, n, d->ln2_hi); - r = vfmsq_f32 (r, n, d->ln2_lo); - e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); - scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2)); + float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c02, 0); + r = vfmsq_laneq_f32 (r, n, ln2_c02, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); #if !WANT_SIMD_EXCEPT - cmp = vcagtq_f32 (n, d->special_bound); + uint32x4_t cmp = vcagtq_f32 (n, d->special_bound); #endif - r2 = vmulq_f32 (r, r); - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t r2 = vmulq_f32 (r, r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_f32 (d->c4, r); + float32x4_t poly = vfmaq_f32 (p, q, r2); if (__glibc_unlikely (v_any_u32 (cmp))) #if WANT_SIMD_EXCEPT diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c index 3ba79bc4f1..aee86a2033 100644 --- a/sysdeps/aarch64/fpu/expf_sve.c +++ b/sysdeps/aarch64/fpu/expf_sve.c @@ -18,72 +18,36 @@ . */ #include "sv_math.h" +#include "sv_expf_inline.h" + +/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ +#define Thres 0x1.5d5e2ap+6f static const struct data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift, thres; + struct sv_expf_data d; + float thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. 
*/ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, - 0x1.0e4020p-7f }, - .inv_ln2 = 0x1.715476p+0f, - .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, - /* 1.5*2^17 + 127. */ - .shift = 0x1.903f8p17f, - /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled - correctly by FEXPA. */ - .thres = 0x1.5d5e2ap+6f, + .d = SV_EXPF_DATA, + .thres = Thres, }; -#define C(i) sv_f32 (d->poly[i]) -#define ExponentBias 0x3f800000 - static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d) { - return sv_call_f32 (expf, x, y, special); + return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special); } /* Optimised single-precision SVE exp function. - Worst-case error is 1.04 ulp: - SV_NAME_F1 (exp)(0x1.a8eda4p+1) got 0x1.ba74bcp+4 - want 0x1.ba74bap+4. */ + Worst-case error is 0.88 +0.50 ULP: + _ZGVsMxv_expf(-0x1.bba276p-6) got 0x1.f25288p-1 + want 0x1.f2528ap-1. */ svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - - /* Load some constants in quad-word chunks to minimise memory access (last - lane is wasted). */ - svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2); - - /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0); - svfloat32_t n = svsub_x (pg, z, d->shift); - - /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1); - r = svmls_lane (r, n, invln2_and_ln2, 2); - - /* scale = 2^(n/N). */ svbool_t is_special_case = svacgt (pg, x, d->thres); - svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_x (pg, C (3), C (4), r); - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_x (pg, r, C (0)); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - if (__glibc_unlikely (svptest_any (pg, is_special_case))) - return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case); - - return svmla_x (pg, scale, scale, poly); + return special_case (x, is_special_case, &d->d); + return expf_inline (x, pg, &d->d); } diff --git a/sysdeps/aarch64/fpu/expm1_sve.c b/sysdeps/aarch64/fpu/expm1_sve.c index c933cf9c0e..4c35e0341d 100644 --- a/sysdeps/aarch64/fpu/expm1_sve.c +++ b/sysdeps/aarch64/fpu/expm1_sve.c @@ -18,82 +18,164 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" -#define SpecialBound 0x1.62b7d369a5aa9p+9 -#define ExponentBias 0x3ff0000000000000 +#define FexpaBound 0x1.4cb5ecef28adap-3 /* 15*ln2/64. */ +#define SpecialBound 0x1.628c2855bfaddp+9 /* ln(2^(1023 + 1/128)). */ static const struct data { - double poly[11]; - double shift, inv_ln2, special_bound; - /* To be loaded in one quad-word. */ + double c2, c4; + double inv_ln2; double ln2_hi, ln2_lo; + double c0, c1, c3; + double shift, thres; + uint64_t expm1_data[32]; } data = { - /* Generated using fpminimax. */ - .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, - 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13, - 0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, - 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, - - .special_bound = SpecialBound, - .inv_ln2 = 0x1.71547652b82fep0, - .ln2_hi = 0x1.62e42fefa39efp-1, - .ln2_lo = 0x1.abc9e3b39803fp-56, - .shift = 0x1.8p52, + /* Table emulating FEXPA - 1, for values of FEXPA close to 1. + The table holds values of 2^(i/64) - 1, computed in arbitrary precision. + The first half of the table stores values associated to i from 0 to 15. 
+ The second half of the table stores values associated to i from 0 to -15. */ + .expm1_data = { + 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901, + 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb, + 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2, + 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a, + 0x0000000000000000, 0xbfc331751ec3a814, 0xbfc20224341286e4, 0xbfc0cf85bed0f8b7, + 0xbfbf332113d56b1f, 0xbfbcc0768d4175a6, 0xbfba46f918837cb7, 0xbfb7c695afc3b424, + 0xbfb53f391822dbc7, 0xbfb2b0cfe1266bd4, 0xbfb01b466423250a, 0xbfaafd11874c009e, + 0xbfa5b505d5b6f268, 0xbfa05e4119ea5d89, 0xbf95f134923757f3, 0xbf860f9f985bc9f4, + }, + + /* Generated using Remez, in [-log(2)/128, log(2)/128]. */ + .c0 = 0x1p-1, + .c1 = 0x1.55555555548f9p-3, + .c2 = 0x1.5555555554c22p-5, + .c3 = 0x1.111123aaa2fb2p-7, + .c4 = 0x1.6c16d77d98e5bp-10, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .inv_ln2 = 0x1.71547652b82fep+0, + .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */ + .thres = SpecialBound, }; -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t pg) +#define SpecialOffset 0x6000000000000000 /* 0x1p513. */ +/* SpecialBias1 - SpecialBias2 = asuint(1.0). */ +#define SpecialBias1 0x7000000000000000 /* 0x1p769. */ +#define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ + +static NOINLINE svfloat64_t +special_case (svbool_t pg, svfloat64_t y, svfloat64_t s, svfloat64_t p, + svfloat64_t n) { - return sv_call_f64 (expm1, x, y, pg); + /* s=2^n may overflow, break it up into s=s1*s2, + such that exp = s + s*y can be computed as s1*(s2+s2*y) + and s1*s1 overflows only if n>0. */ + + /* If n<=0 then set b to 0x6, 0 otherwise. */ + svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */ + svuint64_t b + = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. 
*/ + + /* Set s1 to generate overflow depending on sign of exponent n, + ie. s1 = 0x70...0 - b. */ + svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); + /* Offset s to avoid overflow in final result if n is below threshold. + ie. s2 = as_u64 (s) - 0x3010...0 + b. */ + svfloat64_t s2 = svreinterpret_f64 ( + svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); + + /* |n| > 1280 => 2^(n) overflows. */ + svbool_t p_cmp = svacgt (pg, n, 1280.0); + + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); + svfloat64_t r2 = svmla_x (pg, s2, s2, p); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); + + svbool_t is_safe = svacle (pg, n, 1023); /* Only correct special lanes. */ + return svsel (is_safe, y, svsub_x (pg, svsel (p_cmp, r1, r0), 1.0)); } -/* Double-precision vector exp(x) - 1 function. - The maximum error observed error is 2.18 ULP: - _ZGVsMxv_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2 - want 0x1.a8b9ea8d66e2p-2. */ +/* FEXPA based SVE expm1 algorithm. + Maximum measured error is 2.81 + 0.5 ULP: + _ZGVsMxv_expm1 (0x1.974060e619bfp-3) got 0x1.c290e5858bb53p-3 + want 0x1.c290e5858bb5p-3. */ svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - /* Large, Nan/Inf. */ - svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound)); - - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. */ - svfloat64_t shift = sv_f64 (d->shift); - svfloat64_t n = svsub_x (pg, svmla_x (pg, shift, x, d->inv_ln2), shift); - svint64_t i = svcvt_s64_x (pg, n); - svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); - svfloat64_t f = svmls_lane (x, n, ln2, 0); - f = svmls_lane (f, n, ln2, 1); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... 
- So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t f4 = svmul_x (pg, f2, f2); - svfloat64_t f8 = svmul_x (pg, f4, f4); - svfloat64_t p - = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly)); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. */ - svint64_t u = svadd_x (pg, svlsl_x (pg, i, 52), ExponentBias); - svfloat64_t t = svreinterpret_f64 (u); - - /* expm1(x) ~= p * t + (t - 1). */ - svfloat64_t y = svmla_x (pg, svsub_x (pg, t, 1), p, t); + svbool_t special = svacgt (pg, x, d->thres); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, y, special); + svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); + svuint64_t u = svreinterpret_u64 (z); + svfloat64_t n = svsub_x (pg, z, d->shift); + /* r = x - n * ln2, r is in [-ln2/128, ln2/128]. */ + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); + svfloat64_t r = x; + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); + + /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); + + svfloat64_t p; + svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + p = svmad_x (pg, c34, r2, c12); + p = svmad_x (pg, p, r, sv_f64 (d->c0)); + p = svmad_x (pg, p, r2, r); + + svfloat64_t scale = svexpa (u); + svfloat64_t scalem1 = svsub_x (pg, scale, sv_f64 (1.0)); + + /* We want to construct expm1(x) = (scale - 1) + scale * poly. + However, for values of scale close to 1, scale-1 causes large ULP errors + due to cancellation. + + This can be circumvented by using a small lookup for scale-1 + when our input is below a certain bound, otherwise we can use FEXPA. 
+ + This bound is based upon the table size: + Bound = ((TableSize - 1) / 64) * ln2. + The current bound is based upon a table size of 16. */ + svbool_t is_small = svaclt (pg, x, FexpaBound); + + if (svptest_any (pg, is_small)) + { + /* Index via the input of FEXPA, but we only care about the lower 4 bits. + */ + svuint64_t base_idx = svand_x (pg, u, 0xf); + + /* We can use the sign of x as a fifth bit to account for the asymmetry + of e^x around 0. */ + svuint64_t signBit + = svlsl_x (pg, svlsr_x (pg, svreinterpret_u64 (x), 63), 4); + svuint64_t idx = svorr_x (pg, base_idx, signBit); + + /* Lookup values for scale - 1 for small x. */ + svfloat64_t lookup = svreinterpret_f64 ( + svld1_gather_index (is_small, d->expm1_data, idx)); + + /* Select the appropriate scale - 1 value based on x. */ + scalem1 = svsel (is_small, lookup, scalem1); + } + + svfloat64_t y = svmla_x (pg, scalem1, scale, p); + + /* FEXPA returns nan for large inputs so we special case those. */ + if (__glibc_unlikely (svptest_any (pg, special))) + { + /* FEXPA zeroes the sign bit, however the sign is meaningful to the + special case function so needs to be copied. + e = sign bit of u << 46. */ + svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000); + /* Copy sign to s. */ + scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale))); + return special_case (pg, y, scale, p, n); + } + + /* return expm1 = (scale - 1) + (scale * poly). */ return y; } diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c index a0616ec754..8303ca296e 100644 --- a/sysdeps/aarch64/fpu/expm1f_advsimd.c +++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c @@ -18,27 +18,18 @@ . 
*/ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_expm1f_inline.h" static const struct data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; - int32x4_t exponent_bias; + struct v_expm1f_data d; #if WANT_SIMD_EXCEPT uint32x4_t thresh; #else float32x4_t oflow_bound; #endif } data = { - /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, - /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, - .shift = V4 (0x1.8p23f), - .exponent_bias = V4 (0x3f800000), + .d = V_EXPM1F_DATA, #if !WANT_SIMD_EXCEPT /* Value above which expm1f(x) should overflow. Absolute value of the underflow bound is greater than this, so it catches both cases - there is @@ -55,67 +46,38 @@ static const struct data #define TinyBound v_u32 (0x34000000 << 1) static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t special, const struct data *d) { - return v_call_f32 (expm1f, x, y, special); + return v_call_f32 ( + expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special); } /* Single-precision vector exp(x) - 1 function. - The maximum error is 1.51 ULP: - _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2 - want 0x1.e2fb94p-2. */ + The maximum error is 1.62 ULP: + _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2 + want 0x1.da9f44p-2. */ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); #if WANT_SIMD_EXCEPT + uint32x4_t ix = vreinterpretq_u32_f32 (x); /* If fp exceptions are to be triggered correctly, fall back to scalar for |x| < 2^-23, |x| > oflow_bound, Inf & NaN. 
Add ix to itself for shift-left by 1, and compare with thresh which was left-shifted offline - this is effectively an absolute compare. */ uint32x4_t special = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh); - if (__glibc_unlikely (v_any_u32 (special))) - x = v_zerofy_f32 (x, special); #else /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); #endif - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. */ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); - int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... - So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - float32x4_t p = v_horner_4_f32 (f, d->poly); - p = vfmaq_f32 (f, vmulq_f32 (f, f), p); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. */ - int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias); - float32x4_t t = vreinterpretq_f32_s32 (u); - if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t), - special); + return special_case (x, special, d); /* expm1(x) ~= p * t + (t - 1). 
*/ - return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t); + return expm1f_inline (x, &d->d); } libmvec_hidden_def (V_NAME_F1 (expm1)) HALF_WIDTH_ALIAS_F1 (expm1) diff --git a/sysdeps/aarch64/fpu/expm1f_sve.c b/sysdeps/aarch64/fpu/expm1f_sve.c index 7c852125cd..05a66400d4 100644 --- a/sysdeps/aarch64/fpu/expm1f_sve.c +++ b/sysdeps/aarch64/fpu/expm1f_sve.c @@ -18,7 +18,6 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" /* Largest value of x for which expm1(x) should round to -1. */ #define SpecialBound 0x1.5ebc4p+6f @@ -28,20 +27,17 @@ static const struct data /* These 4 are grouped together so they can be loaded as one quadword, then used with _lane forms of svmla/svmls. */ float c2, c4, ln2_hi, ln2_lo; - float c0, c1, c3, inv_ln2, special_bound, shift; + float c0, inv_ln2, c1, c3, special_bound; } data = { /* Generated using fpminimax. */ .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7, - .c4 = 0x1.6b55a2p-10, + .c4 = 0x1.6b55a2p-10, .inv_ln2 = 0x1.715476p+0f, + .special_bound = SpecialBound, .ln2_lo = 0x1.7f7d1cp-20f, + .ln2_hi = 0x1.62e4p-1f, - .special_bound = SpecialBound, .shift = 0x1.8p23f, - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, - .ln2_lo = 0x1.7f7d1cp-20f, }; -#define C(i) sv_f32 (d->c##i) - static svfloat32_t NOINLINE special_case (svfloat32_t x, svbool_t pg) { @@ -71,9 +67,8 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. */ - svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2); - j = svsub_x (pg, j, d->shift); - svint32_t i = svcvt_s32_x (pg, j); + svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2); + j = svrinta_x (pg, j); svfloat32_t f = svmls_lane (x, j, lane_constants, 2); f = svmls_lane (f, j, lane_constants, 3); @@ -83,17 +78,17 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg) x + ax^2 + bx^3 + cx^4 .... 
So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0); - svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1); - svfloat32_t f2 = svmul_x (pg, f, f); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1); + svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f); svfloat32_t p = svmla_x (pg, p12, f2, p34); - p = svmla_x (pg, C (0), f, p); + + p = svmla_x (pg, sv_f32 (d->c0), f, p); p = svmla_x (pg, f, f2, p); /* Assemble the result. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^i. */ - svfloat32_t t = svreinterpret_f32 ( - svadd_x (pg, svreinterpret_u32 (svlsl_x (pg, i, 23)), 0x3f800000)); - return svmla_x (pg, svsub_x (pg, t, 1), p, t); + svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j)); + return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t); } diff --git a/sysdeps/aarch64/fpu/log10_advsimd.c b/sysdeps/aarch64/fpu/log10_advsimd.c index c065aaebae..f69ed21c39 100644 --- a/sysdeps/aarch64/fpu/log10_advsimd.c +++ b/sysdeps/aarch64/fpu/log10_advsimd.c @@ -18,36 +18,36 @@ . */ #include "v_math.h" -#include "poly_advsimd_f64.h" - -#define N (1 << V_LOG10_TABLE_BITS) static const struct data { - uint64x2_t min_norm; + uint64x2_t off, sign_exp_mask, offset_lower_bound; uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t invln10, log10_2, ln2; - uint64x2_t sign_exp_mask; + double invln10, log10_2; + double c1, c3; + float64x2_t c0, c2, c4; } data = { /* Computed from log coefficients divided by log(10) then rounded to double precision. 
*/ - .poly = { V2 (-0x1.bcb7b1526e506p-3), V2 (0x1.287a7636be1d1p-3), - V2 (-0x1.bcb7b158af938p-4), V2 (0x1.63c78734e6d07p-4), - V2 (-0x1.287461742fee4p-4) }, - .ln2 = V2 (0x1.62e42fefa39efp-1), - .invln10 = V2 (0x1.bcb7b1526e50ep-2), - .log10_2 = V2 (0x1.34413509f79ffp-2), - .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */ - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ + .c0 = V2 (-0x1.bcb7b1526e506p-3), + .c1 = 0x1.287a7636be1d1p-3, + .c2 = V2 (-0x1.bcb7b158af938p-4), + .c3 = 0x1.63c78734e6d07p-4, + .c4 = V2 (-0x1.287461742fee4p-4), + .invln10 = 0x1.bcb7b1526e50ep-2, + .log10_2 = 0x1.34413509f79ffp-2, + .off = V2 (0x3fe6900900000000), .sign_exp_mask = V2 (0xfff0000000000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0010000000000000 - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - 0x0010000000000000. 
*/ }; -#define Off v_u64 (0x3fe6900900000000) +#define N (1 << V_LOG10_TABLE_BITS) #define IndexMask (N - 1) -#define T(s, i) __v_log10_data.s[i] - struct entry { float64x2_t invc; @@ -70,10 +70,11 @@ lookup (uint64x2_t i) } static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, - uint32x2_t special) +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) { - return v_call_f64 (log10, x, vfmaq_f64 (hi, r2, y), vmovl_u32 (special)); + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log10, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); } /* Fast implementation of double-precision vector log10 @@ -85,19 +86,24 @@ special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. 
*/ - uint64x2_t tmp = vsubq_u64 (ix, Off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); - struct entry e = lookup (tmp); + struct entry e = lookup (u_off); + + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2). */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); @@ -105,17 +111,22 @@ float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x) /* hi = r / log(10) + log10(c) + k*log10(2). Constants in v_log10_data.c are computed (in extended precision) as - e.log10c := e.logc * ivln10. */ - float64x2_t w = vfmaq_f64 (e.log10c, r, d->invln10); + e.log10c := e.logc * invln10. */ + float64x2_t cte = vld1q_f64 (&d->invln10); + float64x2_t hi = vfmaq_laneq_f64 (e.log10c, r, cte, 0); /* y = log10(1+r) + n * log10(2). */ - float64x2_t hi = vfmaq_f64 (w, kd, d->log10_2); + hi = vfmaq_laneq_f64 (hi, kd, cte, 1); /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. 
*/ float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_f64 (y, d->c4, r2); + y = vfmaq_f64 (p, y, r2); if (__glibc_unlikely (v_any_u32h (special))) - return special_case (x, y, hi, r2, special); - return vfmaq_f64 (hi, r2, y); + return special_case (hi, u_off, y, r2, special, d); + return vfmaq_f64 (hi, y, r2); } diff --git a/sysdeps/aarch64/fpu/log10_sve.c b/sysdeps/aarch64/fpu/log10_sve.c index ab7362128d..f1cad2759a 100644 --- a/sysdeps/aarch64/fpu/log10_sve.c +++ b/sysdeps/aarch64/fpu/log10_sve.c @@ -23,28 +23,49 @@ #define Min 0x0010000000000000 #define Max 0x7ff0000000000000 #define Thres 0x7fe0000000000000 /* Max - Min. */ -#define Off 0x3fe6900900000000 #define N (1 << V_LOG10_TABLE_BITS) +static const struct data +{ + double c0, c2; + double c1, c3; + double invln10, log10_2; + double c4; + uint64_t off; +} data = { + .c0 = -0x1.bcb7b1526e506p-3, + .c1 = 0x1.287a7636be1d1p-3, + .c2 = -0x1.bcb7b158af938p-4, + .c3 = 0x1.63c78734e6d07p-4, + .c4 = -0x1.287461742fee4p-4, + .invln10 = 0x1.bcb7b1526e50ep-2, + .log10_2 = 0x1.34413509f79ffp-2, + .off = 0x3fe6900900000000, +}; + static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) { - return sv_call_f64 (log10, x, y, special); + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log10, x, svmla_x (svptrue_b64 (), hi, r2, y), special); } -/* SVE log10 algorithm. +/* Double-precision SVE log10 routine. Maximum measured error is 2.46 ulps. SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6 want 0x1.fffbdf6eaa667p-6. 
*/ svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + svuint64_t ix = svreinterpret_u64 (x); svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG10_TABLE_BITS); i = svand_x (pg, i, (N - 1) << 1); svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); @@ -62,15 +83,19 @@ svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg) svfloat64_t r = svmad_x (pg, invc, z, -1.0); /* hi = log(c) + k*log(2). */ - svfloat64_t w = svmla_x (pg, logc, r, __v_log10_data.invln10); - svfloat64_t hi = svmla_x (pg, w, k, __v_log10_data.log10_2); + svfloat64_t invln10_log10_2 = svld1rq_f64 (svptrue_b64 (), &d->invln10); + svfloat64_t w = svmla_lane_f64 (logc, r, invln10_log10_2, 0); + svfloat64_t hi = svmla_lane_f64 (w, k, invln10_log10_2, 1); /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. 
*/ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log10_data.poly); + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_x (pg, y, r2, d->c4); + y = svmla_x (pg, p, r2, y); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); + return special_case (hi, tmp, y, r2, special, d); return svmla_x (pg, hi, r2, y); } diff --git a/sysdeps/aarch64/fpu/log10f_advsimd.c b/sysdeps/aarch64/fpu/log10f_advsimd.c index 9347422a77..0d792c3df9 100644 --- a/sysdeps/aarch64/fpu/log10f_advsimd.c +++ b/sysdeps/aarch64/fpu/log10f_advsimd.c @@ -18,35 +18,43 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { - uint32x4_t min_norm; + float32x4_t c0, c2, c4, c6, inv_ln10, ln2; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; - float32x4_t poly[8]; - float32x4_t inv_ln10, ln2; - uint32x4_t off, mantissa_mask; + uint32x4_t mantissa_mask; + float c1, c3, c5, c7; } data = { /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. */ - .poly = { V4 (-0x1.bcb79cp-3f), V4 (0x1.2879c8p-3f), V4 (-0x1.bcd472p-4f), - V4 (0x1.6408f8p-4f), V4 (-0x1.246f8p-4f), V4 (0x1.f0e514p-5f), - V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) }, + .c0 = V4 (-0x1.bcb79cp-3f), + .c1 = 0x1.2879c8p-3f, + .c2 = V4 (-0x1.bcd472p-4f), + .c3 = 0x1.6408f8p-4f, + .c4 = V4 (-0x1.246f8p-4f), + .c5 = 0x1.f0e514p-5f, + .c6 = V4 (-0x1.0fc92cp-4f), + .c7 = 0x1.f5f76ap-5f, .ln2 = V4 (0x1.62e43p-1f), .inv_ln10 = V4 (0x1.bcb7b2p-2f), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. 
*/ + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. */ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, - uint16x4_t cmp) +special_case (float32x4_t y, uint32x4_t u_off, float32x4_t p, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); + return v_call_f32 (log10f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (y, p, r2), vmovl_u16 (cmp)); } /* Fast implementation of AdvSIMD log10f, @@ -58,26 +66,41 @@ special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + float32x4_t c1357 = vld1q_f32 (&d->c1); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. 
*/ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log10(1+r) + n * log10(2). */ float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t poly = v_pw_horner_7_f32 (r, r2, d->poly); + + float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); + float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); + float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2); + float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3); + + float32x4_t p47 = vfmaq_f32 (c45, r2, c67); + float32x4_t p27 = vfmaq_f32 (c23, r2, p47); + float32x4_t poly = vfmaq_f32 (c01, r2, p27); + /* y = Log10(2) * n + poly * InvLn(10). */ float32x4_t y = vfmaq_f32 (r, d->ln2, n); y = vmulq_f32 (y, d->inv_ln10); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, y, poly, r2, special); + return special_case (y, u_off, poly, r2, special, d); return vfmaq_f32 (y, poly, r2); } libmvec_hidden_def (V_NAME_F1 (log10)) diff --git a/sysdeps/aarch64/fpu/log10f_sve.c b/sysdeps/aarch64/fpu/log10f_sve.c index bdbb49cd32..7913679f67 100644 --- a/sysdeps/aarch64/fpu/log10f_sve.c +++ b/sysdeps/aarch64/fpu/log10f_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0246[4]; float poly_1357[4]; float ln2, inv_ln10; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -35,18 +36,23 @@ static const struct data -0x1.0fc92cp-4f }, .ln2 = 0x1.62e43p-1f, .inv_ln10 = 0x1.bcb7b2p-2f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). 
*/ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min 0x00800000 -#define Max 0x7f800000 -#define Thres 0x7f000000 /* Max - Min. */ -#define Offset 0x3f2aaaab /* 0.666667. */ +#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000. */ #define MantissaMask 0x007fffff static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log10f, x, y, special); + return sv_call_f32 ( + log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log10f using the same algorithm and @@ -57,23 +63,25 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t ix = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres); + + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - ix = svsub_x (pg, ix, Offset); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend. */ - ix = svand_x (pg, ix, MantissaMask); - ix = svadd_x (pg, ix, Offset); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend. */ + svuint32_t ix = svand_x (pg, u_off, MantissaMask); + ix = svadd_x (pg, ix, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f); /* y = log10(1+r) + n*log10(2) log10(1+r) ~ r * InvLn(10) + P(r) where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). 
*/ - svfloat32_t r2 = svmul_x (pg, r, r); - svfloat32_t r4 = svmul_x (pg, r2, r2); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2); svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0); svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1); @@ -88,7 +96,6 @@ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg) hi = svmul_x (pg, hi, d->inv_ln10); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y), - special); - return svmla_x (pg, hi, r2, y); + return special_case (u_off, hi, r2, y, special); + return svmla_x (svptrue_b32 (), hi, r2, y); } diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c index ffc418fc9c..9d18578ce6 100644 --- a/sysdeps/aarch64/fpu/log1p_advsimd.c +++ b/sysdeps/aarch64/fpu/log1p_advsimd.c @@ -17,43 +17,26 @@ License along with the GNU C Library; if not, see . */ -#include "v_math.h" -#include "poly_advsimd_f64.h" +#define WANT_V_LOG1P_K0_SHORTCUT 0 +#include "v_log1p_inline.h" const static struct data { - float64x2_t poly[19], ln2[2]; - uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask, inf, minus_one; - int64x2_t one_top; -} data = { - /* Generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. 
*/ - .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), - V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), - V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), - V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), - V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), - V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), - V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), - V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), - V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), - V2 (-0x1.cfa7385bdb37ep-6) }, - .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, - /* top32(asuint64(sqrt(2)/2)) << 32. */ - .hf_rt2_top = V2 (0x3fe6a09e00000000), - /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */ - .one_m_hf_rt2_top = V2 (0x00095f6200000000), - .umask = V2 (0x000fffff00000000), - .one_top = V2 (0x3ff), - .inf = V2 (0x7ff0000000000000), - .minus_one = V2 (0xbff0000000000000) -}; + struct v_log1p_data d; + uint64x2_t inf, minus_one; +} data = { .d = V_LOG1P_CONSTANTS_TABLE, + .inf = V2 (0x7ff0000000000000), + .minus_one = V2 (0xbff0000000000000) }; #define BottomMask v_u64 (0xffffffff) -static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, uint64x2_t special) +static float64x2_t NOINLINE VPCS_ATTR +special_case (float64x2_t x, uint64x2_t cmp, const struct data *d) { - return v_call_f64 (log1p, x, y, special); + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float64x2_t x_nospecial = v_zerofy_f64 (x, cmp); + return v_call_f64 (log1p, x, log1p_inline (x_nospecial, &d->d), cmp); } /* Vector log1p approximation using polynomial on reduced interval. 
Routine is @@ -66,64 +49,12 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x) const struct data *d = ptr_barrier (&data); uint64x2_t ix = vreinterpretq_u64_f64 (x); uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); - uint64x2_t special = vcgeq_u64 (ia, d->inf); -#if WANT_SIMD_EXCEPT - special = vorrq_u64 (special, - vcgeq_u64 (ix, vreinterpretq_u64_f64 (v_f64 (-1)))); - if (__glibc_unlikely (v_any_u64 (special))) - x = v_zerofy_f64 (x, special); -#else - special = vorrq_u64 (special, vcleq_f64 (x, v_f64 (-1))); -#endif + uint64x2_t special_cases + = vorrq_u64 (vcgeq_u64 (ia, d->inf), vcgeq_u64 (ix, d->minus_one)); - /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f - is in [sqrt(2)/2, sqrt(2)]): - log1p(x) = k*log(2) + log1p(f). + if (__glibc_unlikely (v_any_u64 (special_cases))) + return special_case (x, special_cases, d); - f may not be representable exactly, so we need a correction term: - let m = round(1 + x), c = (1 + x) - m. - c << m: at very small x, log1p(x) ~ x, hence: - log(1+x) - log(m) ~ c/m. - - We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */ - - /* Obtain correctly scaled k by manipulation in the exponent. - The scalar algorithm casts down to 32-bit at this point to calculate k and - u_red. We stay in double-width to obtain f and k, using the same constants - as the scalar algorithm but shifted left by 32. */ - float64x2_t m = vaddq_f64 (x, v_f64 (1)); - uint64x2_t mi = vreinterpretq_u64_f64 (m); - uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top); - - int64x2_t ki - = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top); - float64x2_t k = vcvtq_f64_s64 (ki); - - /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ - uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top); - uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask)); - float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1)); - - /* Correction term c/m. 
*/ - float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m); - - /* Approximate log1p(x) on the reduced input using a polynomial. Because - log1p(0)=0 we choose an approximation of the form: - x + C0*x^2 + C1*x^3 + C2x^4 + ... - Hence approximation has the form f + f^2 * P(f) - where P(x) = C0 + C1*x + C2x^2 + ... - Assembling this all correctly is dealt with at the final step. */ - float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly); - - float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]); - float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]); - float64x2_t y = vaddq_f64 (ylo, yhi); - - if (__glibc_unlikely (v_any_u64 (special))) - return special_case (vreinterpretq_f64_u64 (ix), vfmaq_f64 (y, f2, p), - special); - - return vfmaq_f64 (y, f2, p); + return log1p_inline (x, &d->d); } diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c index 04f7e5720e..5251f3c075 100644 --- a/sysdeps/aarch64/fpu/log1p_sve.c +++ b/sysdeps/aarch64/fpu/log1p_sve.c @@ -22,19 +22,33 @@ static const struct data { - double poly[19]; + float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16; + float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; double ln2_hi, ln2_lo; uint64_t hfrt2_top, onemhfrt2_top, inf, mone; } data = { /* Generated using Remez in [ sqrt(2)/2 - 1, sqrt(2) - 1]. Order 20 - polynomial, however first 2 coefficients are 0 and 1 so are not stored. */ - .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, - 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, - -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, - 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, - -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, - 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, - -0x1.cfa7385bdb37ep-6, }, + polynomial, however first 2 coefficients are 0 and 1 so are not + stored. 
*/ + .c0 = -0x1.ffffffffffffbp-2, + .c1 = 0x1.55555555551a9p-2, + .c2 = -0x1.00000000008e3p-2, + .c3 = 0x1.9999999a32797p-3, + .c4 = -0x1.555555552fecfp-3, + .c5 = 0x1.249248e071e5ap-3, + .c6 = -0x1.ffffff8bf8482p-4, + .c7 = 0x1.c71c8f07da57ap-4, + .c8 = -0x1.9999ca4ccb617p-4, + .c9 = 0x1.7459ad2e1dfa3p-4, + .c10 = -0x1.554d2680a3ff2p-4, + .c11 = 0x1.3b4c54d487455p-4, + .c12 = -0x1.2548a9ffe80e6p-4, + .c13 = 0x1.0f389a24b2e07p-4, + .c14 = -0x1.eee4db15db335p-5, + .c15 = 0x1.e95b494d4a5ddp-5, + .c16 = -0x1.15fdf07cb7c73p-4, + .c17 = 0x1.0310b70800fcfp-4, + .c18 = -0x1.cfa7385bdb37ep-6, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, /* top32(asuint64(sqrt(2)/2)) << 32. */ @@ -49,7 +63,7 @@ static const struct data #define BottomMask 0xffffffff static svfloat64_t NOINLINE -special_case (svbool_t special, svfloat64_t x, svfloat64_t y) +special_case (svfloat64_t x, svfloat64_t y, svbool_t special) { return sv_call_f64 (log1p, x, y, special); } @@ -91,8 +105,9 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ svuint64_t utop = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hfrt2_top); - svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, BottomMask)); - svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1); + svuint64_t u_red + = svorr_x (pg, utop, svand_x (svptrue_b64 (), mi, BottomMask)); + svfloat64_t f = svsub_x (svptrue_b64 (), svreinterpret_f64 (u_red), 1); /* Correction term c/m. */ svfloat64_t cm = svdiv_x (pg, svsub_x (pg, x, svsub_x (pg, m, 1)), m); @@ -103,16 +118,47 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) Hence approximation has the form f + f^2 * P(f) where P(x) = C0 + C1*x + C2x^2 + ... Assembling this all correctly is dealt with at the final step. 
*/ - svfloat64_t f2 = svmul_x (pg, f, f), f4 = svmul_x (pg, f2, f2), - f8 = svmul_x (pg, f4, f4), f16 = svmul_x (pg, f8, f8); - svfloat64_t p = sv_estrin_18_f64_x (pg, f, f2, f4, f8, f16, d->poly); + svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f), + f4 = svmul_x (svptrue_b64 (), f2, f2), + f8 = svmul_x (svptrue_b64 (), f4, f4), + f16 = svmul_x (svptrue_b64 (), f8, f8); + + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17); + + /* Order-18 Estrin scheme. */ + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, f2, p23); + svfloat64_t p47 = svmla_x (pg, p45, f2, p67); + svfloat64_t p07 = svmla_x (pg, p03, f4, p47); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1); + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1); + + svfloat64_t p811 = svmla_x (pg, p89, f2, p1011); + svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415); + svfloat64_t p815 = svmla_x (pg, p811, f4, p1215); + + svfloat64_t p015 = svmla_x (pg, p07, f8, p815); + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0); + svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1); + svfloat64_t p = svmla_x (pg, p015, f16, p1618); svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2_lo); svfloat64_t yhi = svmla_x (pg, f, k, d->ln2_hi); - svfloat64_t y = svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (special, x, y); - - return y; + return special_case ( 
+ x, svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p), + special); + return svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p); } diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c index dc15334a85..f2d47962fe 100644 --- a/sysdeps/aarch64/fpu/log1pf_advsimd.c +++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c @@ -18,113 +18,78 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "v_log1pf_inline.h" + +#if WANT_SIMD_EXCEPT const static struct data { - float32x4_t poly[8], ln2; - uint32x4_t tiny_bound, minus_one, four, thresh; - int32x4_t three_quarters; + uint32x4_t minus_one, thresh; + struct v_log1pf_data d; } data = { - .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients - (1, -0.5) are not stored as they can be generated more - efficiently. */ - V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, - .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */ - .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound. */ + .d = V_LOG1PF_CONSTANTS_TABLE, + .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound. */ .minus_one = V4 (0xbf800000), - .four = V4 (0x40800000), - .three_quarters = V4 (0x3f400000) }; -static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *p) -{ - /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme. 
*/ - float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]); - float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]); - float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]); - float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]); - - float32x4_t m2 = vmulq_f32 (m, m); - float32x4_t p_02 = vfmaq_f32 (m, m2, p_12); - float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56); - float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]); - - float32x4_t m4 = vmulq_f32 (m2, m2); - float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36); - return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79)); -} +/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */ +# define TinyBound v_u32 (0x34000000) static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t cmp, const struct data *d) { - return v_call_f32 (log1pf, x, y, special); + /* Side-step special lanes so fenv exceptions are not triggered + inadvertently. */ + float32x4_t x_nospecial = v_zerofy_f32 (x, cmp); + return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp); } -/* Vector log1pf approximation using polynomial on reduced interval. Accuracy - is roughly 2.02 ULP: - log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */ +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.69 ULP: + _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3 + want 0x1.cfcbdcp-3. */ VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t ix = vreinterpretq_u32_f32 (x); uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); + uint32x4_t special_cases - = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh), + = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh), vcgeq_u32 (ix, d->minus_one)); - float32x4_t special_arg = x; -#if WANT_SIMD_EXCEPT if (__glibc_unlikely (v_any_u32 (special_cases))) - /* Side-step special lanes so fenv exceptions are not triggered - inadvertently. 
*/ - x = v_zerofy_f32 (x, special_cases); -#endif + return special_case (x, special_cases, d); - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. */ + return log1pf_inline (x, &d->d); +} - float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); +#else - /* Choose k to scale x to the range [-1/4, 1/2]. */ - int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), - v_s32 (0xff800000)); - uint32x4_t ku = vreinterpretq_u32_s32 (k); +const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE; - /* Scale x by exponent manipulation. */ - float32x4_t m_scale - = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); +static float32x4_t NOINLINE VPCS_ATTR +special_case (float32x4_t x, uint32x4_t cmp) +{ + return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp); +} - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. */ - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); - m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); +/* Vector log1pf approximation using polynomial on reduced interval. Worst-case + error is 1.63 ULP: + _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3 + want 0x1.fdcb16p-3. */ +VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x) +{ + uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)), + vcaleq_f32 (x, v_f32 (0x1p127f))); - /* Evaluate polynomial on the reduced interval. 
*/ - float32x4_t p = eval_poly (m_scale, d->poly); + if (__glibc_unlikely (v_any_u32 (special_cases))) + return special_case (x, special_cases); - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23)); + return log1pf_inline (x, ptr_barrier (&data)); +} - /* Apply the scaling back. */ - float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2); +#endif - if (__glibc_unlikely (v_any_u32 (special_cases))) - return special_case (special_arg, y, special_cases); - return y; -} libmvec_hidden_def (V_NAME_F1 (log1p)) HALF_WIDTH_ALIAS_F1 (log1p) diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c index f645cc997e..4f17c44e2d 100644 --- a/sysdeps/aarch64/fpu/log1pf_sve.c +++ b/sysdeps/aarch64/fpu/log1pf_sve.c @@ -18,30 +18,13 @@ . */ #include "sv_math.h" -#include "poly_sve_f32.h" - -static const struct data -{ - float poly[8]; - float ln2, exp_bias; - uint32_t four, three_quarters; -} data = {.poly = {/* Do not store first term of polynomial, which is -0.5, as - this can be fmov-ed directly instead of including it in - the main load-and-mla polynomial schedule. */ - 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, - 0x1.abcb6p-4f, -0x1.6f0d5ep-5f}, - .ln2 = 0x1.62e43p-1f, - .exp_bias = 0x1p-23f, - .four = 0x40800000, - .three_quarters = 0x3f400000}; - -#define SignExponentMask 0xff800000 +#include "sv_log1pf_inline.h" static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t special) { - return sv_call_f32 (log1pf, x, y, special); + return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()), + special); } /* Vector log1pf approximation using polynomial on reduced interval. Worst-case @@ -50,51 +33,12 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special) want 0x1.9f323ep-2. 
*/ svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) { - const struct data *d = ptr_barrier (&data); /* x < -1, Inf/Nan. */ svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000); special = svorn_z (pg, special, svcmpge (pg, x, -1)); - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. */ - svfloat32_t m = svadd_x (pg, x, 1); - - /* Choose k to scale x to the range [-1/4, 1/2]. */ - svint32_t k - = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters), - sv_s32 (SignExponentMask)); - - /* Scale x by exponent manipulation. */ - svfloat32_t m_scale = svreinterpret_f32 ( - svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k))); - - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number, and scale m down accordingly. */ - svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four)); - m_scale = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1), s, 0.25)); - - /* Evaluate polynomial on reduced interval. */ - svfloat32_t ms2 = svmul_x (pg, m_scale, m_scale), - ms4 = svmul_x (pg, ms2, ms2); - svfloat32_t p = sv_estrin_7_f32_x (pg, m_scale, ms2, ms4, d->poly); - p = svmad_x (pg, m_scale, p, -0.5); - p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p)); - - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->exp_bias); - - /* Apply the scaling back. 
*/ - svfloat32_t y = svmla_x (pg, p, scale_back, d->ln2); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, y, special); + return special_case (x, special); - return y; + return sv_log1pf_inline (x, pg); } diff --git a/sysdeps/aarch64/fpu/log2_advsimd.c b/sysdeps/aarch64/fpu/log2_advsimd.c index 4057c552d8..1eea1f86eb 100644 --- a/sysdeps/aarch64/fpu/log2_advsimd.c +++ b/sysdeps/aarch64/fpu/log2_advsimd.c @@ -18,31 +18,33 @@ . */ #include "v_math.h" -#include "poly_advsimd_f64.h" - -#define N (1 << V_LOG2_TABLE_BITS) static const struct data { - uint64x2_t min_norm; + uint64x2_t off, sign_exp_mask, offset_lower_bound; uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t invln2; - uint64x2_t sign_exp_mask; + float64x2_t c0, c2; + double c1, c3, invln2, c4; } data = { /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9 and N = 128, then scaled by log2(e) in extended precision and rounded back to double precision. */ - .poly = { V2 (-0x1.71547652b83p-1), V2 (0x1.ec709dc340953p-2), - V2 (-0x1.71547651c8f35p-2), V2 (0x1.2777ebe12dda5p-2), - V2 (-0x1.ec738d616fe26p-3) }, - .invln2 = V2 (0x1.71547652b82fep0), - .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */ - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ + .c0 = V2 (-0x1.71547652b8300p-1), + .c1 = 0x1.ec709dc340953p-2, + .c2 = V2 (-0x1.71547651c8f35p-2), + .c3 = 0x1.2777ebe12dda5p-2, + .c4 = -0x1.ec738d616fe26p-3, + .invln2 = 0x1.71547652b82fep0, + .off = V2 (0x3fe6900900000000), .sign_exp_mask = V2 (0xfff0000000000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). */ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-1022). 
*/ }; -#define Off v_u64 (0x3fe6900900000000) +#define N (1 << V_LOG2_TABLE_BITS) #define IndexMask (N - 1) struct entry @@ -67,10 +69,11 @@ lookup (uint64x2_t i) } static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t w, float64x2_t r2, - uint32x2_t special) +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) { - return v_call_f64 (log2, x, vfmaq_f64 (w, r2, y), vmovl_u32 (special)); + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log2, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); } /* Double-precision vector log2 routine. Implements the same algorithm as @@ -81,31 +84,41 @@ special_case (float64x2_t x, float64x2_t y, float64x2_t w, float64x2_t r2, float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - uint64x2_t ix = vreinterpretq_u64_f64 (x); - uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - uint64x2_t tmp = vsubq_u64 (ix, Off); - int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); - struct entry e = lookup (tmp); + struct entry e = lookup (u_off); - /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. 
*/ + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); + /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); float64x2_t kd = vcvtq_f64_s64 (k); - float64x2_t w = vfmaq_f64 (e.log2c, r, d->invln2); + + float64x2_t invln2_and_c4 = vld1q_f64 (&d->invln2); + float64x2_t hi + = vfmaq_laneq_f64 (vaddq_f64 (e.log2c, kd), r, invln2_and_c4, 0); float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly); - w = vaddq_f64 (kd, w); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, invln2_and_c4, 1); + y = vfmaq_f64 (p, r2, y); if (__glibc_unlikely (v_any_u32h (special))) - return special_case (x, y, w, r2, special); - return vfmaq_f64 (w, r2, y); + return special_case (hi, u_off, y, r2, special, d); + return vfmaq_f64 (hi, y, r2); } diff --git a/sysdeps/aarch64/fpu/log2_sve.c b/sysdeps/aarch64/fpu/log2_sve.c index 743fa2a913..908e638246 100644 --- a/sysdeps/aarch64/fpu/log2_sve.c +++ b/sysdeps/aarch64/fpu/log2_sve.c @@ -21,15 +21,32 @@ #include "poly_sve_f64.h" #define N (1 << V_LOG2_TABLE_BITS) -#define Off 0x3fe6900900000000 #define Max (0x7ff0000000000000) #define Min (0x0010000000000000) #define Thresh (0x7fe0000000000000) /* Max - Min. 
*/ +static const struct data +{ + double c0, c2; + double c1, c3; + double invln2, c4; + uint64_t off; +} data = { + .c0 = -0x1.71547652b83p-1, + .c1 = 0x1.ec709dc340953p-2, + .c2 = -0x1.71547651c8f35p-2, + .c3 = 0x1.2777ebe12dda5p-2, + .c4 = -0x1.ec738d616fe26p-3, + .invln2 = 0x1.71547652b82fep0, + .off = 0x3fe6900900000000, +}; + static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) +special_case (svfloat64_t w, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) { - return sv_call_f64 (log2, x, y, cmp); + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log2, x, svmla_x (svptrue_b64 (), w, r2, y), special); } /* Double-precision SVE log2 routine. @@ -40,13 +57,15 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) want 0x1.fffb34198d9ddp-5. */ svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + svuint64_t ix = svreinterpret_u64 (x); svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG2_TABLE_BITS); i = svand_x (pg, i, (N - 1) << 1); svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); @@ -59,15 +78,19 @@ svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg) /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. 
*/ + svfloat64_t invln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->invln2); svfloat64_t r = svmad_x (pg, invc, z, -1.0); - svfloat64_t w = svmla_x (pg, log2c, r, __v_log2_data.invln2); - - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log2_data.poly); + svfloat64_t w = svmla_lane_f64 (log2c, r, invln2_and_c4, 0); w = svadd_x (pg, k, w); + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_lane_f64 (y, r2, invln2_and_c4, 1); + y = svmla_x (pg, p, r2, y); + if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), w, r2, y), - special); + return special_case (w, tmp, y, r2, special, d); return svmla_x (pg, w, r2, y); } diff --git a/sysdeps/aarch64/fpu/log2f_advsimd.c b/sysdeps/aarch64/fpu/log2f_advsimd.c index db21836749..116c36c8e2 100644 --- a/sysdeps/aarch64/fpu/log2f_advsimd.c +++ b/sysdeps/aarch64/fpu/log2f_advsimd.c @@ -18,34 +18,43 @@ . */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { - uint32x4_t min_norm; + float32x4_t c0, c2, c4, c6, c8; + uint32x4_t off, offset_lower_bound; uint16x8_t special_bound; - uint32x4_t off, mantissa_mask; - float32x4_t poly[9]; + uint32x4_t mantissa_mask; + float c1, c3, c5, c7; } data = { /* Coefficients generated using Remez algorithm approximate log2(1+r)/r for r in [ -1/3, 1/3 ]. rel error: 0x1.c4c4b0cp-26. */ - .poly = { V4 (0x1.715476p0f), /* (float)(1 / ln(2)). */ - V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f), - V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f), - V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) }, - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + .c0 = V4 (0x1.715476p0f), /* (float)(1 / ln(2)). 
*/ + .c1 = -0x1.715458p-1f, + .c2 = V4 (0x1.ec701cp-2f), + .c3 = -0x1.7171a4p-2f, + .c4 = V4 (0x1.27a0b8p-2f), + .c5 = -0x1.e5143ep-3f, + .c6 = V4 (0x1.9d8ecap-3f), + .c7 = -0x1.c675bp-3f, + .c8 = V4 (0x1.9e495p-3f), + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. */ .mantissa_mask = V4 (0x007fffff), }; static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, - uint16x4_t cmp) +special_case (float32x4_t n, uint32x4_t u_off, float32x4_t p, float32x4_t r, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); + return v_call_f32 (log2f, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (n, p, r), vmovl_u16 (cmp)); } /* Fast implementation for single precision AdvSIMD log2, @@ -56,24 +65,40 @@ special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r, float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log2) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - uint32x4_t u = vreinterpretq_u32_f32 (x); - uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vreinterpretq_u32_f32 (x); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); + u_off = vsubq_u32 (u_off, d->off); float32x4_t n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. 
*/ - u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off); + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. */ + + uint16x4_t special = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log2(1+r) + n. */ float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly); + + float32x4_t c1357 = vld1q_f32 (&d->c1); + float32x4_t c01 = vfmaq_laneq_f32 (d->c0, r, c1357, 0); + float32x4_t c23 = vfmaq_laneq_f32 (d->c2, r, c1357, 1); + float32x4_t c45 = vfmaq_laneq_f32 (d->c4, r, c1357, 2); + float32x4_t c67 = vfmaq_laneq_f32 (d->c6, r, c1357, 3); + float32x4_t p68 = vfmaq_f32 (c67, r2, d->c8); + float32x4_t p48 = vfmaq_f32 (c45, r2, p68); + float32x4_t p28 = vfmaq_f32 (c23, r2, p48); + float32x4_t p = vfmaq_f32 (c01, r2, p28); if (__glibc_unlikely (v_any_u16h (special))) - return special_case (x, n, p, r, special); + return special_case (n, u_off, p, r, special, d); return vfmaq_f32 (n, p, r); } + libmvec_hidden_def (V_NAME_F1 (log2)) HALF_WIDTH_ALIAS_F1 (log2) diff --git a/sysdeps/aarch64/fpu/log2f_sve.c b/sysdeps/aarch64/fpu/log2f_sve.c index 5031c42483..939d89bfb9 100644 --- a/sysdeps/aarch64/fpu/log2f_sve.c +++ b/sysdeps/aarch64/fpu/log2f_sve.c @@ -23,6 +23,7 @@ static const struct data { float poly_02468[5]; float poly_1357[4]; + uint32_t off, lower; } data = { .poly_1357 = { /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs @@ -32,18 +33,23 @@ static const struct data }, .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f, 0x1.9d8ecap-3f, 0x1.9e495p-3f }, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x00800000 - offset (which wraps around). 
*/ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thres (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define MantissaMask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. */ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (log2f, x, y, cmp); + return sv_call_f32 ( + log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE log2f, using the same algorithm @@ -55,19 +61,20 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, MantissaMask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + svuint32_t u = svand_x (pg, u_off, MantissaMask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log2(1+r) + n. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* Evaluate polynomial using pairwise Horner scheme. 
*/ svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]); @@ -81,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg) y = svmla_x (pg, q_01, r2, y); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special); - return svmla_x (pg, n, r, y); + return special_case (u_off, n, r, y, special); + return svmla_x (svptrue_b32 (), n, r, y); } diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c index 015a6da7d7..b1a27fbc29 100644 --- a/sysdeps/aarch64/fpu/log_advsimd.c +++ b/sysdeps/aarch64/fpu/log_advsimd.c @@ -21,27 +21,29 @@ static const struct data { - uint64x2_t min_norm; + uint64x2_t off, sign_exp_mask, offset_lower_bound; uint32x4_t special_bound; - float64x2_t poly[5]; - float64x2_t ln2; - uint64x2_t sign_exp_mask; + float64x2_t c0, c2; + double c1, c3, ln2, c4; } data = { - /* Worst-case error: 1.17 + 0.5 ulp. - Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ - .poly = { V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2), - V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3), - V2 (-0x1.554e550bd501ep-3) }, - .ln2 = V2 (0x1.62e42fefa39efp-1), - .min_norm = V2 (0x0010000000000000), - .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ - .sign_exp_mask = V2 (0xfff0000000000000) + /* Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ + .c0 = V2 (-0x1.ffffffffffff7p-2), + .c1 = 0x1.55555555170d4p-2, + .c2 = V2 (-0x1.0000000399c27p-2), + .c3 = 0x1.999b2e90e94cap-3, + .c4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + .sign_exp_mask = V2 (0xfff0000000000000), + .off = V2 (0x3fe6900900000000), + /* Lower bound is 0x0010000000000000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound - offset (which wraps around). 
*/ + .offset_lower_bound = V2 (0x0010000000000000 - 0x3fe6900900000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - asuint64(0x1p-126). */ }; -#define A(i) d->poly[i] #define N (1 << V_LOG_TABLE_BITS) #define IndexMask (N - 1) -#define Off v_u64 (0x3fe6900900000000) struct entry { @@ -64,48 +66,56 @@ lookup (uint64x2_t i) } static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, - uint32x2_t cmp) +special_case (float64x2_t hi, uint64x2_t u_off, float64x2_t y, float64x2_t r2, + uint32x2_t special, const struct data *d) { - return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (cmp)); + float64x2_t x = vreinterpretq_f64_u64 (vaddq_u64 (u_off, d->off)); + return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (special)); } +/* Double-precision vector log routine. + The maximum observed error is 2.17 ULP: + _ZGVnN2v_log(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2 + want 0x1.ffffff1cca045p-2. */ float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x) { const struct data *d = ptr_barrier (&data); - float64x2_t z, r, r2, p, y, kd, hi; - uint64x2_t ix, iz, tmp; - uint32x2_t cmp; - int64x2_t k; - struct entry e; - ix = vreinterpretq_u64_f64 (x); - cmp = vcge_u32 (vsubhn_u64 (ix, d->min_norm), - vget_low_u32 (d->special_bound)); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint64x2_t u = vreinterpretq_u64_f64 (x); + uint64x2_t u_off = vsubq_u64 (u, d->off); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - tmp = vsubq_u64 (ix, Off); - k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. 
*/ - iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); - z = vreinterpretq_f64_u64 (iz); - e = lookup (tmp); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52); + uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->sign_exp_mask)); + float64x2_t z = vreinterpretq_f64_u64 (iz); + + struct entry e = lookup (u_off); + + uint32x2_t special = vcge_u32 (vsubhn_u64 (u_off, d->offset_lower_bound), + vget_low_u32 (d->special_bound)); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); - kd = vcvtq_f64_s64 (k); + float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); + float64x2_t kd = vcvtq_f64_s64 (k); /* hi = r + log(c) + k*Ln2. */ - hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); + float64x2_t ln2_and_c4 = vld1q_f64 (&d->ln2); + float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_c4, 0); + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - r2 = vmulq_f64 (r, r); - y = vfmaq_f64 (A (2), A (3), r); - p = vfmaq_f64 (A (0), A (1), r); - y = vfmaq_f64 (y, A (4), r2); - y = vfmaq_f64 (p, y, r2); - - if (__glibc_unlikely (v_any_u32h (cmp))) - return special_case (x, y, hi, r2, cmp); + float64x2_t odd_coeffs = vld1q_f64 (&d->c1); + float64x2_t r2 = vmulq_f64 (r, r); + float64x2_t y = vfmaq_laneq_f64 (d->c2, r, odd_coeffs, 1); + float64x2_t p = vfmaq_laneq_f64 (d->c0, r, odd_coeffs, 0); + y = vfmaq_laneq_f64 (y, r2, ln2_and_c4, 1); + y = vfmaq_f64 (p, r2, y); + + if (__glibc_unlikely (v_any_u32h (special))) + return special_case (hi, u_off, y, r2, special, d); return vfmaq_f64 (hi, y, r2); } diff --git a/sysdeps/aarch64/fpu/log_sve.c b/sysdeps/aarch64/fpu/log_sve.c index 9b689f2ec7..044223400b 100644 --- a/sysdeps/aarch64/fpu/log_sve.c +++ b/sysdeps/aarch64/fpu/log_sve.c @@ -19,39 +19,54 @@ #include "sv_math.h" -#define P(i) sv_f64 (__v_log_data.poly[i]) #define N (1 << V_LOG_TABLE_BITS) -#define Off (0x3fe6900900000000) -#define MaxTop (0x7ff) -#define MinTop (0x001) -#define ThreshTop (0x7fe) /* 
MaxTop - MinTop. */ +#define Max (0x7ff0000000000000) +#define Min (0x0010000000000000) +#define Thresh (0x7fe0000000000000) /* Max - Min. */ + +static const struct data +{ + double c0, c2; + double c1, c3; + double ln2, c4; + uint64_t off; +} data = { + .c0 = -0x1.ffffffffffff7p-2, + .c1 = 0x1.55555555170d4p-2, + .c2 = -0x1.0000000399c27p-2, + .c3 = 0x1.999b2e90e94cap-3, + .c4 = -0x1.554e550bd501ep-3, + .ln2 = 0x1.62e42fefa39efp-1, + .off = 0x3fe6900900000000, +}; static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp) +special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, + svbool_t special, const struct data *d) { - return sv_call_f64 (log, x, y, cmp); + svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); + return sv_call_f64 (log, x, svmla_x (svptrue_b64 (), hi, r2, y), special); } -/* SVE port of AdvSIMD log algorithm. - Maximum measured error is 2.17 ulp: - SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2 - want 0x1.ffffff1cca045p-2. */ +/* Double-precision SVE log routine. + Maximum measured error is 2.64 ulp: + SV_NAME_D1 (log)(0x1.95e54bc91a5e2p+184) got 0x1.fffffffe88cacp+6 + want 0x1.fffffffe88cafp+6. */ svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + svuint64_t ix = svreinterpret_u64 (x); - svuint64_t top = svlsr_x (pg, ix, 52); - svbool_t cmp = svcmpge (pg, svsub_x (pg, top, MinTop), sv_u64 (ThreshTop)); + svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N. The actual value of i is double this due to table layout. 
*/ svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1); - svint64_t k - = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift. */ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); svfloat64_t z = svreinterpret_f64 (iz); /* Lookup in 2 global lists (length N). */ @@ -59,18 +74,22 @@ svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg) svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - svfloat64_t r = svmad_x (pg, invc, z, -1); - svfloat64_t kd = svcvt_f64_x (pg, k); + svfloat64_t kd = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); /* hi = r + log(c) + k*Ln2. */ - svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2); + svfloat64_t ln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->ln2); + svfloat64_t r = svmad_x (pg, invc, z, -1); + svfloat64_t hi = svmla_lane_f64 (logc, kd, ln2_and_c4, 0); + hi = svadd_x (pg, r, hi); + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. 
*/ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, P (2), r, P (3)); - svfloat64_t p = svmla_x (pg, P (0), r, P (1)); - y = svmla_x (pg, y, r2, P (4)); + svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t y = svmla_lane_f64 (sv_f64 (d->c2), r, odd_coeffs, 1); + svfloat64_t p = svmla_lane_f64 (sv_f64 (d->c0), r, odd_coeffs, 0); + y = svmla_lane_f64 (y, r2, ln2_and_c4, 1); y = svmla_x (pg, p, r2, y); - if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), hi, r2, y), cmp); + if (__glibc_unlikely (svptest_any (pg, special))) + return special_case (hi, tmp, y, r2, special, d); return svmla_x (pg, hi, r2, y); } diff --git a/sysdeps/aarch64/fpu/logf_advsimd.c b/sysdeps/aarch64/fpu/logf_advsimd.c index 3c0d0fcdc7..d9e64c732d 100644 --- a/sysdeps/aarch64/fpu/logf_advsimd.c +++ b/sysdeps/aarch64/fpu/logf_advsimd.c @@ -21,66 +21,71 @@ static const struct data { - uint32x4_t min_norm; + float32x4_t c2, c4, c6, ln2; + uint32x4_t off, offset_lower_bound, mantissa_mask; uint16x8_t special_bound; - float32x4_t poly[7]; - float32x4_t ln2, tiny_bound; - uint32x4_t off, mantissa_mask; + float c1, c3, c5, c0; } data = { /* 3.34 ulp error. */ - .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), - V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), - V4 (-0x1.ffffc8p-2f) }, + .c0 = -0x1.3e737cp-3f, + .c1 = 0x1.5a9aa2p-3f, + .c2 = V4 (-0x1.4f9934p-3f), + .c3 = 0x1.961348p-3f, + .c4 = V4 (-0x1.00187cp-2f), + .c5 = 0x1.555d7cp-2f, + .c6 = V4 (-0x1.ffffc8p-2f), .ln2 = V4 (0x1.62e43p-1f), - .tiny_bound = V4 (0x1p-126), - .min_norm = V4 (0x00800000), - .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + /* Lower bound is the smallest positive normal float 0x00800000. 
For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). */ + .offset_lower_bound = V4 (0x00800000 - 0x3f2aaaab), + .special_bound = V8 (0x7f00), /* top16(asuint32(inf) - 0x00800000). */ .off = V4 (0x3f2aaaab), /* 0.666667. */ .mantissa_mask = V4 (0x007fffff) }; -#define P(i) d->poly[7 - i] - static float32x4_t VPCS_ATTR NOINLINE -special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p, - uint16x4_t cmp) +special_case (float32x4_t p, uint32x4_t u_off, float32x4_t y, float32x4_t r2, + uint16x4_t cmp, const struct data *d) { /* Fall back to scalar code. */ - return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); + return v_call_f32 (logf, vreinterpretq_f32_u32 (vaddq_u32 (u_off, d->off)), + vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); } float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - float32x4_t n, p, q, r, r2, y; - uint32x4_t u; - uint16x4_t cmp; + float32x4_t c1350 = vld1q_f32 (&d->c1); - u = vreinterpretq_u32_f32 (x); - cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm), - vget_low_u16 (d->special_bound)); + /* To avoid having to mov x out of the way, keep u after offset has been + applied, and recover x by adding the offset back in the special-case + handler. */ + uint32x4_t u_off = vsubq_u32 (vreinterpretq_u32_f32 (x), d->off); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = vsubq_u32 (u, d->off); - n = vcvtq_f32_s32 ( - vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ - u = vandq_u32 (u, d->mantissa_mask); - u = vaddq_u32 (u, d->off); - r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + float32x4_t n = vcvtq_f32_s32 ( + vshrq_n_s32 (vreinterpretq_s32_u32 (u_off), 23)); /* signextend. 
*/ + uint16x4_t cmp = vcge_u16 (vsubhn_u32 (u_off, d->offset_lower_bound), + vget_low_u16 (d->special_bound)); + + uint32x4_t u = vaddq_u32 (vandq_u32 (u_off, d->mantissa_mask), d->off); + float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); /* y = log(1+r) + n*ln2. */ - r2 = vmulq_f32 (r, r); + float32x4_t r2 = vmulq_f32 (r, r); /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ - p = vfmaq_f32 (P (5), P (6), r); - q = vfmaq_f32 (P (3), P (4), r); - y = vfmaq_f32 (P (1), P (2), r); - p = vfmaq_f32 (p, P (7), r2); + float32x4_t p = vfmaq_laneq_f32 (d->c2, r, c1350, 0); + float32x4_t q = vfmaq_laneq_f32 (d->c4, r, c1350, 1); + float32x4_t y = vfmaq_laneq_f32 (d->c6, r, c1350, 2); + p = vfmaq_laneq_f32 (p, r2, c1350, 3); + q = vfmaq_f32 (q, p, r2); y = vfmaq_f32 (y, q, r2); p = vfmaq_f32 (r, d->ln2, n); if (__glibc_unlikely (v_any_u16h (cmp))) - return special_case (x, y, r2, p, cmp); + return special_case (p, u_off, y, r2, cmp, d); return vfmaq_f32 (p, y, r2); } libmvec_hidden_def (V_NAME_F1 (log)) diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c index d64e810cfe..5b9324678d 100644 --- a/sysdeps/aarch64/fpu/logf_sve.c +++ b/sysdeps/aarch64/fpu/logf_sve.c @@ -24,6 +24,7 @@ static const struct data float poly_0135[4]; float poly_246[3]; float ln2; + uint32_t off, lower; } data = { .poly_0135 = { /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so @@ -32,19 +33,24 @@ static const struct data -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f }, .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f }, - .ln2 = 0x1.62e43p-1f + .ln2 = 0x1.62e43p-1f, + .off = 0x3f2aaaab, + /* Lower bound is the smallest positive normal float 0x00800000. For + optimised register use subnormals are detected after offset has been + subtracted, so lower bound is 0x0080000 - offset (which wraps around). 
*/ + .lower = 0x00800000 - 0x3f2aaaab }; -#define Min (0x00800000) -#define Max (0x7f800000) -#define Thresh (0x7f000000) /* Max - Min. */ +#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */ #define Mask (0x007fffff) -#define Off (0x3f2aaaab) /* 0.666667. */ static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp) +special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y, + svbool_t cmp) { - return sv_call_f32 (logf, x, y, cmp); + return sv_call_f32 ( + logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)), + svmla_x (svptrue_b32 (), p, r2, y), cmp); } /* Optimised implementation of SVE logf, using the same algorithm and @@ -55,19 +61,21 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint32_t u = svreinterpret_u32 (x); - svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh); + svuint32_t u_off = svreinterpret_u32 (x); + + u_off = svsub_x (pg, u_off, d->off); + svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh); /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ - u = svsub_x (pg, u, Off); svfloat32_t n = svcvt_f32_x ( - pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */ - u = svand_x (pg, u, Mask); - u = svadd_x (pg, u, Off); + pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */ + + svuint32_t u = svand_x (pg, u_off, Mask); + u = svadd_x (pg, u, d->off); svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f); /* y = log(1+r) + n*ln2. */ - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). 
*/ svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]); svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1); @@ -80,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg) p = svmla_x (pg, r, n, d->ln2); if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp); + return special_case (u_off, p, r2, y, cmp); return svmla_x (pg, p, r2, y); } diff --git a/sysdeps/aarch64/fpu/pow_advsimd.c b/sysdeps/aarch64/fpu/pow_advsimd.c index 3c91e3e183..81e134ac2f 100644 --- a/sysdeps/aarch64/fpu/pow_advsimd.c +++ b/sysdeps/aarch64/fpu/pow_advsimd.c @@ -22,9 +22,6 @@ /* Defines parameters of the approximation and scalar fallback. */ #include "finite_pow.h" -#define VecSmallExp v_u64 (SmallExp) -#define VecThresExp v_u64 (ThresExp) - #define VecSmallPowX v_u64 (SmallPowX) #define VecThresPowX v_u64 (ThresPowX) #define VecSmallPowY v_u64 (SmallPowY) @@ -32,36 +29,48 @@ static const struct data { - float64x2_t log_poly[6]; - float64x2_t exp_poly[3]; - float64x2_t ln2_hi, ln2_lo; - float64x2_t shift, inv_ln2_n, ln2_hi_n, ln2_lo_n, small_powx; uint64x2_t inf; + float64x2_t small_powx; + uint64x2_t offset, mask; + uint64x2_t mask_sub_0, mask_sub_1; + float64x2_t log_c0, log_c2, log_c4, log_c5; + double log_c1, log_c3; + double ln2_lo, ln2_hi; + uint64x2_t small_exp, thres_exp; + double ln2_lo_n, ln2_hi_n; + double inv_ln2_n, exp_c2; + float64x2_t exp_c0, exp_c1; } data = { + /* Power threshold. */ + .inf = V2 (0x7ff0000000000000), + .small_powx = V2 (0x1p-126), + .offset = V2 (Off), + .mask = V2 (0xfffULL << 52), + .mask_sub_0 = V2 (1ULL << 52), + .mask_sub_1 = V2 (52ULL << 52), /* Coefficients copied from v_pow_log_data.c relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8] Coefficients are scaled to match the scaling during evaluation. 
*/ - .log_poly - = { V2 (0x1.555555555556p-2 * -2), V2 (-0x1.0000000000006p-2 * -2), - V2 (0x1.999999959554ep-3 * 4), V2 (-0x1.555555529a47ap-3 * 4), - V2 (0x1.2495b9b4845e9p-3 * -8), V2 (-0x1.0002b8b263fc3p-3 * -8) }, - .ln2_hi = V2 (0x1.62e42fefa3800p-1), - .ln2_lo = V2 (0x1.ef35793c76730p-45), + .log_c0 = V2 (0x1.555555555556p-2 * -2), + .log_c1 = -0x1.0000000000006p-2 * -2, + .log_c2 = V2 (0x1.999999959554ep-3 * 4), + .log_c3 = -0x1.555555529a47ap-3 * 4, + .log_c4 = V2 (0x1.2495b9b4845e9p-3 * -8), + .log_c5 = V2 (-0x1.0002b8b263fc3p-3 * -8), + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549 (0.550 without fma) if |x| < ln2/512. */ - .exp_poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6ef9p-3), - V2 (0x1.5555576a5adcep-5) }, - .shift = V2 (0x1.8p52), /* round to nearest int. without intrinsics. */ - .inv_ln2_n = V2 (0x1.71547652b82fep8), /* N/ln2. */ - .ln2_hi_n = V2 (0x1.62e42fefc0000p-9), /* ln2/N. */ - .ln2_lo_n = V2 (-0x1.c610ca86c3899p-45), - .small_powx = V2 (0x1p-126), - .inf = V2 (0x7ff0000000000000) + .exp_c0 = V2 (0x1.fffffffffffd4p-2), + .exp_c1 = V2 (0x1.5555571d6ef9p-3), + .exp_c2 = 0x1.5555576a5adcep-5, + .small_exp = V2 (0x3c90000000000000), + .thres_exp = V2 (0x03f0000000000000), + .inv_ln2_n = 0x1.71547652b82fep8, /* N/ln2. */ + .ln2_hi_n = 0x1.62e42fefc0000p-9, /* ln2/N. */ + .ln2_lo_n = -0x1.c610ca86c3899p-45, }; -#define A(i) data.log_poly[i] -#define C(i) data.exp_poly[i] - /* This version implements an algorithm close to scalar pow but - does not implement the trick in the exp's specialcase subroutine to avoid double-rounding, @@ -91,10 +100,9 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. 
*/ - uint64x2_t tmp = vsubq_u64 (ix, v_u64 (Off)); - int64x2_t k - = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */ - uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, v_u64 (0xfffULL << 52))); + uint64x2_t tmp = vsubq_u64 (ix, d->offset); + int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); + uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->mask)); float64x2_t z = vreinterpretq_f64_u64 (iz); float64x2_t kd = vcvtq_f64_s64 (k); /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ @@ -105,9 +113,10 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc); /* k*Ln2 + log(c) + r. */ - float64x2_t t1 = vfmaq_f64 (logc, kd, d->ln2_hi); + float64x2_t ln2 = vld1q_f64 (&d->ln2_lo); + float64x2_t t1 = vfmaq_laneq_f64 (logc, kd, ln2, 1); float64x2_t t2 = vaddq_f64 (t1, r); - float64x2_t lo1 = vfmaq_f64 (logctail, kd, d->ln2_lo); + float64x2_t lo1 = vfmaq_laneq_f64 (logctail, kd, ln2, 0); float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r); /* Evaluation is optimized assuming superscalar pipelined execution. */ float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r); @@ -118,9 +127,10 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d) float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r); float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2); /* p = log1p(r) - r - A[0]*r*r. 
*/ - float64x2_t a56 = vfmaq_f64 (A (4), r, A (5)); - float64x2_t a34 = vfmaq_f64 (A (2), r, A (3)); - float64x2_t a12 = vfmaq_f64 (A (0), r, A (1)); + float64x2_t odd_coeffs = vld1q_f64 (&d->log_c1); + float64x2_t a56 = vfmaq_f64 (d->log_c4, r, d->log_c5); + float64x2_t a34 = vfmaq_laneq_f64 (d->log_c2, r, odd_coeffs, 1); + float64x2_t a12 = vfmaq_laneq_f64 (d->log_c0, r, odd_coeffs, 0); float64x2_t p = vfmaq_f64 (a34, ar2, a56); p = vfmaq_f64 (a12, ar2, p); p = vmulq_f64 (ar3, p); @@ -140,28 +150,28 @@ exp_special_case (float64x2_t x, float64x2_t xtail) /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */ static inline float64x2_t -v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d) +v_exp_inline (float64x2_t x, float64x2_t neg_xtail, const struct data *d) { /* Fallback to scalar exp_inline for all lanes if any lane contains value of x s.t. |x| <= 2^-54 or >= 512. */ - uint64x2_t abstop - = vshrq_n_u64 (vandq_u64 (vreinterpretq_u64_f64 (x), d->inf), 52); - uint64x2_t uoflowx - = vcgeq_u64 (vsubq_u64 (abstop, VecSmallExp), VecThresExp); + uint64x2_t uoflowx = vcgeq_u64 ( + vsubq_u64 (vreinterpretq_u64_f64 (vabsq_f64 (x)), d->small_exp), + d->thres_exp); if (__glibc_unlikely (v_any_u64 (uoflowx))) - return exp_special_case (x, xtail); + return exp_special_case (x, vnegq_f64 (neg_xtail)); /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */ - float64x2_t z = vmulq_f64 (d->inv_ln2_n, x); /* z - kd is in [-1, 1] in non-nearest rounding modes. 
*/ - float64x2_t kd = vaddq_f64 (z, d->shift); - uint64x2_t ki = vreinterpretq_u64_f64 (kd); - kd = vsubq_f64 (kd, d->shift); - float64x2_t r = vfmsq_f64 (x, kd, d->ln2_hi_n); - r = vfmsq_f64 (r, kd, d->ln2_lo_n); + float64x2_t exp_consts = vld1q_f64 (&d->inv_ln2_n); + float64x2_t z = vmulq_laneq_f64 (x, exp_consts, 0); + float64x2_t kd = vrndnq_f64 (z); + uint64x2_t ki = vreinterpretq_u64_s64 (vcvtaq_s64_f64 (z)); + float64x2_t ln2_n = vld1q_f64 (&d->ln2_lo_n); + float64x2_t r = vfmsq_laneq_f64 (x, kd, ln2_n, 1); + r = vfmsq_laneq_f64 (r, kd, ln2_n, 0); /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r = vaddq_f64 (r, xtail); + r = vsubq_f64 (r, neg_xtail); /* 2^(k/N) ~= scale. */ uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1)); uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS); @@ -170,8 +180,8 @@ v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d) sbits = vaddq_u64 (sbits, top); /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ float64x2_t r2 = vmulq_f64 (r, r); - float64x2_t tmp = vfmaq_f64 (C (1), r, C (2)); - tmp = vfmaq_f64 (C (0), r, tmp); + float64x2_t tmp = vfmaq_laneq_f64 (d->exp_c1, r, exp_consts, 1); + tmp = vfmaq_f64 (d->exp_c0, r, tmp); tmp = vfmaq_f64 (r, r2, tmp); float64x2_t scale = vreinterpretq_f64_u64 (sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there @@ -230,8 +240,8 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) { /* Normalize subnormal x so exponent becomes negative. */ uint64x2_t vix_norm = vreinterpretq_u64_f64 ( - vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (v_u64 (1ULL << 52))))); - vix_norm = vsubq_u64 (vix_norm, v_u64 (52ULL << 52)); + vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (d->mask_sub_0)))); + vix_norm = vsubq_u64 (vix_norm, d->mask_sub_1); vix = vbslq_u64 (sub_x, vix_norm, vix); } } @@ -242,8 +252,7 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) /* Vector Exp(y_loghi, y_loglo). 
*/ float64x2_t vehi = vmulq_f64 (y, vhi); - float64x2_t velo = vmulq_f64 (y, vlo); float64x2_t vemi = vfmsq_f64 (vehi, y, vhi); - velo = vsubq_f64 (velo, vemi); - return v_exp_inline (vehi, velo, d); + float64x2_t neg_velo = vfmsq_f64 (vemi, y, vlo); + return v_exp_inline (vehi, neg_velo, d); } diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c index 4c0bf8956c..a34db23594 100644 --- a/sysdeps/aarch64/fpu/pow_sve.c +++ b/sysdeps/aarch64/fpu/pow_sve.c @@ -31,8 +31,8 @@ The SVE algorithm drops the tail in the exp computation at the price of a lower accuracy, slightly above 1ULP. The SVE algorithm also drops the special treatement of small (< 2^-65) and - large (> 2^63) finite values of |y|, as they only affect non-round to nearest - modes. + large (> 2^63) finite values of |y|, as they only affect non-round to + nearest modes. Maximum measured error is 1.04 ULPs: SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12) @@ -44,19 +44,18 @@ /* Data is defined in v_pow_log_data.c. */ #define N_LOG (1 << V_POW_LOG_TABLE_BITS) -#define A __v_pow_log_data.poly #define Off 0x3fe6955500000000 /* Data is defined in v_pow_exp_data.c. */ #define N_EXP (1 << V_POW_EXP_TABLE_BITS) #define SignBias (0x800 << V_POW_EXP_TABLE_BITS) -#define C __v_pow_exp_data.poly #define SmallExp 0x3c9 /* top12(0x1p-54). */ #define BigExp 0x408 /* top12(512.). */ #define ThresExp 0x03f /* BigExp - SmallExp. */ #define HugeExp 0x409 /* top12(1024.). */ /* Constants associated with pow. */ +#define SmallBoundX 0x1p-126 #define SmallPowX 0x001 /* top12(0x1p-126). */ #define BigPowX 0x7ff /* top12(INFINITY). */ #define ThresPowX 0x7fe /* BigPowX - SmallPowX. */ @@ -64,6 +63,31 @@ #define BigPowY 0x43e /* top12(0x1.749p62). */ #define ThresPowY 0x080 /* BigPowY - SmallPowY. 
*/ +static const struct data +{ + double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo; + double log_c1, log_c3, log_c5, off; + double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo; + double exp_c0, exp_c1; +} data = { + .log_c0 = -0x1p-1, + .log_c1 = -0x1.555555555556p-1, + .log_c2 = 0x1.0000000000006p-1, + .log_c3 = 0x1.999999959554ep-1, + .log_c4 = -0x1.555555529a47ap-1, + .log_c5 = -0x1.2495b9b4845e9p0, + .log_c6 = 0x1.0002b8b263fc3p0, + .off = Off, + .exp_c0 = 0x1.fffffffffffd4p-2, + .exp_c1 = 0x1.5555571d6ef9p-3, + .exp_c2 = 0x1.5555576a5adcep-5, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP, + .ln2_over_n_hi = 0x1.62e42fefc0000p-9, + .ln2_over_n_lo = -0x1.c610ca86c3899p-45, +}; + /* Check if x is an integer. */ static inline svbool_t sv_isint (svbool_t pg, svfloat64_t x) @@ -82,7 +106,7 @@ sv_isnotint (svbool_t pg, svfloat64_t x) static inline svbool_t sv_isodd (svbool_t pg, svfloat64_t x) { - svfloat64_t y = svmul_x (pg, x, 0.5); + svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5); return sv_isnotint (pg, y); } @@ -121,7 +145,7 @@ zeroinfnan (uint64_t i) static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint64_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2 * asuint64 (INFINITY) - 1); } @@ -132,65 +156,46 @@ sv_zeroinfnan (svbool_t pg, svuint64_t i) a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double -specialcase (double tmp, uint64_t sbits, uint64_t ki) -{ - double scale; - if ((ki & 0x80000000) == 0) - { - /* k > 0, the exponent of scale might have overflowed by <= 460. */ - sbits -= 1009ull << 52; - scale = asdouble (sbits); - return 0x1p1009 * (scale + scale * tmp); - } - /* k < 0, need special care in the subnormal range. 
*/ - sbits += 1022ull << 52; - /* Note: sbits is signed scale. */ - scale = asdouble (sbits); - double y = scale + scale * tmp; - return 0x1p-1022 * y; -} - -/* Scalar fallback for special cases of SVE pow's exp. */ static inline svfloat64_t -sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2, - svfloat64_t y, svbool_t cmp) +specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp) { - svbool_t p = svpfirst (cmp, svpfalse ()); - while (svptest_any (cmp, p)) - { - double sx1 = svclastb (p, 0, x1); - uint64_t su1 = svclastb (p, 0, u1); - uint64_t su2 = svclastb (p, 0, u2); - double elem = specialcase (sx1, su1, su2); - svfloat64_t y2 = sv_f64 (elem); - y = svsel (p, y2, y); - p = svpnext_b64 (cmp, p); - } - return y; + svbool_t p_pos = svcmpge_n_f64 (cmp, svreinterpret_f64_u64 (ki), 0.0); + + /* Scale up or down depending on sign of k. */ + svint64_t offset + = svsel_s64 (p_pos, sv_s64 (1009ull << 52), sv_s64 (-1022ull << 52)); + svfloat64_t factor + = svsel_f64 (p_pos, sv_f64 (0x1p1009), sv_f64 (0x1p-1022)); + + svuint64_t offset_sbits + = svsub_u64_x (cmp, sbits, svreinterpret_u64_s64 (offset)); + svfloat64_t scale = svreinterpret_f64_u64 (offset_sbits); + svfloat64_t res = svmad_f64_x (cmp, scale, tmp, scale); + return svmul_f64_x (cmp, res, factor); } /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ static inline svfloat64_t -sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) +sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail, + const struct data *d) { /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. 
*/ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS), sv_u64 (N_LOG - 1)); svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52))); + svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52)); svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t kd = svcvt_f64_x (pg, k); /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version - that uses array of structures. We also do the lookup earlier in the code to - make sure it finishes as early as possible. */ + that uses array of structures. We also do the lookup earlier in the code + to make sure it finishes as early as possible. */ svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i); svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i); svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i); @@ -199,40 +204,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ svfloat64_t r = svmad_x (pg, z, invc, -1.0); /* k*Ln2 + log(c) + r. */ - svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi); + + svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi); + svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0); svfloat64_t t2 = svadd_x (pg, t1, r); - svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo); + svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1); svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r); /* Evaluation is optimized assuming superscalar pipelined execution. */ - svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5. 
*/ - svfloat64_t ar2 = svmul_x (pg, r, ar); - svfloat64_t ar3 = svmul_x (pg, r, ar2); + + svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0); + svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0); + svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar); + svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2); /* k*Ln2 + log(c) + r + A[0]*r*r. */ svfloat64_t hi = svadd_x (pg, t2, ar2); - svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r); + svfloat64_t lo3 = svmls_x (pg, ar2, ar, r); svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2); /* p = log1p(r) - r - A[0]*r*r. */ /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))). */ - svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]); - svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]); - svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]); + + svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4); + svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1); + svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0); + svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1); svfloat64_t p = svmla_x (pg, a34, ar2, a56); p = svmla_x (pg, a12, ar2, p); - p = svmul_x (pg, ar3, p); + p = svmul_x (svptrue_b64 (), ar3, p); svfloat64_t lo = svadd_x ( - pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); + pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); svfloat64_t y = svadd_x (pg, hi, lo); *tail = svadd_x (pg, svsub_x (pg, hi, y), lo); return y; } +static inline svfloat64_t +sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail, + svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits, + svuint64_t *ki, const struct data *d) +{ + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. 
*/ + svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2); + svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0); + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + svfloat64_t kd = svrinta_x (pg, z); + *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd)); + + svfloat64_t ln2_over_n_hilo + = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi); + svfloat64_t r = x; + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0); + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1); + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r = svadd_x (pg, r, xtail); + /* 2^(k/N) ~= scale. */ + svuint64_t idx = svand_x (pg, *ki, N_EXP - 1); + svuint64_t top + = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); + *sbits = svadd_x (pg, *sbits, top); + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1); + *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp); + *tmp = svmla_x (pg, r, r2, *tmp); + svfloat64_t scale = svreinterpret_f64 (*sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + z = svmla_x (pg, scale, scale, *tmp); + return z; +} + /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */ static inline svfloat64_t sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, - svuint64_t sign_bias) + svuint64_t sign_bias, const struct data *d) { /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow) and other cases of large values of x (scale * (1 + TMP) oflow). 
*/ @@ -240,77 +290,50 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */ svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp); - /* Conditions special, uflow and oflow are all expressed as uoflow && - something, hence do not bother computing anything if no lane in uoflow is - true. */ - svbool_t special = svpfalse_b (); - svbool_t uflow = svpfalse_b (); - svbool_t oflow = svpfalse_b (); + svfloat64_t tmp; + svuint64_t sbits, ki; if (__glibc_unlikely (svptest_any (pg, uoflow))) { + svfloat64_t z + = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); + /* |x| is tiny (|x| <= 0x1p-54). */ - uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); + svbool_t uflow + = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); uflow = svand_z (pg, uoflow, uflow); /* |x| is huge (|x| >= 1024). */ - oflow = svcmpge (pg, abstop, HugeExp); + svbool_t oflow = svcmpge (pg, abstop, HugeExp); oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow)); - /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow - or underflow. */ - special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); - } - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2); - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift); - svfloat64_t kd = svadd_x (pg, z, shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, shift); - svfloat64_t r = x; - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi); - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo); - /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r = svadd_x (pg, r, xtail); - /* 2^(k/N) ~= scale. 
*/ - svuint64_t idx = svand_x (pg, ki, N_EXP - 1); - svuint64_t top - = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); - sbits = svadd_x (pg, sbits, top); - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]); - tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp); - tmp = svmla_x (pg, r, r2, tmp); - svfloat64_t scale = svreinterpret_f64 (sbits); - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - z = svmla_x (pg, scale, scale, tmp); - - /* Update result with special and large cases. */ - if (__glibc_unlikely (svptest_any (pg, special))) - z = sv_call_specialcase (tmp, sbits, ki, z, special); - - /* Handle underflow and overflow. */ - svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63); - svbool_t x_is_neg = svcmpne (pg, sign_bit, 0); - svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); - svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); - res_uoflow = svreinterpret_f64 ( - svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); - z = svsel (oflow, res_uoflow, z); - /* Avoid spurious underflow for tiny x. */ - svfloat64_t res_spurious_uflow - = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); - z = svsel (uflow, res_spurious_uflow, z); + /* Handle underflow and overflow in scale. + For large |x| values (512 < |x| < 1024), scale * (1 + TMP) can + overflow or underflow. */ + svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + if (__glibc_unlikely (svptest_any (pg, special))) + z = svsel (special, specialcase (tmp, sbits, ki, special), z); + + /* Handle underflow and overflow in exp. 
*/ + svbool_t x_is_neg = svcmplt (pg, x, 0); + svuint64_t sign_mask + = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); + svfloat64_t res_uoflow + = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); + res_uoflow = svreinterpret_f64 ( + svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); + /* Avoid spurious underflow for tiny x. */ + svfloat64_t res_spurious_uflow + = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); + + z = svsel (oflow, res_uoflow, z); + z = svsel (uflow, res_spurious_uflow, z); + return z; + } - return z; + return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); } static inline double -pow_sc (double x, double y) +pow_specialcase (double x, double y) { uint64_t ix = asuint64 (x); uint64_t iy = asuint64 (y); @@ -339,49 +362,49 @@ pow_sc (double x, double y) return x; } +/* Scalar fallback for special case routines with custom signature. */ +static svfloat64_t NOINLINE +sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y, + svbool_t cmp) +{ + return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp); +} + svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + /* This preamble handles special case conditions used in the final scalar fallbacks. It also updates ix and sign_bias, that are used in the core computation too, i.e., exp( y * log (x) ). */ svuint64_t vix0 = svreinterpret_u64 (x); svuint64_t viy0 = svreinterpret_u64 (y); - svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52); /* Negative x cases. */ - svuint64_t sign_bit = svlsr_m (pg, vix0, 63); - svbool_t xisneg = svcmpeq (pg, sign_bit, 1); + svbool_t xisneg = svcmplt (pg, x, 0); /* Set sign_bias and ix depending on sign of x and nature of y. 
*/ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint64_t sign_bias = sv_u64 (0); svuint64_t vix = vix0; - svuint64_t vtopx1 = vtopx0; if (__glibc_unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = sv_isnotint (xisneg, y); - svbool_t yisint_xisneg = sv_isint (xisneg, y); + yint_or_xpos = sv_isint (xisneg, y); svbool_t yisodd_xisneg = sv_isodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff); - vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffffffffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0)); } - /* Special cases of x or y: zero, inf and nan. */ - svbool_t xspecial = sv_zeroinfnan (pg, vix0); - svbool_t yspecial = sv_zeroinfnan (pg, viy0); - svbool_t special = svorr_z (pg, xspecial, yspecial); - /* Small cases of x: |x| < 0x1p-126. */ - svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff); - svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX); - if (__glibc_unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, SmallBoundX); + if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. */ - svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0); + svuint64_t vtopx = svlsr_x (svptrue_b64 (), vix, 52); + svbool_t topx_is_null = svcmpeq (xsmall, vtopx, 0); svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52)); vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff); @@ -391,21 +414,25 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) /* y_hi = log(ix, &y_lo). */ svfloat64_t vlo; - svfloat64_t vhi = sv_log_inline (pg, vix, &vlo); + svfloat64_t vhi = sv_log_inline (yint_or_xpos, vix, &vlo, d); /* z = exp(y_hi, y_lo, sign_bias). 
*/ - svfloat64_t vehi = svmul_x (pg, y, vhi); - svfloat64_t velo = svmul_x (pg, y, vlo); - svfloat64_t vemi = svmls_x (pg, vehi, y, vhi); - velo = svsub_x (pg, velo, vemi); - svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias); + svfloat64_t vehi = svmul_x (svptrue_b64 (), y, vhi); + svfloat64_t vemi = svmls_x (yint_or_xpos, vehi, y, vhi); + svfloat64_t velo = svnmls_x (yint_or_xpos, vemi, y, vlo); + svfloat64_t vz = sv_exp_inline (yint_or_xpos, vehi, velo, sign_bias, d); /* Cases of finite y and finite negative x. */ - vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz); + vz = svsel (yint_or_xpos, vz, sv_f64 (__builtin_nan (""))); + + /* Special cases of x or y: zero, inf and nan. */ + svbool_t xspecial = sv_zeroinfnan (svptrue_b64 (), vix0); + svbool_t yspecial = sv_zeroinfnan (svptrue_b64 (), viy0); + svbool_t special = svorr_z (svptrue_b64 (), xspecial, yspecial); /* Cases of zero/inf/nan x or y. */ - if (__glibc_unlikely (svptest_any (pg, special))) - vz = sv_call2_f64 (pow_sc, x, y, vz, special); + if (__glibc_unlikely (svptest_any (svptrue_b64 (), special))) + vz = sv_pow_specialcase (x, y, vz, special); return vz; } diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c index 4f6a142325..8f6f85dca9 100644 --- a/sysdeps/aarch64/fpu/powf_sve.c +++ b/sysdeps/aarch64/fpu/powf_sve.c @@ -26,7 +26,6 @@ #define Tlogc __v_powf_data.logc #define Texp __v_powf_data.scale #define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11)) -#define Shift 0x1.8p52 #define Norm 0x1p23f /* 0x4b000000. */ /* Overall ULP error bound for pow is 2.6 ulp @@ -36,7 +35,7 @@ static const struct data double log_poly[4]; double exp_poly[3]; float uflow_bound, oflow_bound, small_bound; - uint32_t sign_bias, sign_mask, subnormal_bias, off; + uint32_t sign_bias, subnormal_bias, off; } data = { /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of V_POWF_EXP2_N. 
*/ @@ -53,7 +52,6 @@ static const struct data .small_bound = 0x1p-126f, .off = 0x3f35d000, .sign_bias = SignBias, - .sign_mask = 0x80000000, .subnormal_bias = 0x0b800000, /* 23 << 23. */ }; @@ -86,7 +84,7 @@ svisodd (svbool_t pg, svfloat32_t x) static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint32_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2u * 0x7f800000 - 1); } @@ -118,11 +116,10 @@ zeroinfnan (uint32_t ix) preamble of scalar powf except that we do not update ix and sign_bias. This is done in the preamble of the SVE powf. */ static inline float -powf_specialcase (float x, float y, float z) +powf_specialcase (float x, float y) { uint32_t ix = asuint (x); uint32_t iy = asuint (y); - /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ if (__glibc_unlikely (zeroinfnan (iy))) { if (2 * iy == 0) @@ -144,27 +141,15 @@ powf_specialcase (float x, float y, float z) x2 = -x2; return iy & 0x80000000 ? 1 / x2 : x2; } - /* We need a return here in case x<0 and y is integer, but all other tests - need to be run. */ - return z; + /* Return x for convenience, but make sure result is never used. */ + return x; } /* Scalar fallback for special case routines with custom signature. */ -static inline svfloat32_t +static svfloat32_t NOINLINE sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) { - svbool_t p = svpfirst (cmp, svpfalse ()); - while (svptest_any (cmp, p)) - { - float sx1 = svclastb (p, 0, x1); - float sx2 = svclastb (p, 0, x2); - float elem = svclastb (p, 0, y); - elem = powf_specialcase (sx1, sx2, elem); - svfloat32_t y2 = sv_f32 (elem); - y = svsel (p, y2, y); - p = svpnext_b32 (cmp, p); - } - return y; + return sv_call2_f32 (powf_specialcase, x1, x2, y, cmp); } /* Compute core for half of the lanes in double precision. 
*/ @@ -182,30 +167,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k, /* Polynomial to approximate log1p(r)/ln2. */ svfloat64_t logx = A (0); - logx = svmla_x (pg, A (1), r, logx); - logx = svmla_x (pg, A (2), r, logx); - logx = svmla_x (pg, A (3), r, logx); - logx = svmla_x (pg, y0, r, logx); + logx = svmad_x (pg, r, logx, A (1)); + logx = svmad_x (pg, r, logx, A (2)); + logx = svmad_x (pg, r, logx, A (3)); + logx = svmad_x (pg, r, logx, y0); *pylogx = svmul_x (pg, y, logx); /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t kd = svadd_x (pg, *pylogx, Shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, Shift); + svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx); + svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd)); r = svsub_x (pg, *pylogx, kd); /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */ - svuint64_t t - = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1)); - svuint64_t ski = svadd_x (pg, ki, sign_bias); - t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS)); + svuint64_t t = svld1_gather_index ( + svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1)); + svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias); + t = svadd_x (svptrue_b64 (), t, + svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS)); svfloat64_t s = svreinterpret_f64 (t); svfloat64_t p = C (0); p = svmla_x (pg, C (1), p, r); p = svmla_x (pg, C (2), p, r); - p = svmla_x (pg, s, p, svmul_x (pg, s, r)); + p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r)); return p; } @@ -219,19 +204,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, { const svbool_t ptrue = svptrue_b64 (); - /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in - order to perform core computation in double precision. 
*/ + /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two + in order to perform core computation in double precision. */ const svbool_t pg_lo = svunpklo (pg); const svbool_t pg_hi = svunpkhi (pg); svfloat64_t y_lo = svcvt_f64_x ( ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); svfloat64_t y_hi = svcvt_f64_x ( ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); - svfloat32_t z = svreinterpret_f32 (iz); - svfloat64_t z_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z)))); - svfloat64_t z_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z)))); + svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz))); + svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz))); svuint64_t i_lo = svunpklo (i); svuint64_t i_hi = svunpkhi (i); svint64_t k_lo = svunpklo (k); @@ -258,9 +240,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, /* Implementation of SVE powf. Provides the same accuracy as AdvSIMD powf, since it relies on the same algorithm. The theoretical maximum error is under 2.60 ULPs. - Maximum measured error is 2.56 ULPs: - SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127 - want 0x1.fd4b06p+127. */ + Maximum measured error is 2.57 ULPs: + SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127 + want 0x1.fff862p+127. */ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) { const struct data *d = ptr_barrier (&data); @@ -269,21 +251,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svuint32_t viy0 = svreinterpret_u32 (y); /* Negative x cases. */ - svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask); - svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask); + svbool_t xisneg = svcmplt (pg, x, sv_f32 (0)); /* Set sign_bias and ix depending on sign of x and nature of y. 
*/ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint32_t sign_bias = sv_u32 (0); svuint32_t vix = vix0; if (__glibc_unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = svisnotint (xisneg, y); - svbool_t yisint_xisneg = svisint (xisneg, y); + yint_or_xpos = svisint (xisneg, y); svbool_t yisodd_xisneg = svisodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0)); } @@ -294,8 +274,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svbool_t cmp = svorr_z (pg, xspecial, yspecial); /* Small cases of x: |x| < 0x1p-126. */ - svbool_t xsmall = svaclt (pg, x, d->small_bound); - if (__glibc_unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound); + if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. */ svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm)); @@ -304,31 +284,34 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) vix = svsel (xsmall, vix_norm, vix); } /* Part of core computation carried in working precision. */ - svuint32_t tmp = svsub_x (pg, vix, d->off); - svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), - V_POWF_LOG2_N - 1); - svuint32_t top = svand_x (pg, tmp, 0xff800000); - svuint32_t iz = svsub_x (pg, vix, top); - svint32_t k - = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS)); - - /* Compute core in extended precision and return intermediate ylogx results to - handle cases of underflow and underflow in exp. 
*/ + svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off); + svuint32_t i = svand_x ( + yint_or_xpos, svlsr_x (yint_or_xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), + V_POWF_LOG2_N - 1); + svuint32_t top = svand_x (yint_or_xpos, tmp, 0xff800000); + svuint32_t iz = svsub_x (yint_or_xpos, vix, top); + svint32_t k = svasr_x (yint_or_xpos, svreinterpret_s32 (top), + (23 - V_POWF_EXP2_TABLE_BITS)); + + /* Compute core in extended precision and return intermediate ylogx results + to handle cases of underflow and overflow in exp. */ svfloat32_t ylogx; - svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d); + svfloat32_t ret + = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d); /* Handle exp special cases of underflow and overflow. */ - svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); + svuint32_t sign + = svlsl_x (yint_or_xpos, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); svfloat32_t ret_oflow - = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY))); + = svreinterpret_f32 (svorr_x (yint_or_xpos, sign, asuint (INFINITY))); svfloat32_t ret_uflow = svreinterpret_f32 (sign); - ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret); - ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret); + ret = svsel (svcmple (yint_or_xpos, ylogx, d->uflow_bound), ret_uflow, ret); + ret = svsel (svcmpgt (yint_or_xpos, ylogx, d->oflow_bound), ret_oflow, ret); /* Cases of finite y and finite negative x. 
*/ - ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret); + ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf (""))); - if (__glibc_unlikely (svptest_any (pg, cmp))) + if (__glibc_unlikely (svptest_any (cmp, cmp))) return sv_call_powf_sc (x, y, ret, cmp); return ret; diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c index a0d9d3b819..718125cbad 100644 --- a/sysdeps/aarch64/fpu/sin_advsimd.c +++ b/sysdeps/aarch64/fpu/sin_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float64x2_t poly[7]; - float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float64x2_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), @@ -34,12 +34,13 @@ static const struct data .pi_1 = V2 (0x1.921fb54442d18p+1), .pi_2 = V2 (0x1.1a62633145c06p-53), .pi_3 = V2 (0x1.c1cd129024e09p-106), - .shift = V2 (0x1.8p52), }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255). */ -# define Thresh v_u64 (0x1160000000000000) /* RangeVal - TinyBound. */ +/* asuint64(0x1p-253)), below which multiply by inv_pi underflows. */ +# define TinyBound v_u64 (0x3020000000000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u64 (0x1160000000000000) #endif #define C(i) d->poly[i] @@ -72,16 +73,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) fenv). These lanes will be fixed by special-case handler later. */ uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x)); cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh); - r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); + r = vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), cmp)); #else r = x; cmp = vcageq_f64 (x, d->range_val); #endif /* n = rint(|x|/pi). 
*/ - n = vfmaq_f64 (d->shift, d->inv_pi, r); - odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); - n = vsubq_f64 (n, d->shift); + n = vrndaq_f64 (vmulq_f64 (r, d->inv_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtq_s64_f64 (n)), 63); /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f64 (r, d->pi_1, n); diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c index 375dfc3331..6ee9a23d5b 100644 --- a/sysdeps/aarch64/fpu/sinf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinf_advsimd.c @@ -22,7 +22,7 @@ static const struct data { float32x4_t poly[4]; - float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; + float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3; } data = { /* 1.886 ulp error. */ .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), @@ -33,13 +33,14 @@ static const struct data .pi_3 = V4 (-0x1.ee59dap-49f), .inv_pi = V4 (0x1.45f306p-2f), - .shift = V4 (0x1.8p+23f), .range_val = V4 (0x1p20f) }; #if WANT_SIMD_EXCEPT -# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f). */ -# define Thresh v_u32 (0x28800000) /* RangeVal - TinyBound. */ +/* asuint32(0x1p-59f), below which multiply by inv_pi underflows. */ +# define TinyBound v_u32 (0x22000000) +/* RangeVal - TinyBound. */ +# define Thresh v_u32 (0x27800000) #endif #define C(i) d->poly[i] @@ -64,23 +65,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) /* If fenv exceptions are to be triggered correctly, set any special lanes to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by special-case handler later. */ - r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); + r = vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), cmp)); #else r = x; cmp = vcageq_f32 (x, d->range_val); #endif - /* n = rint(|x|/pi) */ - n = vfmaq_f32 (d->shift, d->inv_pi, r); - odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); - n = vsubq_f32 (n, d->shift); + /* n = rint(|x|/pi). 
*/ + n = vrndaq_f32 (vmulq_f32 (r, d->inv_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 31); - /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ r = vfmsq_f32 (r, d->pi_1, n); r = vfmsq_f32 (r, d->pi_2, n); r = vfmsq_f32 (r, d->pi_3, n); - /* y = sin(r) */ + /* y = sin(r). */ r2 = vmulq_f32 (r, r); y = vfmaq_f32 (C (2), C (3), r2); y = vfmaq_f32 (C (1), y, r2); diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c index df5f6c8c06..ac7b306018 100644 --- a/sysdeps/aarch64/fpu/sinh_sve.c +++ b/sysdeps/aarch64/fpu/sinh_sve.c @@ -18,90 +18,153 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[11]; - float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift; uint64_t halff; - int64_t onef; - uint64_t large_bound; + double c2, c4; + double inv_ln2; + double ln2_hi, ln2_lo; + double c0, c1, c3; + double shift, special_bound, bound; + uint64_t expm1_data[20]; } data = { - /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */ - .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, - 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, - 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16, - 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, - 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, - - .inv_ln2 = 0x1.71547652b82fep0, - .m_ln2_hi = -0x1.62e42fefa39efp-1, - .m_ln2_lo = -0x1.abc9e3b39803fp-56, - .shift = 0x1.8p52, - + /* Table lookup of 2^(i/64) - 1, for values of i from 0..19. 
*/ + .expm1_data = { + 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901, + 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb, + 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2, + 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a, + 0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7, + }, + + /* Generated using Remez, in [-log(2)/128, log(2)/128]. */ + .c0 = 0x1p-1, + .c1 = 0x1.55555555548f9p-3, + .c2 = 0x1.5555555554c22p-5, + .c3 = 0x1.111123aaa2fb2p-7, + .c4 = 0x1.6c16d77d98e5bp-10, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .inv_ln2 = 0x1.71547652b82fep+0, + .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */ .halff = 0x3fe0000000000000, - .onef = 0x3ff0000000000000, - /* 2^9. expm1 helper overflows for large input. */ - .large_bound = 0x4080000000000000, + .special_bound = 0x1.62e37e7d8ba72p+9, /* ln(2^(1024 - 1/128)). */ + .bound = 0x1.a56ef8ec924ccp-3 /* 19*ln2/64. */ }; +/* A specialised FEXPA expm1 that is only valid for positive inputs and + has no special cases. Based off the full FEXPA expm1 implemented for + _ZGVsMxv_expm1, with a slightly modified file to keep sinh under 3.5ULP. */ static inline svfloat64_t -expm1_inline (svfloat64_t x, svbool_t pg) +expm1_inline (svbool_t pg, svfloat64_t x) { const struct data *d = ptr_barrier (&data); - /* Reduce argument: - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where i = round(x / ln2) - and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */ - svfloat64_t j - = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift); - svint64_t i = svcvt_s64_x (pg, j); - svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi); - f = svmla_x (pg, f, j, d->m_ln2_lo); - /* Approximate expm1(f) using polynomial. 
*/ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t f4 = svmul_x (pg, f2, f2); - svfloat64_t f8 = svmul_x (pg, f4, f4); - svfloat64_t p - = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly)); - /* t = 2^i. */ - svfloat64_t t = svscale_x (pg, sv_f64 (1), i); - /* expm1(x) ~= p * t + (t - 1). */ - return svmla_x (pg, svsub_x (pg, t, 1.0), p, t); + svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); + svuint64_t u = svreinterpret_u64 (z); + svfloat64_t n = svsub_x (pg, z, d->shift); + + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); + + svfloat64_t r = x; + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); + + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + + svfloat64_t p; + svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + p = svmad_x (pg, c34, r2, c12); + p = svmad_x (pg, p, r, sv_f64 (d->c0)); + p = svmad_x (pg, p, r2, r); + + svfloat64_t scale = svexpa (u); + + /* We want to construct expm1(x) = (scale - 1) + scale * poly. + However, for values of scale close to 1, scale-1 causes large ULP errors + due to cancellation. + + This can be circumvented by using a small lookup for scale-1 + when our input is below a certain bound, otherwise we can use FEXPA. */ + svbool_t is_small = svaclt (pg, x, d->bound); + + /* Index via the input of FEXPA, but we only care about the lower 5 bits. */ + svuint64_t base_idx = svand_x (pg, u, 0x1f); + + /* Compute scale - 1 from FEXPA, and lookup values where this fails. */ + svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0)); + svuint64_t scalem1_lookup + = svld1_gather_index (is_small, d->expm1_data, base_idx); + + /* Select the appropriate scale - 1 value based on x. */ + svfloat64_t scalem1 + = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate); + + /* return expm1 = scale - 1 + (scale * poly). 
*/ + return svmla_x (pg, scalem1, scale, p); } +/* Vectorised special case to handle values past where expm1_inline overflows. + Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double + the valid range of inputs, and returns inf for anything past that. */ static svfloat64_t NOINLINE -special_case (svfloat64_t x, svbool_t pg) +special_case (svbool_t pg, svbool_t special, svfloat64_t ax, + svfloat64_t halfsign, const struct data *d) { - return sv_call_f64 (sinh, x, x, pg); + /* Halves input value, and then check if any cases + are still going to overflow. */ + ax = svmul_x (special, ax, 0.5); + svbool_t is_safe = svaclt (special, ax, d->special_bound); + + svfloat64_t t = expm1_inline (pg, ax); + + /* Finish fast path to compute values for non-special cases. */ + svfloat64_t y = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0))); + y = svmul_x (pg, y, halfsign); + + /* Computes special lane, and set remaining overflow lanes to inf. */ + svfloat64_t half_special_y = svmul_x (svptrue_b64 (), t, halfsign); + svfloat64_t special_y = svmul_x (svptrue_b64 (), half_special_y, t); + + svuint64_t signed_inf + = svorr_x (svptrue_b64 (), svreinterpret_u64 (halfsign), + sv_u64 (0x7ff0000000000000)); + special_y = svsel (is_safe, special_y, svreinterpret_f64 (signed_inf)); + + /* Join resulting vectors together and return. */ + return svsel (special, special_y, y); } -/* Approximation for SVE double-precision sinh(x) using expm1. - sinh(x) = (exp(x) - exp(-x)) / 2. - The greatest observed error is 2.57 ULP: - _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2 - want 0x1.ab929fc64bd63p-2. */ +/* Approximation for SVE double-precision sinh(x) using FEXPA expm1. + Uses sinh(x) = (e^(2x) - 1) / (2e^x), rewritten for accuracy.
+ The greatest observed error in the non-special region is 2.63 + 0.5 ULP: + _ZGVsMxv_sinh (0x1.b5e0e13ba88aep-2) got 0x1.c3587faf97b0cp-2 + want 0x1.c3587faf97b09p-2 + + The greatest observed error in the special region is 2.65 + 0.5 ULP: + _ZGVsMxv_sinh (0x1.633ce847dab1ap+9) got 0x1.fffd30eea0066p+1023 + want 0x1.fffd30eea0063p+1023. */ svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t special = svacge (pg, x, d->special_bound); svfloat64_t ax = svabs_x (pg, x); svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax)); svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff)); - svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound); - /* Fall back to scalar variant for all lanes if any are special. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, pg); + return special_case (pg, special, ax, halfsign, d); /* Up to the point that expm1 overflows, we can use it to calculate sinh using a slight rearrangement of the definition of sinh. This allows us to retain acceptable accuracy for very small inputs. */ - svfloat64_t t = expm1_inline (ax, pg); + svfloat64_t t = expm1_inline (pg, ax); t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0))); return svmul_x (pg, t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sinhf_advsimd.c b/sysdeps/aarch64/fpu/sinhf_advsimd.c index 6bb7482dc2..c6ed7598e7 100644 --- a/sysdeps/aarch64/fpu/sinhf_advsimd.c +++ b/sysdeps/aarch64/fpu/sinhf_advsimd.c @@ -23,15 +23,13 @@ static const struct data { struct v_expm1f_data expm1f_consts; - uint32x4_t halff; #if WANT_SIMD_EXCEPT uint32x4_t tiny_bound, thresh; #else - uint32x4_t oflow_bound; + float32x4_t oflow_bound; #endif } data = { .expm1f_consts = V_EXPM1F_DATA, - .halff = V4 (0x3f000000), #if WANT_SIMD_EXCEPT /* 0x1.6a09e8p-32, below which expm1f underflows. 
*/ .tiny_bound = V4 (0x2fb504f4), @@ -39,14 +37,15 @@ static const struct data .thresh = V4 (0x12fbbbb3), #else /* 0x1.61814ep+6, above which expm1f helper overflows. */ - .oflow_bound = V4 (0x42b0c0a7), + .oflow_bound = V4 (0x1.61814ep+6), #endif }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign, + uint32x4_t special) { - return v_call_f32 (sinhf, x, y, special); + return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special); } /* Approximation for vector single-precision sinh(x) using expm1. @@ -60,15 +59,15 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) uint32x4_t ix = vreinterpretq_u32_f32 (x); float32x4_t ax = vabsq_f32 (x); - uint32x4_t iax = vreinterpretq_u32_f32 (ax); - uint32x4_t sign = veorq_u32 (ix, iax); - float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff)); + float32x4_t halfsign = vreinterpretq_f32_u32 ( + vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5)))); #if WANT_SIMD_EXCEPT - uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh); + uint32x4_t special = vcgeq_u32 ( + vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh); ax = v_zerofy_f32 (ax, special); #else - uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound); + uint32x4_t special = vcageq_f32 (x, d->oflow_bound); #endif /* Up to the point that expm1f overflows, we can use it to calculate sinhf @@ -80,7 +79,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x) /* Fall back to the scalar variant for any lanes that should trigger an exception. 
*/ if (__glibc_unlikely (v_any_u32 (special))) - return special_case (x, vmulq_f32 (t, halfsign), special); + return special_case (x, t, halfsign, special); return vmulq_f32 (t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sinhf_sve.c b/sysdeps/aarch64/fpu/sinhf_sve.c index 6c204b57a2..50dd386774 100644 --- a/sysdeps/aarch64/fpu/sinhf_sve.c +++ b/sysdeps/aarch64/fpu/sinhf_sve.c @@ -63,5 +63,5 @@ svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg) if (__glibc_unlikely (svptest_any (pg, special))) return special_case (x, svmul_x (pg, t, halfsign), special); - return svmul_x (pg, t, halfsign); + return svmul_x (svptrue_b32 (), t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sv_erf_data.c b/sysdeps/aarch64/fpu/sv_erf_data.c deleted file mode 100644 index a53878f893..0000000000 --- a/sysdeps/aarch64/fpu/sv_erf_data.c +++ /dev/null @@ -1,1570 +0,0 @@ -/* Table for SVE erf approximation - - Copyright (C) 2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include "vecmath_config.h" - -/* Lookup table used in vector erf. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 6.0 (769 values): - - the first entry __erf_data.tab.erf contains the values of erf(r), - - the second entry __erf_data.tab.scale contains the values of - 2/sqrt(pi)*exp(-r^2). 
Note that indices 0 and 1 are never hit by the - algorithm, since lookup is performed only for x >= 1/64-1/512. */ -const struct sv_erf_data __sv_erf_data = { - .erf = { 0x0.0000000000000p+0, - 0x1.20dbf3deb1340p-7, - 0x1.20d77083f17a0p-6, - 0x1.b137e0cf584dcp-6, - 0x1.20c5645dd2538p-5, - 0x1.68e5d3bbc9526p-5, - 0x1.b0fafef135745p-5, - 0x1.f902a77bd3821p-5, - 0x1.207d480e90658p-4, - 0x1.44703e87e8593p-4, - 0x1.68591a1e83b5dp-4, - 0x1.8c36beb8a8d23p-4, - 0x1.b0081148a873ap-4, - 0x1.d3cbf7e70a4b3p-4, - 0x1.f78159ec8bb50p-4, - 0x1.0d939005f65e5p-3, - 0x1.1f5e1a35c3b89p-3, - 0x1.311fc15f56d14p-3, - 0x1.42d7fc2f64959p-3, - 0x1.548642321d7c6p-3, - 0x1.662a0bdf7a89fp-3, - 0x1.77c2d2a765f9ep-3, - 0x1.895010fdbdbfdp-3, - 0x1.9ad142662e14dp-3, - 0x1.ac45e37fe2526p-3, - 0x1.bdad72110a648p-3, - 0x1.cf076d1233237p-3, - 0x1.e05354b96ff36p-3, - 0x1.f190aa85540e2p-3, - 0x1.015f78a3dcf3dp-2, - 0x1.09eed6982b948p-2, - 0x1.127631eb8de32p-2, - 0x1.1af54e232d609p-2, - 0x1.236bef825d9a2p-2, - 0x1.2bd9db0f7827fp-2, - 0x1.343ed6989b7d9p-2, - 0x1.3c9aa8b84bedap-2, - 0x1.44ed18d9f6462p-2, - 0x1.4d35ef3e5372ep-2, - 0x1.5574f4ffac98ep-2, - 0x1.5da9f415ff23fp-2, - 0x1.65d4b75b00471p-2, - 0x1.6df50a8dff772p-2, - 0x1.760aba57a76bfp-2, - 0x1.7e15944d9d3e4p-2, - 0x1.861566f5fd3c0p-2, - 0x1.8e0a01cab516bp-2, - 0x1.95f3353cbb146p-2, - 0x1.9dd0d2b721f39p-2, - 0x1.a5a2aca209394p-2, - 0x1.ad68966569a87p-2, - 0x1.b522646bbda68p-2, - 0x1.bccfec24855b8p-2, - 0x1.c4710406a65fcp-2, - 0x1.cc058392a6d2dp-2, - 0x1.d38d4354c3bd0p-2, - 0x1.db081ce6e2a48p-2, - 0x1.e275eaf25e458p-2, - 0x1.e9d68931ae650p-2, - 0x1.f129d471eabb1p-2, - 0x1.f86faa9428f9dp-2, - 0x1.ffa7ea8eb5fd0p-2, - 0x1.03693a371519cp-1, - 0x1.06f794ab2cae7p-1, - 0x1.0a7ef5c18edd2p-1, - 0x1.0dff4f247f6c6p-1, - 0x1.1178930ada115p-1, - 0x1.14eab43841b55p-1, - 0x1.1855a5fd3dd50p-1, - 0x1.1bb95c3746199p-1, - 0x1.1f15cb50bc4dep-1, - 0x1.226ae840d4d70p-1, - 0x1.25b8a88b6dd7fp-1, - 0x1.28ff0240d52cdp-1, - 0x1.2c3debfd7d6c1p-1, - 0x1.2f755ce9a21f4p-1, - 
0x1.32a54cb8db67bp-1, - 0x1.35cdb3a9a144dp-1, - 0x1.38ee8a84beb71p-1, - 0x1.3c07ca9cb4f9ep-1, - 0x1.3f196dcd0f135p-1, - 0x1.42236e79a5fa6p-1, - 0x1.4525c78dd5966p-1, - 0x1.4820747ba2dc2p-1, - 0x1.4b13713ad3513p-1, - 0x1.4dfeba47f63ccp-1, - 0x1.50e24ca35fd2cp-1, - 0x1.53be25d016a4fp-1, - 0x1.569243d2b3a9bp-1, - 0x1.595ea53035283p-1, - 0x1.5c2348ecc4dc3p-1, - 0x1.5ee02e8a71a53p-1, - 0x1.61955607dd15dp-1, - 0x1.6442bfdedd397p-1, - 0x1.66e86d0312e82p-1, - 0x1.69865ee075011p-1, - 0x1.6c1c9759d0e5fp-1, - 0x1.6eab18c74091bp-1, - 0x1.7131e5f496a5ap-1, - 0x1.73b1021fc0cb8p-1, - 0x1.762870f720c6fp-1, - 0x1.78983697dc96fp-1, - 0x1.7b00578c26037p-1, - 0x1.7d60d8c979f7bp-1, - 0x1.7fb9bfaed8078p-1, - 0x1.820b1202f27fbp-1, - 0x1.8454d5f25760dp-1, - 0x1.8697120d92a4ap-1, - 0x1.88d1cd474a2e0p-1, - 0x1.8b050ef253c37p-1, - 0x1.8d30debfc572ep-1, - 0x1.8f5544bd00c04p-1, - 0x1.91724951b8fc6p-1, - 0x1.9387f53df5238p-1, - 0x1.959651980da31p-1, - 0x1.979d67caa6631p-1, - 0x1.999d4192a5715p-1, - 0x1.9b95e8fd26abap-1, - 0x1.9d8768656cc42p-1, - 0x1.9f71ca72cffb6p-1, - 0x1.a1551a16aaeafp-1, - 0x1.a331628a45b92p-1, - 0x1.a506af4cc00f4p-1, - 0x1.a6d50c20fa293p-1, - 0x1.a89c850b7d54dp-1, - 0x1.aa5d265064366p-1, - 0x1.ac16fc7143263p-1, - 0x1.adca142b10f98p-1, - 0x1.af767a741088bp-1, - 0x1.b11c3c79bb424p-1, - 0x1.b2bb679ead19cp-1, - 0x1.b4540978921eep-1, - 0x1.b5e62fce16095p-1, - 0x1.b771e894d602ep-1, - 0x1.b8f741ef54f83p-1, - 0x1.ba764a2af2b78p-1, - 0x1.bbef0fbde6221p-1, - 0x1.bd61a1453ab44p-1, - 0x1.bece0d82d1a5cp-1, - 0x1.c034635b66e23p-1, - 0x1.c194b1d49a184p-1, - 0x1.c2ef0812fc1bdp-1, - 0x1.c443755820d64p-1, - 0x1.c5920900b5fd1p-1, - 0x1.c6dad2829ec62p-1, - 0x1.c81de16b14cefp-1, - 0x1.c95b455cce69dp-1, - 0x1.ca930e0e2a825p-1, - 0x1.cbc54b476248dp-1, - 0x1.ccf20ce0c0d27p-1, - 0x1.ce1962c0e0d8bp-1, - 0x1.cf3b5cdaf0c39p-1, - 0x1.d0580b2cfd249p-1, - 0x1.d16f7dbe41ca0p-1, - 0x1.d281c49d818d0p-1, - 0x1.d38eefdf64fddp-1, - 0x1.d4970f9ce00d9p-1, - 0x1.d59a33f19ed42p-1, - 0x1.d6986cfa798e7p-1, - 
0x1.d791cad3eff01p-1, - 0x1.d8865d98abe01p-1, - 0x1.d97635600bb89p-1, - 0x1.da61623cb41e0p-1, - 0x1.db47f43b2980dp-1, - 0x1.dc29fb60715afp-1, - 0x1.dd0787a8bb39dp-1, - 0x1.dde0a90611a0dp-1, - 0x1.deb56f5f12d28p-1, - 0x1.df85ea8db188ep-1, - 0x1.e0522a5dfda73p-1, - 0x1.e11a3e8cf4eb8p-1, - 0x1.e1de36c75ba58p-1, - 0x1.e29e22a89d766p-1, - 0x1.e35a11b9b61cep-1, - 0x1.e4121370224ccp-1, - 0x1.e4c6372cd8927p-1, - 0x1.e5768c3b4a3fcp-1, - 0x1.e62321d06c5e0p-1, - 0x1.e6cc0709c8a0dp-1, - 0x1.e7714aec96534p-1, - 0x1.e812fc64db369p-1, - 0x1.e8b12a44944a8p-1, - 0x1.e94be342e6743p-1, - 0x1.e9e335fb56f87p-1, - 0x1.ea7730ed0bbb9p-1, - 0x1.eb07e27a133aap-1, - 0x1.eb9558e6b42cep-1, - 0x1.ec1fa258c4beap-1, - 0x1.eca6ccd709544p-1, - 0x1.ed2ae6489ac1ep-1, - 0x1.edabfc7453e63p-1, - 0x1.ee2a1d004692cp-1, - 0x1.eea5557137ae0p-1, - 0x1.ef1db32a2277cp-1, - 0x1.ef93436bc2daap-1, - 0x1.f006135426b26p-1, - 0x1.f0762fde45ee6p-1, - 0x1.f0e3a5e1a1788p-1, - 0x1.f14e8211e8c55p-1, - 0x1.f1b6d0fea5f4dp-1, - 0x1.f21c9f12f0677p-1, - 0x1.f27ff89525acfp-1, - 0x1.f2e0e9a6a8b09p-1, - 0x1.f33f7e43a706bp-1, - 0x1.f39bc242e43e6p-1, - 0x1.f3f5c1558b19ep-1, - 0x1.f44d870704911p-1, - 0x1.f4a31ebcd47dfp-1, - 0x1.f4f693b67bd77p-1, - 0x1.f547f10d60597p-1, - 0x1.f59741b4b97cfp-1, - 0x1.f5e4907982a07p-1, - 0x1.f62fe80272419p-1, - 0x1.f67952cff6282p-1, - 0x1.f6c0db3c34641p-1, - 0x1.f7068b7b10fd9p-1, - 0x1.f74a6d9a38383p-1, - 0x1.f78c8b812d498p-1, - 0x1.f7cceef15d631p-1, - 0x1.f80ba18636f07p-1, - 0x1.f848acb544e95p-1, - 0x1.f88419ce4e184p-1, - 0x1.f8bdf1fb78370p-1, - 0x1.f8f63e416ebffp-1, - 0x1.f92d077f8d56dp-1, - 0x1.f96256700da8ep-1, - 0x1.f99633a838a57p-1, - 0x1.f9c8a7989af0dp-1, - 0x1.f9f9ba8d3c733p-1, - 0x1.fa2974addae45p-1, - 0x1.fa57ddfe27376p-1, - 0x1.fa84fe5e05c8dp-1, - 0x1.fab0dd89d1309p-1, - 0x1.fadb831a9f9c3p-1, - 0x1.fb04f6868a944p-1, - 0x1.fb2d3f20f9101p-1, - 0x1.fb54641aebbc9p-1, - 0x1.fb7a6c834b5a2p-1, - 0x1.fb9f5f4739170p-1, - 0x1.fbc3433260ca5p-1, - 0x1.fbe61eef4cf6ap-1, - 0x1.fc07f907bc794p-1, - 
0x1.fc28d7e4f9cd0p-1, - 0x1.fc48c1d033c7ap-1, - 0x1.fc67bcf2d7b8fp-1, - 0x1.fc85cf56ecd38p-1, - 0x1.fca2fee770c79p-1, - 0x1.fcbf5170b578bp-1, - 0x1.fcdacca0bfb73p-1, - 0x1.fcf57607a6e7cp-1, - 0x1.fd0f5317f582fp-1, - 0x1.fd2869270a56fp-1, - 0x1.fd40bd6d7a785p-1, - 0x1.fd58550773cb5p-1, - 0x1.fd6f34f52013ap-1, - 0x1.fd85621b0876dp-1, - 0x1.fd9ae142795e3p-1, - 0x1.fdafb719e6a69p-1, - 0x1.fdc3e835500b3p-1, - 0x1.fdd7790ea5bc0p-1, - 0x1.fdea6e062d0c9p-1, - 0x1.fdfccb62e52d3p-1, - 0x1.fe0e9552ebdd6p-1, - 0x1.fe1fcfebe2083p-1, - 0x1.fe307f2b503d0p-1, - 0x1.fe40a6f70af4bp-1, - 0x1.fe504b1d9696cp-1, - 0x1.fe5f6f568b301p-1, - 0x1.fe6e1742f7cf6p-1, - 0x1.fe7c466dc57a1p-1, - 0x1.fe8a004c19ae6p-1, - 0x1.fe97483db8670p-1, - 0x1.fea4218d6594ap-1, - 0x1.feb08f7146046p-1, - 0x1.febc950b3fa75p-1, - 0x1.fec835695932ep-1, - 0x1.fed37386190fbp-1, - 0x1.fede5248e38f4p-1, - 0x1.fee8d486585eep-1, - 0x1.fef2fd00af31ap-1, - 0x1.fefcce6813974p-1, - 0x1.ff064b5afffbep-1, - 0x1.ff0f766697c76p-1, - 0x1.ff18520700971p-1, - 0x1.ff20e0a7ba8c2p-1, - 0x1.ff2924a3f7a83p-1, - 0x1.ff312046f2339p-1, - 0x1.ff38d5cc4227fp-1, - 0x1.ff404760319b4p-1, - 0x1.ff47772010262p-1, - 0x1.ff4e671a85425p-1, - 0x1.ff55194fe19dfp-1, - 0x1.ff5b8fb26f5f6p-1, - 0x1.ff61cc26c1578p-1, - 0x1.ff67d08401202p-1, - 0x1.ff6d9e943c231p-1, - 0x1.ff733814af88cp-1, - 0x1.ff789eb6130c9p-1, - 0x1.ff7dd41ce2b4dp-1, - 0x1.ff82d9e1a76d8p-1, - 0x1.ff87b1913e853p-1, - 0x1.ff8c5cad200a5p-1, - 0x1.ff90dcaba4096p-1, - 0x1.ff9532f846ab0p-1, - 0x1.ff9960f3eb327p-1, - 0x1.ff9d67f51ddbap-1, - 0x1.ffa14948549a7p-1, - 0x1.ffa506302ebaep-1, - 0x1.ffa89fe5b3625p-1, - 0x1.ffac17988ef4bp-1, - 0x1.ffaf6e6f4f5c0p-1, - 0x1.ffb2a5879f35ep-1, - 0x1.ffb5bdf67fe6fp-1, - 0x1.ffb8b8c88295fp-1, - 0x1.ffbb970200110p-1, - 0x1.ffbe599f4f9d9p-1, - 0x1.ffc10194fcb64p-1, - 0x1.ffc38fcffbb7cp-1, - 0x1.ffc60535dd7f5p-1, - 0x1.ffc862a501fd7p-1, - 0x1.ffcaa8f4c9beap-1, - 0x1.ffccd8f5c66d1p-1, - 0x1.ffcef371ea4d7p-1, - 0x1.ffd0f92cb6ba7p-1, - 0x1.ffd2eae369a07p-1, - 
0x1.ffd4c94d29fdbp-1, - 0x1.ffd6951b33686p-1, - 0x1.ffd84ef9009eep-1, - 0x1.ffd9f78c7524ap-1, - 0x1.ffdb8f7605ee7p-1, - 0x1.ffdd1750e1220p-1, - 0x1.ffde8fb314ebfp-1, - 0x1.ffdff92db56e5p-1, - 0x1.ffe1544d01ccbp-1, - 0x1.ffe2a1988857cp-1, - 0x1.ffe3e19349dc7p-1, - 0x1.ffe514bbdc197p-1, - 0x1.ffe63b8c8b5f7p-1, - 0x1.ffe7567b7b5e1p-1, - 0x1.ffe865fac722bp-1, - 0x1.ffe96a78a04a9p-1, - 0x1.ffea645f6d6dap-1, - 0x1.ffeb5415e7c44p-1, - 0x1.ffec39ff380b9p-1, - 0x1.ffed167b12ac2p-1, - 0x1.ffede9e5d3262p-1, - 0x1.ffeeb49896c6dp-1, - 0x1.ffef76e956a9fp-1, - 0x1.fff0312b010b5p-1, - 0x1.fff0e3ad91ec2p-1, - 0x1.fff18ebe2b0e1p-1, - 0x1.fff232a72b48ep-1, - 0x1.fff2cfb0453d9p-1, - 0x1.fff3661e9569dp-1, - 0x1.fff3f634b79f9p-1, - 0x1.fff48032dbe40p-1, - 0x1.fff50456dab8cp-1, - 0x1.fff582dc48d30p-1, - 0x1.fff5fbfc8a439p-1, - 0x1.fff66feee5129p-1, - 0x1.fff6dee89352ep-1, - 0x1.fff7491cd4af6p-1, - 0x1.fff7aebcff755p-1, - 0x1.fff80ff8911fdp-1, - 0x1.fff86cfd3e657p-1, - 0x1.fff8c5f702ccfp-1, - 0x1.fff91b102fca8p-1, - 0x1.fff96c717b695p-1, - 0x1.fff9ba420e834p-1, - 0x1.fffa04a7928b1p-1, - 0x1.fffa4bc63ee9ap-1, - 0x1.fffa8fc0e5f33p-1, - 0x1.fffad0b901755p-1, - 0x1.fffb0ecebee1bp-1, - 0x1.fffb4a210b172p-1, - 0x1.fffb82cd9dcbfp-1, - 0x1.fffbb8f1049c6p-1, - 0x1.fffbeca6adbe9p-1, - 0x1.fffc1e08f25f5p-1, - 0x1.fffc4d3120aa1p-1, - 0x1.fffc7a37857d2p-1, - 0x1.fffca53375ce3p-1, - 0x1.fffcce3b57bffp-1, - 0x1.fffcf564ab6b7p-1, - 0x1.fffd1ac4135f9p-1, - 0x1.fffd3e6d5cd87p-1, - 0x1.fffd607387b07p-1, - 0x1.fffd80e8ce0dap-1, - 0x1.fffd9fdeabccep-1, - 0x1.fffdbd65e5ad0p-1, - 0x1.fffdd98e903b2p-1, - 0x1.fffdf46816833p-1, - 0x1.fffe0e0140857p-1, - 0x1.fffe26683972ap-1, - 0x1.fffe3daa95b18p-1, - 0x1.fffe53d558ae9p-1, - 0x1.fffe68f4fa777p-1, - 0x1.fffe7d156d244p-1, - 0x1.fffe904222101p-1, - 0x1.fffea2860ee1ep-1, - 0x1.fffeb3ebb267bp-1, - 0x1.fffec47d19457p-1, - 0x1.fffed443e2787p-1, - 0x1.fffee34943b15p-1, - 0x1.fffef1960d85dp-1, - 0x1.fffeff32af7afp-1, - 0x1.ffff0c273bea2p-1, - 0x1.ffff187b6bc0ep-1, - 
0x1.ffff2436a21dcp-1, - 0x1.ffff2f5fefcaap-1, - 0x1.ffff39fe16963p-1, - 0x1.ffff44178c8d2p-1, - 0x1.ffff4db27f146p-1, - 0x1.ffff56d4d5e5ep-1, - 0x1.ffff5f8435efcp-1, - 0x1.ffff67c604180p-1, - 0x1.ffff6f9f67e55p-1, - 0x1.ffff77154e0d6p-1, - 0x1.ffff7e2c6aea2p-1, - 0x1.ffff84e93cd75p-1, - 0x1.ffff8b500e77cp-1, - 0x1.ffff9164f8e46p-1, - 0x1.ffff972be5c59p-1, - 0x1.ffff9ca891572p-1, - 0x1.ffffa1de8c582p-1, - 0x1.ffffa6d13de73p-1, - 0x1.ffffab83e54b8p-1, - 0x1.ffffaff99bac4p-1, - 0x1.ffffb43555b5fp-1, - 0x1.ffffb839e52f3p-1, - 0x1.ffffbc09fa7cdp-1, - 0x1.ffffbfa82616bp-1, - 0x1.ffffc316d9ed0p-1, - 0x1.ffffc6586abf6p-1, - 0x1.ffffc96f1165ep-1, - 0x1.ffffcc5cec0c1p-1, - 0x1.ffffcf23ff5fcp-1, - 0x1.ffffd1c637b2bp-1, - 0x1.ffffd4456a10dp-1, - 0x1.ffffd6a3554a1p-1, - 0x1.ffffd8e1a2f22p-1, - 0x1.ffffdb01e8546p-1, - 0x1.ffffdd05a75eap-1, - 0x1.ffffdeee4f810p-1, - 0x1.ffffe0bd3e852p-1, - 0x1.ffffe273c15b7p-1, - 0x1.ffffe41314e06p-1, - 0x1.ffffe59c6698bp-1, - 0x1.ffffe710d565ep-1, - 0x1.ffffe8717232dp-1, - 0x1.ffffe9bf4098cp-1, - 0x1.ffffeafb377d5p-1, - 0x1.ffffec2641a9ep-1, - 0x1.ffffed413e5b7p-1, - 0x1.ffffee4d01cd6p-1, - 0x1.ffffef4a55bd4p-1, - 0x1.fffff039f9e8fp-1, - 0x1.fffff11ca4876p-1, - 0x1.fffff1f302bc1p-1, - 0x1.fffff2bdb904dp-1, - 0x1.fffff37d63a36p-1, - 0x1.fffff43297019p-1, - 0x1.fffff4dde0118p-1, - 0x1.fffff57fc4a95p-1, - 0x1.fffff618c3da6p-1, - 0x1.fffff6a956450p-1, - 0x1.fffff731ee681p-1, - 0x1.fffff7b2f8ed6p-1, - 0x1.fffff82cdcf1bp-1, - 0x1.fffff89ffc4aap-1, - 0x1.fffff90cb3c81p-1, - 0x1.fffff9735b73bp-1, - 0x1.fffff9d446cccp-1, - 0x1.fffffa2fc5015p-1, - 0x1.fffffa8621251p-1, - 0x1.fffffad7a2652p-1, - 0x1.fffffb248c39dp-1, - 0x1.fffffb6d1e95dp-1, - 0x1.fffffbb196132p-1, - 0x1.fffffbf22c1e2p-1, - 0x1.fffffc2f171e3p-1, - 0x1.fffffc688a9cfp-1, - 0x1.fffffc9eb76acp-1, - 0x1.fffffcd1cbc28p-1, - 0x1.fffffd01f36afp-1, - 0x1.fffffd2f57d68p-1, - 0x1.fffffd5a2041fp-1, - 0x1.fffffd8271d12p-1, - 0x1.fffffda86faa9p-1, - 0x1.fffffdcc3b117p-1, - 0x1.fffffdedf37edp-1, - 
0x1.fffffe0db6b91p-1, - 0x1.fffffe2ba0ea5p-1, - 0x1.fffffe47ccb60p-1, - 0x1.fffffe62534d4p-1, - 0x1.fffffe7b4c81ep-1, - 0x1.fffffe92ced93p-1, - 0x1.fffffea8ef9cfp-1, - 0x1.fffffebdc2ec6p-1, - 0x1.fffffed15bcbap-1, - 0x1.fffffee3cc32cp-1, - 0x1.fffffef5251c2p-1, - 0x1.ffffff0576917p-1, - 0x1.ffffff14cfb92p-1, - 0x1.ffffff233ee1dp-1, - 0x1.ffffff30d18e8p-1, - 0x1.ffffff3d9480fp-1, - 0x1.ffffff4993c46p-1, - 0x1.ffffff54dab72p-1, - 0x1.ffffff5f74141p-1, - 0x1.ffffff6969fb8p-1, - 0x1.ffffff72c5fb6p-1, - 0x1.ffffff7b91176p-1, - 0x1.ffffff83d3d07p-1, - 0x1.ffffff8b962bep-1, - 0x1.ffffff92dfba2p-1, - 0x1.ffffff99b79d2p-1, - 0x1.ffffffa0248e8p-1, - 0x1.ffffffa62ce54p-1, - 0x1.ffffffabd69b4p-1, - 0x1.ffffffb127525p-1, - 0x1.ffffffb624592p-1, - 0x1.ffffffbad2affp-1, - 0x1.ffffffbf370cdp-1, - 0x1.ffffffc355dfdp-1, - 0x1.ffffffc733572p-1, - 0x1.ffffffcad3626p-1, - 0x1.ffffffce39b67p-1, - 0x1.ffffffd169d0cp-1, - 0x1.ffffffd466fa5p-1, - 0x1.ffffffd7344aap-1, - 0x1.ffffffd9d4aabp-1, - 0x1.ffffffdc4ad7ap-1, - 0x1.ffffffde9964ep-1, - 0x1.ffffffe0c2bf0p-1, - 0x1.ffffffe2c92dbp-1, - 0x1.ffffffe4aed5ep-1, - 0x1.ffffffe675bbdp-1, - 0x1.ffffffe81fc4ep-1, - 0x1.ffffffe9aeb97p-1, - 0x1.ffffffeb24467p-1, - 0x1.ffffffec81ff2p-1, - 0x1.ffffffedc95e7p-1, - 0x1.ffffffeefbc85p-1, - 0x1.fffffff01a8b6p-1, - 0x1.fffffff126e1ep-1, - 0x1.fffffff221f30p-1, - 0x1.fffffff30cd3fp-1, - 0x1.fffffff3e8892p-1, - 0x1.fffffff4b606fp-1, - 0x1.fffffff57632dp-1, - 0x1.fffffff629e44p-1, - 0x1.fffffff6d1e56p-1, - 0x1.fffffff76ef3fp-1, - 0x1.fffffff801c1fp-1, - 0x1.fffffff88af67p-1, - 0x1.fffffff90b2e3p-1, - 0x1.fffffff982fc1p-1, - 0x1.fffffff9f2e9fp-1, - 0x1.fffffffa5b790p-1, - 0x1.fffffffabd229p-1, - 0x1.fffffffb18582p-1, - 0x1.fffffffb6d844p-1, - 0x1.fffffffbbd0aap-1, - 0x1.fffffffc0748fp-1, - 0x1.fffffffc4c96cp-1, - 0x1.fffffffc8d462p-1, - 0x1.fffffffcc9a41p-1, - 0x1.fffffffd01f89p-1, - 0x1.fffffffd36871p-1, - 0x1.fffffffd678edp-1, - 0x1.fffffffd954aep-1, - 0x1.fffffffdbff2ap-1, - 0x1.fffffffde7ba0p-1, - 
0x1.fffffffe0cd16p-1, - 0x1.fffffffe2f664p-1, - 0x1.fffffffe4fa30p-1, - 0x1.fffffffe6daf7p-1, - 0x1.fffffffe89b0cp-1, - 0x1.fffffffea3c9ap-1, - 0x1.fffffffebc1a9p-1, - 0x1.fffffffed2c21p-1, - 0x1.fffffffee7dc8p-1, - 0x1.fffffffefb847p-1, - 0x1.ffffffff0dd2bp-1, - 0x1.ffffffff1ede9p-1, - 0x1.ffffffff2ebdap-1, - 0x1.ffffffff3d843p-1, - 0x1.ffffffff4b453p-1, - 0x1.ffffffff58126p-1, - 0x1.ffffffff63fc3p-1, - 0x1.ffffffff6f121p-1, - 0x1.ffffffff79626p-1, - 0x1.ffffffff82fabp-1, - 0x1.ffffffff8be77p-1, - 0x1.ffffffff94346p-1, - 0x1.ffffffff9bec8p-1, - 0x1.ffffffffa319fp-1, - 0x1.ffffffffa9c63p-1, - 0x1.ffffffffaffa4p-1, - 0x1.ffffffffb5be5p-1, - 0x1.ffffffffbb1a2p-1, - 0x1.ffffffffc014ep-1, - 0x1.ffffffffc4b56p-1, - 0x1.ffffffffc901cp-1, - 0x1.ffffffffccfffp-1, - 0x1.ffffffffd0b56p-1, - 0x1.ffffffffd4271p-1, - 0x1.ffffffffd759dp-1, - 0x1.ffffffffda520p-1, - 0x1.ffffffffdd13cp-1, - 0x1.ffffffffdfa2dp-1, - 0x1.ffffffffe202dp-1, - 0x1.ffffffffe4371p-1, - 0x1.ffffffffe642ap-1, - 0x1.ffffffffe8286p-1, - 0x1.ffffffffe9eb0p-1, - 0x1.ffffffffeb8d0p-1, - 0x1.ffffffffed10ap-1, - 0x1.ffffffffee782p-1, - 0x1.ffffffffefc57p-1, - 0x1.fffffffff0fa7p-1, - 0x1.fffffffff218fp-1, - 0x1.fffffffff3227p-1, - 0x1.fffffffff4188p-1, - 0x1.fffffffff4fc9p-1, - 0x1.fffffffff5cfdp-1, - 0x1.fffffffff6939p-1, - 0x1.fffffffff748ep-1, - 0x1.fffffffff7f0dp-1, - 0x1.fffffffff88c5p-1, - 0x1.fffffffff91c6p-1, - 0x1.fffffffff9a1bp-1, - 0x1.fffffffffa1d2p-1, - 0x1.fffffffffa8f6p-1, - 0x1.fffffffffaf92p-1, - 0x1.fffffffffb5b0p-1, - 0x1.fffffffffbb58p-1, - 0x1.fffffffffc095p-1, - 0x1.fffffffffc56dp-1, - 0x1.fffffffffc9e8p-1, - 0x1.fffffffffce0dp-1, - 0x1.fffffffffd1e1p-1, - 0x1.fffffffffd56cp-1, - 0x1.fffffffffd8b3p-1, - 0x1.fffffffffdbbap-1, - 0x1.fffffffffde86p-1, - 0x1.fffffffffe11dp-1, - 0x1.fffffffffe380p-1, - 0x1.fffffffffe5b6p-1, - 0x1.fffffffffe7c0p-1, - 0x1.fffffffffe9a2p-1, - 0x1.fffffffffeb60p-1, - 0x1.fffffffffecfbp-1, - 0x1.fffffffffee77p-1, - 0x1.fffffffffefd6p-1, - 0x1.ffffffffff11ap-1, - 
0x1.ffffffffff245p-1, - 0x1.ffffffffff359p-1, - 0x1.ffffffffff457p-1, - 0x1.ffffffffff542p-1, - 0x1.ffffffffff61bp-1, - 0x1.ffffffffff6e3p-1, - 0x1.ffffffffff79bp-1, - 0x1.ffffffffff845p-1, - 0x1.ffffffffff8e2p-1, - 0x1.ffffffffff973p-1, - 0x1.ffffffffff9f8p-1, - 0x1.ffffffffffa73p-1, - 0x1.ffffffffffae4p-1, - 0x1.ffffffffffb4cp-1, - 0x1.ffffffffffbadp-1, - 0x1.ffffffffffc05p-1, - 0x1.ffffffffffc57p-1, - 0x1.ffffffffffca2p-1, - 0x1.ffffffffffce7p-1, - 0x1.ffffffffffd27p-1, - 0x1.ffffffffffd62p-1, - 0x1.ffffffffffd98p-1, - 0x1.ffffffffffdcap-1, - 0x1.ffffffffffdf8p-1, - 0x1.ffffffffffe22p-1, - 0x1.ffffffffffe49p-1, - 0x1.ffffffffffe6cp-1, - 0x1.ffffffffffe8dp-1, - 0x1.ffffffffffeabp-1, - 0x1.ffffffffffec7p-1, - 0x1.ffffffffffee1p-1, - 0x1.ffffffffffef8p-1, - 0x1.fffffffffff0ep-1, - 0x1.fffffffffff22p-1, - 0x1.fffffffffff34p-1, - 0x1.fffffffffff45p-1, - 0x1.fffffffffff54p-1, - 0x1.fffffffffff62p-1, - 0x1.fffffffffff6fp-1, - 0x1.fffffffffff7bp-1, - 0x1.fffffffffff86p-1, - 0x1.fffffffffff90p-1, - 0x1.fffffffffff9ap-1, - 0x1.fffffffffffa2p-1, - 0x1.fffffffffffaap-1, - 0x1.fffffffffffb1p-1, - 0x1.fffffffffffb8p-1, - 0x1.fffffffffffbep-1, - 0x1.fffffffffffc3p-1, - 0x1.fffffffffffc8p-1, - 0x1.fffffffffffcdp-1, - 0x1.fffffffffffd1p-1, - 0x1.fffffffffffd5p-1, - 0x1.fffffffffffd9p-1, - 0x1.fffffffffffdcp-1, - 0x1.fffffffffffdfp-1, - 0x1.fffffffffffe2p-1, - 0x1.fffffffffffe4p-1, - 0x1.fffffffffffe7p-1, - 0x1.fffffffffffe9p-1, - 0x1.fffffffffffebp-1, - 0x1.fffffffffffedp-1, - 0x1.fffffffffffeep-1, - 0x1.ffffffffffff0p-1, - 0x1.ffffffffffff1p-1, - 0x1.ffffffffffff3p-1, - 0x1.ffffffffffff4p-1, - 0x1.ffffffffffff5p-1, - 0x1.ffffffffffff6p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff7p-1, - 0x1.ffffffffffff8p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffff9p-1, - 0x1.ffffffffffffap-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffbp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffcp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 0x1.ffffffffffffdp-1, - 
0x1.ffffffffffffdp-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.ffffffffffffep-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.fffffffffffffp-1, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - 0x1.0000000000000p+0, - }, - .scale = { 0x1.20dd750429b6dp+0, - 0x1.20d8f1975c85dp+0, - 0x1.20cb67bd452c7p+0, - 0x1.20b4d8bac36c1p+0, - 0x1.209546ad13ccfp+0, - 0x1.206cb4897b148p+0, - 0x1.203b261cd0052p+0, - 0x1.2000a00ae3804p+0, - 0x1.1fbd27cdc72d3p+0, - 0x1.1f70c3b4f2cc7p+0, - 0x1.1f1b7ae44867fp+0, - 0x1.1ebd5552f795bp+0, - 0x1.1e565bca400d4p+0, - 0x1.1de697e413d28p+0, - 0x1.1d6e14099944ap+0, - 0x1.1cecdb718d61cp+0, - 0x1.1c62fa1e869b6p+0, - 0x1.1bd07cdd189acp+0, - 0x1.1b357141d95d5p+0, - 0x1.1a91e5a748165p+0, - 0x1.19e5e92b964abp+0, - 0x1.19318bae53a04p+0, - 0x1.1874ddcdfce24p+0, - 0x1.17aff0e56ec10p+0, - 0x1.16e2d7093cd8cp+0, - 0x1.160da304ed92fp+0, - 0x1.153068581b781p+0, - 0x1.144b3b337c90cp+0, - 0x1.135e3075d076bp+0, - 0x1.12695da8b5bdep+0, - 0x1.116cd8fd67618p+0, - 0x1.1068b94962e5ep+0, - 0x1.0f5d1602f7e41p+0, - 0x1.0e4a073dc1b91p+0, - 0x1.0d2fa5a70c168p+0, - 0x1.0c0e0a8223359p+0, - 0x1.0ae54fa490722p+0, - 0x1.09b58f724416bp+0, - 0x1.087ee4d9ad247p+0, - 0x1.07416b4fbfe7cp+0, - 0x1.05fd3ecbec297p+0, - 0x1.04b27bc403d30p+0, - 0x1.03613f2812dafp+0, - 0x1.0209a65e29545p+0, - 0x1.00abcf3e187a9p+0, - 0x1.fe8fb01a47307p-1, - 0x1.fbbbbef34b4b2p-1, - 0x1.f8dc092d58ff8p-1, - 0x1.f5f0cdaf15313p-1, - 0x1.f2fa4c16c0019p-1, - 0x1.eff8c4b1375dbp-1, - 0x1.ecec7870ebca7p-1, - 0x1.e9d5a8e4c934ep-1, - 
0x1.e6b4982f158b9p-1, - 0x1.e38988fc46e72p-1, - 0x1.e054be79d3042p-1, - 0x1.dd167c4cf9d2ap-1, - 0x1.d9cf06898cdafp-1, - 0x1.d67ea1a8b5368p-1, - 0x1.d325927fb9d89p-1, - 0x1.cfc41e36c7df9p-1, - 0x1.cc5a8a3fbea40p-1, - 0x1.c8e91c4d01368p-1, - 0x1.c5701a484ef9dp-1, - 0x1.c1efca49a5011p-1, - 0x1.be68728e29d5dp-1, - 0x1.bada596f25436p-1, - 0x1.b745c55905bf8p-1, - 0x1.b3aafcc27502ep-1, - 0x1.b00a46237d5bep-1, - 0x1.ac63e7ecc1411p-1, - 0x1.a8b8287ec6a09p-1, - 0x1.a5074e2157620p-1, - 0x1.a1519efaf889ep-1, - 0x1.9d97610879642p-1, - 0x1.99d8da149c13fp-1, - 0x1.96164fafd8de3p-1, - 0x1.925007283d7aap-1, - 0x1.8e86458169af8p-1, - 0x1.8ab94f6caa71dp-1, - 0x1.86e9694134b9ep-1, - 0x1.8316d6f48133dp-1, - 0x1.7f41dc12c9e89p-1, - 0x1.7b6abbb7aaf19p-1, - 0x1.7791b886e7403p-1, - 0x1.73b714a552763p-1, - 0x1.6fdb11b1e0c34p-1, - 0x1.6bfdf0beddaf5p-1, - 0x1.681ff24b4ab04p-1, - 0x1.6441563c665d4p-1, - 0x1.60625bd75d07bp-1, - 0x1.5c8341bb23767p-1, - 0x1.58a445da7c74cp-1, - 0x1.54c5a57629db0p-1, - 0x1.50e79d1749ac9p-1, - 0x1.4d0a6889dfd9fp-1, - 0x1.492e42d78d2c5p-1, - 0x1.4553664273d24p-1, - 0x1.417a0c4049fd0p-1, - 0x1.3da26d759aef5p-1, - 0x1.39ccc1b136d5ap-1, - 0x1.35f93fe7d1b3dp-1, - 0x1.32281e2fd1a92p-1, - 0x1.2e5991bd4cbfcp-1, - 0x1.2a8dcede3673bp-1, - 0x1.26c508f6bd0ffp-1, - 0x1.22ff727dd6f7bp-1, - 0x1.1f3d3cf9ffe5ap-1, - 0x1.1b7e98fe26217p-1, - 0x1.17c3b626c7a11p-1, - 0x1.140cc3173f007p-1, - 0x1.1059ed7740313p-1, - 0x1.0cab61f084b93p-1, - 0x1.09014c2ca74dap-1, - 0x1.055bd6d32e8d7p-1, - 0x1.01bb2b87c6968p-1, - 0x1.fc3ee5d1524b0p-2, - 0x1.f511a91a67d2ap-2, - 0x1.edeeee0959518p-2, - 0x1.e6d6ffaa65a25p-2, - 0x1.dfca26f5bbf88p-2, - 0x1.d8c8aace11e63p-2, - 0x1.d1d2cfff91594p-2, - 0x1.cae8d93f1d7b6p-2, - 0x1.c40b0729ed547p-2, - 0x1.bd3998457afdap-2, - 0x1.b674c8ffc6283p-2, - 0x1.afbcd3afe8ab6p-2, - 0x1.a911f096fbc26p-2, - 0x1.a27455e14c93cp-2, - 0x1.9be437a7de946p-2, - 0x1.9561c7f23a47bp-2, - 0x1.8eed36b886d93p-2, - 0x1.8886b1e5ecfd1p-2, - 0x1.822e655b417e6p-2, - 0x1.7be47af1f5d89p-2, - 
0x1.75a91a7f4d2edp-2, - 0x1.6f7c69d7d3ef8p-2, - 0x1.695e8cd31867ep-2, - 0x1.634fa54fa285fp-2, - 0x1.5d4fd33729015p-2, - 0x1.575f3483021c3p-2, - 0x1.517de540ce2a3p-2, - 0x1.4babff975a04cp-2, - 0x1.45e99bcbb7915p-2, - 0x1.4036d0468a7a2p-2, - 0x1.3a93b1998736cp-2, - 0x1.35005285227f1p-2, - 0x1.2f7cc3fe6f423p-2, - 0x1.2a09153529381p-2, - 0x1.24a55399ea239p-2, - 0x1.1f518ae487dc8p-2, - 0x1.1a0dc51a9934dp-2, - 0x1.14da0a961fd14p-2, - 0x1.0fb6620c550afp-2, - 0x1.0aa2d09497f2bp-2, - 0x1.059f59af7a906p-2, - 0x1.00abff4dec7a3p-2, - 0x1.f79183b101c5bp-3, - 0x1.edeb406d9c824p-3, - 0x1.e4652fadcb6b2p-3, - 0x1.daff4969c0b04p-3, - 0x1.d1b982c501370p-3, - 0x1.c893ce1dcbef7p-3, - 0x1.bf8e1b1ca2279p-3, - 0x1.b6a856c3ed54fp-3, - 0x1.ade26b7fbed95p-3, - 0x1.a53c4135a6526p-3, - 0x1.9cb5bd549b111p-3, - 0x1.944ec2e4f5630p-3, - 0x1.8c07329874652p-3, - 0x1.83deeada4d25ap-3, - 0x1.7bd5c7df3fe9cp-3, - 0x1.73eba3b5b07b7p-3, - 0x1.6c205655be71fp-3, - 0x1.6473b5b15a7a1p-3, - 0x1.5ce595c455b0ap-3, - 0x1.5575c8a468361p-3, - 0x1.4e241e912c305p-3, - 0x1.46f066040a832p-3, - 0x1.3fda6bc016994p-3, - 0x1.38e1fae1d6a9dp-3, - 0x1.3206dceef5f87p-3, - 0x1.2b48d9e5dea1cp-3, - 0x1.24a7b84d38971p-3, - 0x1.1e233d434b813p-3, - 0x1.17bb2c8d41535p-3, - 0x1.116f48a6476ccp-3, - 0x1.0b3f52ce8c383p-3, - 0x1.052b0b1a174eap-3, - 0x1.fe6460fef4680p-4, - 0x1.f2a901ccafb37p-4, - 0x1.e723726b824a9p-4, - 0x1.dbd32ac4c99b0p-4, - 0x1.d0b7a0f921e7cp-4, - 0x1.c5d0497c09e74p-4, - 0x1.bb1c972f23e50p-4, - 0x1.b09bfb7d11a83p-4, - 0x1.a64de673e8837p-4, - 0x1.9c31c6df3b1b8p-4, - 0x1.92470a61b6965p-4, - 0x1.888d1d8e510a3p-4, - 0x1.7f036c0107294p-4, - 0x1.75a96077274bap-4, - 0x1.6c7e64e7281cbp-4, - 0x1.6381e2980956bp-4, - 0x1.5ab342383d177p-4, - 0x1.5211ebf41880bp-4, - 0x1.499d478bca735p-4, - 0x1.4154bc68d75c3p-4, - 0x1.3937b1b319259p-4, - 0x1.31458e6542847p-4, - 0x1.297db960e4f63p-4, - 0x1.21df9981f8e53p-4, - 0x1.1a6a95b1e786fp-4, - 0x1.131e14fa1625dp-4, - 0x1.0bf97e95f2a64p-4, - 0x1.04fc3a0481321p-4, - 0x1.fc4b5e32d6259p-5, - 
0x1.eeea8c1b1db93p-5, - 0x1.e1d4cf1e2450ap-5, - 0x1.d508f9a1ea64ep-5, - 0x1.c885df3451a07p-5, - 0x1.bc4a54a84e834p-5, - 0x1.b055303221015p-5, - 0x1.a4a549829587ep-5, - 0x1.993979e14fffdp-5, - 0x1.8e109c4622913p-5, - 0x1.83298d717210ep-5, - 0x1.78832c03aa2b1p-5, - 0x1.6e1c5893c380bp-5, - 0x1.63f3f5c4de13bp-5, - 0x1.5a08e85af27e0p-5, - 0x1.505a174e9c929p-5, - 0x1.46e66be002240p-5, - 0x1.3dacd1a8d8ccdp-5, - 0x1.34ac36ad8dafep-5, - 0x1.2be38b6d92415p-5, - 0x1.2351c2f2d1449p-5, - 0x1.1af5d2e04f3f6p-5, - 0x1.12ceb37ff9bc3p-5, - 0x1.0adb5fcfa8c75p-5, - 0x1.031ad58d56279p-5, - 0x1.f7182a851bca2p-6, - 0x1.e85c449e377f2p-6, - 0x1.da0005e5f28dfp-6, - 0x1.cc0180af00a8bp-6, - 0x1.be5ecd2fcb5f9p-6, - 0x1.b1160991ff737p-6, - 0x1.a4255a00b9f03p-6, - 0x1.978ae8b55ce1bp-6, - 0x1.8b44e6031383ep-6, - 0x1.7f5188610ddc8p-6, - 0x1.73af0c737bb45p-6, - 0x1.685bb5134ef13p-6, - 0x1.5d55cb54cd53ap-6, - 0x1.529b9e8cf9a1ep-6, - 0x1.482b8455dc491p-6, - 0x1.3e03d891b37dep-6, - 0x1.3422fd6d12e2bp-6, - 0x1.2a875b5ffab56p-6, - 0x1.212f612dee7fbp-6, - 0x1.181983e5133ddp-6, - 0x1.0f443edc5ce49p-6, - 0x1.06ae13b0d3255p-6, - 0x1.fcab1483ea7fcp-7, - 0x1.ec72615a894c4p-7, - 0x1.dcaf3691fc448p-7, - 0x1.cd5ec93c12431p-7, - 0x1.be7e5ac24963bp-7, - 0x1.b00b38d6b3575p-7, - 0x1.a202bd6372dcep-7, - 0x1.94624e78e0fafp-7, - 0x1.87275e3a6869dp-7, - 0x1.7a4f6aca256cbp-7, - 0x1.6dd7fe3358230p-7, - 0x1.61beae53b72b7p-7, - 0x1.56011cc3b036dp-7, - 0x1.4a9cf6bda3f4cp-7, - 0x1.3f8ff5042a88ep-7, - 0x1.34d7dbc76d7e5p-7, - 0x1.2a727a89a3f14p-7, - 0x1.205dac02bd6b9p-7, - 0x1.1697560347b25p-7, - 0x1.0d1d69569b82dp-7, - 0x1.03ede1a45bfeep-7, - 0x1.f60d8aa2a88f2p-8, - 0x1.e4cc4abf7d065p-8, - 0x1.d4143a9dfe965p-8, - 0x1.c3e1a5f5c077cp-8, - 0x1.b430ecf4a83a8p-8, - 0x1.a4fe83fb9db25p-8, - 0x1.9646f35a76623p-8, - 0x1.8806d70b2fc36p-8, - 0x1.7a3ade6c8b3e4p-8, - 0x1.6cdfcbfc1e263p-8, - 0x1.5ff2750fe7820p-8, - 0x1.536fc18f7ce5cp-8, - 0x1.4754abacdf1dcp-8, - 0x1.3b9e3f9d06e3fp-8, - 0x1.30499b503957fp-8, - 0x1.2553ee2a336bfp-8, - 
0x1.1aba78ba3af89p-8, - 0x1.107a8c7323a6ep-8, - 0x1.06918b6355624p-8, - 0x1.f9f9cfd9c3035p-9, - 0x1.e77448fb66bb9p-9, - 0x1.d58da68fd1170p-9, - 0x1.c4412bf4b8f0bp-9, - 0x1.b38a3af2e55b4p-9, - 0x1.a3645330550ffp-9, - 0x1.93cb11a30d765p-9, - 0x1.84ba3004a50d0p-9, - 0x1.762d84469c18fp-9, - 0x1.6821000795a03p-9, - 0x1.5a90b00981d93p-9, - 0x1.4d78bba8ca5fdp-9, - 0x1.40d564548fad7p-9, - 0x1.34a305080681fp-9, - 0x1.28de11c5031ebp-9, - 0x1.1d83170fbf6fbp-9, - 0x1.128eb96be8798p-9, - 0x1.07fdb4dafea5fp-9, - 0x1.fb99b8b8279e1p-10, - 0x1.e7f232d9e2630p-10, - 0x1.d4fed7195d7e8p-10, - 0x1.c2b9cf7f893bfp-10, - 0x1.b11d702b3deb1p-10, - 0x1.a024365f771bdp-10, - 0x1.8fc8c794b03b5p-10, - 0x1.8005f08d6f1efp-10, - 0x1.70d6a46e07ddap-10, - 0x1.6235fbd7a4345p-10, - 0x1.541f340697987p-10, - 0x1.468dadf4080abp-10, - 0x1.397ced7af2b15p-10, - 0x1.2ce898809244ep-10, - 0x1.20cc76202c5fap-10, - 0x1.15246dda49d47p-10, - 0x1.09ec86c75d497p-10, - 0x1.fe41cd9bb4eeep-11, - 0x1.e97ba3b77f306p-11, - 0x1.d57f524723822p-11, - 0x1.c245d4b998479p-11, - 0x1.afc85e0f82e12p-11, - 0x1.9e005769dbc1dp-11, - 0x1.8ce75e9f6f8a0p-11, - 0x1.7c7744d9378f7p-11, - 0x1.6caa0d3582fe9p-11, - 0x1.5d79eb71e893bp-11, - 0x1.4ee1429bf7cc0p-11, - 0x1.40daa3c89f5b6p-11, - 0x1.3360ccd23db3ap-11, - 0x1.266ea71d4f71ap-11, - 0x1.19ff4663ae9dfp-11, - 0x1.0e0de78654d1ep-11, - 0x1.0295ef6591848p-11, - 0x1.ef25d37f49fe1p-12, - 0x1.da01102b5f851p-12, - 0x1.c5b5412dcafadp-12, - 0x1.b23a5a23e4210p-12, - 0x1.9f8893d8fd1c1p-12, - 0x1.8d986a4187285p-12, - 0x1.7c629a822bc9ep-12, - 0x1.6be02102b3520p-12, - 0x1.5c0a378c90bcap-12, - 0x1.4cda5374ea275p-12, - 0x1.3e4a23d1f4702p-12, - 0x1.30538fbb77ecdp-12, - 0x1.22f0b496539bdp-12, - 0x1.161be46ad3b50p-12, - 0x1.09cfa445b00ffp-12, - 0x1.fc0d55470cf51p-13, - 0x1.e577bbcd49935p-13, - 0x1.cfd4a5adec5bfp-13, - 0x1.bb1a9657ce465p-13, - 0x1.a740684026555p-13, - 0x1.943d4a1d1ed39p-13, - 0x1.8208bc334a6a5p-13, - 0x1.709a8db59f25cp-13, - 0x1.5feada379d8b7p-13, - 0x1.4ff207314a102p-13, - 
0x1.40a8c1949f75ep-13, - 0x1.3207fb7420eb9p-13, - 0x1.2408e9ba3327fp-13, - 0x1.16a501f0e42cap-13, - 0x1.09d5f819c9e29p-13, - 0x1.fb2b792b40a22p-14, - 0x1.e3bcf436a1a95p-14, - 0x1.cd55277c18d05p-14, - 0x1.b7e94604479dcp-14, - 0x1.a36eec00926ddp-14, - 0x1.8fdc1b2dcf7b9p-14, - 0x1.7d2737527c3f9p-14, - 0x1.6b4702d7d5849p-14, - 0x1.5a329b7d30748p-14, - 0x1.49e17724f4d41p-14, - 0x1.3a4b60ba9aa4dp-14, - 0x1.2b6875310f785p-14, - 0x1.1d312098e9dbap-14, - 0x1.0f9e1b4dd36dfp-14, - 0x1.02a8673a94691p-14, - 0x1.ec929a665b449p-15, - 0x1.d4f4b4c8e09edp-15, - 0x1.be6abbb10a5aap-15, - 0x1.a8e8cc1fadef6p-15, - 0x1.94637d5bacfdbp-15, - 0x1.80cfdc72220cfp-15, - 0x1.6e2367dc27f95p-15, - 0x1.5c540b4936fd2p-15, - 0x1.4b581b8d170fcp-15, - 0x1.3b2652b06c2b2p-15, - 0x1.2bb5cc22e5db6p-15, - 0x1.1cfe010e2052dp-15, - 0x1.0ef6c4c84a0fep-15, - 0x1.01984165a5f36p-15, - 0x1.e9b5e8d00ce76p-16, - 0x1.d16f5716c6c1ap-16, - 0x1.ba4f035d60e02p-16, - 0x1.a447b7b03f045p-16, - 0x1.8f4ccca7fc90dp-16, - 0x1.7b5223dac7336p-16, - 0x1.684c227fcacefp-16, - 0x1.562fac4329b48p-16, - 0x1.44f21e49054f2p-16, - 0x1.34894a5e24657p-16, - 0x1.24eb7254ccf83p-16, - 0x1.160f438c70913p-16, - 0x1.07ebd2a2d2844p-16, - 0x1.f4f12e9ab070ap-17, - 0x1.db5ad0b27805cp-17, - 0x1.c304efa2c6f4ep-17, - 0x1.abe09e9144b5ep-17, - 0x1.95df988e76644p-17, - 0x1.80f439b4ee04bp-17, - 0x1.6d11788a69c64p-17, - 0x1.5a2adfa0b4bc4p-17, - 0x1.4834877429b8fp-17, - 0x1.37231085c7d9ap-17, - 0x1.26eb9daed6f7ep-17, - 0x1.1783ceac28910p-17, - 0x1.08e1badf0fcedp-17, - 0x1.f5f7d88472604p-18, - 0x1.db92b5212fb8dp-18, - 0x1.c282cd3957edap-18, - 0x1.aab7abace48dcp-18, - 0x1.94219bfcb4928p-18, - 0x1.7eb1a2075864dp-18, - 0x1.6a597219a93d9p-18, - 0x1.570b69502f313p-18, - 0x1.44ba864670882p-18, - 0x1.335a62115bce2p-18, - 0x1.22df298214423p-18, - 0x1.133d96ae7e0ddp-18, - 0x1.046aeabcfcdecp-18, - 0x1.ecb9cfe1d8642p-19, - 0x1.d21397ead99cbp-19, - 0x1.b8d094c86d374p-19, - 0x1.a0df0f0c626dcp-19, - 0x1.8a2e269750a39p-19, - 0x1.74adc8f4064d3p-19, - 0x1.604ea819f007cp-19, - 
0x1.4d0231928c6f9p-19, - 0x1.3aba85fe22e1fp-19, - 0x1.296a70f414053p-19, - 0x1.1905613b3abf2p-19, - 0x1.097f6156f32c5p-19, - 0x1.f59a20caf6695p-20, - 0x1.d9c73698fb1dcp-20, - 0x1.bf716c6168baep-20, - 0x1.a6852c6b58392p-20, - 0x1.8eefd70594a88p-20, - 0x1.789fb715aae95p-20, - 0x1.6383f726a8e04p-20, - 0x1.4f8c96f26a26ap-20, - 0x1.3caa61607f920p-20, - 0x1.2acee2f5ecdb8p-20, - 0x1.19ec60b1242edp-20, - 0x1.09f5cf4dd2877p-20, - 0x1.f5bd95d8730d8p-21, - 0x1.d9371e2ff7c35p-21, - 0x1.be41de54d155ap-21, - 0x1.a4c89e08ef4f3p-21, - 0x1.8cb738399b12cp-21, - 0x1.75fa8dbc84becp-21, - 0x1.608078a70dcbcp-21, - 0x1.4c37c0394d094p-21, - 0x1.39100d5687bfep-21, - 0x1.26f9df8519bd6p-21, - 0x1.15e6827001f18p-21, - 0x1.05c803e4831c1p-21, - 0x1.ed22548cffd35p-22, - 0x1.d06ad6ecdf971p-22, - 0x1.b551c847fbc96p-22, - 0x1.9bc09f112b494p-22, - 0x1.83a1ff0aa239dp-22, - 0x1.6ce1aa3fd7bddp-22, - 0x1.576c72b514859p-22, - 0x1.43302cc4a0da8p-22, - 0x1.301ba221dc9bbp-22, - 0x1.1e1e857adc568p-22, - 0x1.0d2966b1746f7p-22, - 0x1.fa5b4f49cc6b2p-23, - 0x1.dc3ae30b55c16p-23, - 0x1.bfd7555a3bd68p-23, - 0x1.a517d9e61628ap-23, - 0x1.8be4f8f6c951fp-23, - 0x1.74287ded49339p-23, - 0x1.5dcd669f2cd34p-23, - 0x1.48bfd38302870p-23, - 0x1.34ecf8a3c124ap-23, - 0x1.22430f521cbcfp-23, - 0x1.10b1488aeb235p-23, - 0x1.0027c00a263a6p-23, - 0x1.e12ee004efc37p-24, - 0x1.c3e44ae32b16bp-24, - 0x1.a854ea14102a8p-24, - 0x1.8e6761569f45dp-24, - 0x1.7603bac345f65p-24, - 0x1.5f1353cdad001p-24, - 0x1.4980cb3c80949p-24, - 0x1.3537f00b6ad4dp-24, - 0x1.2225b12bffc68p-24, - 0x1.10380e1adb7e9p-24, - 0x1.febc107d5efaap-25, - 0x1.df0f2a0ee6946p-25, - 0x1.c14b2188bcee4p-25, - 0x1.a553644f7f07dp-25, - 0x1.8b0cfce0579dfp-25, - 0x1.725e7c5dd20f7p-25, - 0x1.5b2fe547a1340p-25, - 0x1.456a974e92e93p-25, - 0x1.30f93c3699078p-25, - 0x1.1dc7b5b978cf8p-25, - 0x1.0bc30c5d52f15p-25, - 0x1.f5b2be65a0c7fp-26, - 0x1.d5f3a8dea7357p-26, - 0x1.b82915b03515bp-26, - 0x1.9c3517e789488p-26, - 0x1.81fb7df06136ep-26, - 0x1.6961b8d641d06p-26, - 0x1.524ec4d916caep-26, - 
0x1.3cab1343d18d1p-26, - 0x1.2860757487a01p-26, - 0x1.155a09065d4f7p-26, - 0x1.0384250e4c9fcp-26, - 0x1.e59890b926c78p-27, - 0x1.c642116a8a9e3p-27, - 0x1.a8e405e651ab6p-27, - 0x1.8d5f98114f872p-27, - 0x1.7397c5a66e307p-27, - 0x1.5b71456c5a4c4p-27, - 0x1.44d26de513197p-27, - 0x1.2fa31d6371537p-27, - 0x1.1bcca373b7b43p-27, - 0x1.0939ab853339fp-27, - 0x1.efac5187b2863p-28, - 0x1.cf1e86235d0e6p-28, - 0x1.b0a68a2128babp-28, - 0x1.9423165bc4444p-28, - 0x1.7974e743dea3cp-28, - 0x1.607e9eacd1050p-28, - 0x1.4924a74dec728p-28, - 0x1.334d19e0c2160p-28, - 0x1.1edfa3c5f5ccap-28, - 0x1.0bc56f1b54701p-28, - 0x1.f3d2185e047d9p-29, - 0x1.d26cb87945e87p-29, - 0x1.b334fac4b9f99p-29, - 0x1.96076f7918d1cp-29, - 0x1.7ac2d72fc2c63p-29, - 0x1.614801550319ep-29, - 0x1.4979ac8b28926p-29, - 0x1.333c68e2d0548p-29, - 0x1.1e767bce37dd7p-29, - 0x1.0b0fc5b6d05a0p-29, - 0x1.f1e3523b41d7dp-30, - 0x1.d00de6608effep-30, - 0x1.b0778b7b3301ap-30, - 0x1.92fb04ec0f6cfp-30, - 0x1.77756ec9f78fap-30, - 0x1.5dc61922d5a06p-30, - 0x1.45ce65699ff6dp-30, - 0x1.2f71a5f159970p-30, - 0x1.1a94ff571654fp-30, - 0x1.071f4bbea09ecp-30, - 0x1.e9f1ff8ddd774p-31, - 0x1.c818223a202c7p-31, - 0x1.a887bd2b4404dp-31, - 0x1.8b1a336c5eb6bp-31, - 0x1.6fab63324088ap-31, - 0x1.56197e30205bap-31, - 0x1.3e44e45301b92p-31, - 0x1.281000bfe4c3fp-31, - 0x1.135f28f2d50b4p-31, - 0x1.00187dded5975p-31, - 0x1.dc479de0ef001p-32, - 0x1.bad4fdad3caa1p-32, - 0x1.9baed3ed27ab8p-32, - 0x1.7ead9ce4285bbp-32, - 0x1.63ac6b4edc88ep-32, - 0x1.4a88be2a6390cp-32, - 0x1.332259185f1a0p-32, - 0x1.1d5b1f3793044p-32, - 0x1.0916f04b6e18bp-32, - 0x1.ec77101de6926p-33, - 0x1.c960bf23153e0p-33, - 0x1.a8bd20fc65ef7p-33, - 0x1.8a61745ec7d1dp-33, - 0x1.6e25d0e756261p-33, - 0x1.53e4f7d1666cbp-33, - 0x1.3b7c27a7ddb0ep-33, - 0x1.24caf2c32af14p-33, - 0x1.0fb3186804d0fp-33, - 0x1.f830c0bb41fd7p-34, - 0x1.d3c0f1a91c846p-34, - 0x1.b1e5acf351d87p-34, - 0x1.92712d259ce66p-34, - 0x1.7538c60a04476p-34, - 0x1.5a14b04b47879p-34, - 0x1.40dfd87456f4cp-34, - 0x1.2977b1172b9d5p-34, - 
0x1.13bc07e891491p-34, - 0x1.ff1dbb4300811p-35, - 0x1.d9a880f306bd8p-35, - 0x1.b6e45220b55e0p-35, - 0x1.96a0b33f2c4dap-35, - 0x1.78b07e9e924acp-35, - 0x1.5ce9ab1670dd2p-35, - 0x1.4325167006bb0p-35, - 0x1.2b3e53538ff3fp-35, - 0x1.15137a7f44864p-35, - 0x1.0084ff125639dp-35, - 0x1.daeb0b7311ec7p-36, - 0x1.b7937d1c40c52p-36, - 0x1.96d082f59ab06p-36, - 0x1.7872d9fa10aadp-36, - 0x1.5c4e8e37bc7d0p-36, - 0x1.423ac0df49a40p-36, - 0x1.2a117230ad284p-36, - 0x1.13af4f04f9998p-36, - 0x1.fde703724e560p-37, - 0x1.d77f0c82e7641p-37, - 0x1.b3ee02611d7ddp-37, - 0x1.92ff33023d5bdp-37, - 0x1.7481a9e69f53fp-37, - 0x1.5847eda620959p-37, - 0x1.3e27c1fcc74bdp-37, - 0x1.25f9ee0b923dcp-37, - 0x1.0f9a0686531ffp-37, - 0x1.f5cc7718082afp-38, - 0x1.cf7e53d6a2ca5p-38, - 0x1.ac0f5f3229372p-38, - 0x1.8b498644847eap-38, - 0x1.6cfa9bcca59dcp-38, - 0x1.50f411d4fd2cdp-38, - 0x1.370ab8327af5ep-38, - 0x1.1f167f88c6b6ep-38, - 0x1.08f24085d4597p-38, - 0x1.e8f70e181d619p-39, - 0x1.c324c20e337dcp-39, - 0x1.a03261574b54ep-39, - 0x1.7fe903cdf5855p-39, - 0x1.6215c58da3450p-39, - 0x1.46897d4b69fc6p-39, - 0x1.2d1877d731b7bp-39, - 0x1.159a386b11517p-39, - 0x1.ffd27ae9393cep-40, - 0x1.d7c593130dd0bp-40, - 0x1.b2cd607c79bcfp-40, - 0x1.90ae4d3405651p-40, - 0x1.71312dd1759e2p-40, - 0x1.5422ef5d8949dp-40, - 0x1.39544b0ecc957p-40, - 0x1.20997f73e73ddp-40, - 0x1.09ca0eaacd277p-40, - 0x1.e9810295890ecp-41, - 0x1.c2b45b5aa4a1dp-41, - 0x1.9eee068fa7596p-41, - 0x1.7df2b399c10a8p-41, - 0x1.5f8b87a31bd85p-41, - 0x1.4385c96e9a2d9p-41, - 0x1.29b2933ef4cbcp-41, - 0x1.11e68a6378f8ap-41, - 0x1.f7f338086a86bp-42, - 0x1.cf8d7d9ce040ap-42, - 0x1.aa577251ae484p-42, - 0x1.8811d739efb5ep-42, - 0x1.68823e52970bep-42, - 0x1.4b72ae68e8b4cp-42, - 0x1.30b14dbe876bcp-42, - 0x1.181012ef86610p-42, - 0x1.01647ba798744p-42, - 0x1.d90e917701675p-43, - 0x1.b2a87e86d0c8ap-43, - 0x1.8f53dcb377293p-43, - 0x1.6ed2f2515e933p-43, - 0x1.50ecc9ed47f19p-43, - 0x1.356cd5ce7799ep-43, - 0x1.1c229a587ab78p-43, - 0x1.04e15ecc7f3f6p-43, - 0x1.deffc7e6a6017p-44, - 
0x1.b7b040832f310p-44, - 0x1.938e021f36d76p-44, - 0x1.7258610b3b233p-44, - 0x1.53d3bfc82a909p-44, - 0x1.37c92babdc2fdp-44, - 0x1.1e06010120f6ap-44, - 0x1.065b9616170d4p-44, - 0x1.e13dd96b3753ap-45, - 0x1.b950d32467392p-45, - 0x1.94a72263259a5p-45, - 0x1.72fd93e036cdcp-45, - 0x1.54164576929abp-45, - 0x1.37b83c521fe96p-45, - 0x1.1daf033182e96p-45, - 0x1.05ca50205d26ap-45, - 0x1.dfbb6235639fap-46, - 0x1.b7807e294781fp-46, - 0x1.9298add70a734p-46, - 0x1.70beaf9c7ffb6p-46, - 0x1.51b2cd6709222p-46, - 0x1.353a6cf7f7fffp-46, - 0x1.1b1fa8cbe84a7p-46, - 0x1.0330f0fd69921p-46, - 0x1.da81670f96f9bp-47, - 0x1.b24a16b4d09aap-47, - 0x1.8d6eeb6efdbd6p-47, - 0x1.6ba91ac734785p-47, - 0x1.4cb7966770ab5p-47, - 0x1.305e9721d0981p-47, - 0x1.1667311fff70ap-47, - 0x1.fd3de10d62855p-48, - 0x1.d1aefbcd48d0cp-48, - 0x1.a9cc93c25aca9p-48, - 0x1.85487ee3ea735p-48, - 0x1.63daf8b4b1e0cp-48, - 0x1.45421e69a6ca1p-48, - 0x1.294175802d99ap-48, - 0x1.0fa17bf41068fp-48, - 0x1.f05e82aae2bb9p-49, - 0x1.c578101b29058p-49, - 0x1.9e39dc5dd2f7cp-49, - 0x1.7a553a728bbf2p-49, - 0x1.5982008db1304p-49, - 0x1.3b7e00422e51bp-49, - 0x1.200c898d9ee3ep-49, - 0x1.06f5f7eb65a56p-49, - 0x1.e00e9148a1d25p-50, - 0x1.b623734024e92p-50, - 0x1.8fd4e01891bf8p-50, - 0x1.6cd44c7470d89p-50, - 0x1.4cd9c04158cd7p-50, - 0x1.2fa34bf5c8344p-50, - 0x1.14f4890ff2461p-50, - 0x1.f92c49dfa4df5p-51, - 0x1.ccaaea71ab0dfp-51, - 0x1.a40829f001197p-51, - 0x1.7eef13b59e96cp-51, - 0x1.5d11e1a252bf5p-51, - 0x1.3e296303b2297p-51, - 0x1.21f47009f43cep-51, - 0x1.083768c5e4541p-51, - 0x1.e1777d831265ep-52, - 0x1.b69f10b0191b5p-52, - 0x1.8f8a3a05b5b52p-52, - 0x1.6be573c40c8e7p-52, - 0x1.4b645ba991fdbp-52, - 0x1.2dc119095729fp-52, - }, -}; diff --git a/sysdeps/aarch64/fpu/sv_erff_data.c b/sysdeps/aarch64/fpu/sv_erff_data.c deleted file mode 100644 index 6dcd72af69..0000000000 --- a/sysdeps/aarch64/fpu/sv_erff_data.c +++ /dev/null @@ -1,1058 +0,0 @@ -/* Table for SVE erff approximation - - Copyright (C) 2024 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include "vecmath_config.h" - -/* Lookup table used in SVE erff. - For each possible rounded input r (multiples of 1/128), between - r = 0.0 and r = 4.0 (513 values): - - __erff_data.erf contains the values of erf(r), - - __erff_data.scale contains the values of 2/sqrt(pi)*exp(-r^2). - Note that indices 0 and 1 are never hit by the algorithm, since lookup is - performed only for x >= 1/64-1/512. 
*/ -const struct sv_erff_data __sv_erff_data = { - .erf = { 0x0.000000p+0, - 0x1.20dbf4p-7, - 0x1.20d770p-6, - 0x1.b137e0p-6, - 0x1.20c564p-5, - 0x1.68e5d4p-5, - 0x1.b0fafep-5, - 0x1.f902a8p-5, - 0x1.207d48p-4, - 0x1.44703ep-4, - 0x1.68591ap-4, - 0x1.8c36bep-4, - 0x1.b00812p-4, - 0x1.d3cbf8p-4, - 0x1.f7815ap-4, - 0x1.0d9390p-3, - 0x1.1f5e1ap-3, - 0x1.311fc2p-3, - 0x1.42d7fcp-3, - 0x1.548642p-3, - 0x1.662a0cp-3, - 0x1.77c2d2p-3, - 0x1.895010p-3, - 0x1.9ad142p-3, - 0x1.ac45e4p-3, - 0x1.bdad72p-3, - 0x1.cf076ep-3, - 0x1.e05354p-3, - 0x1.f190aap-3, - 0x1.015f78p-2, - 0x1.09eed6p-2, - 0x1.127632p-2, - 0x1.1af54ep-2, - 0x1.236bf0p-2, - 0x1.2bd9dcp-2, - 0x1.343ed6p-2, - 0x1.3c9aa8p-2, - 0x1.44ed18p-2, - 0x1.4d35f0p-2, - 0x1.5574f4p-2, - 0x1.5da9f4p-2, - 0x1.65d4b8p-2, - 0x1.6df50ap-2, - 0x1.760abap-2, - 0x1.7e1594p-2, - 0x1.861566p-2, - 0x1.8e0a02p-2, - 0x1.95f336p-2, - 0x1.9dd0d2p-2, - 0x1.a5a2acp-2, - 0x1.ad6896p-2, - 0x1.b52264p-2, - 0x1.bccfecp-2, - 0x1.c47104p-2, - 0x1.cc0584p-2, - 0x1.d38d44p-2, - 0x1.db081cp-2, - 0x1.e275eap-2, - 0x1.e9d68ap-2, - 0x1.f129d4p-2, - 0x1.f86faap-2, - 0x1.ffa7eap-2, - 0x1.03693ap-1, - 0x1.06f794p-1, - 0x1.0a7ef6p-1, - 0x1.0dff50p-1, - 0x1.117894p-1, - 0x1.14eab4p-1, - 0x1.1855a6p-1, - 0x1.1bb95cp-1, - 0x1.1f15ccp-1, - 0x1.226ae8p-1, - 0x1.25b8a8p-1, - 0x1.28ff02p-1, - 0x1.2c3decp-1, - 0x1.2f755cp-1, - 0x1.32a54cp-1, - 0x1.35cdb4p-1, - 0x1.38ee8ap-1, - 0x1.3c07cap-1, - 0x1.3f196ep-1, - 0x1.42236ep-1, - 0x1.4525c8p-1, - 0x1.482074p-1, - 0x1.4b1372p-1, - 0x1.4dfebap-1, - 0x1.50e24cp-1, - 0x1.53be26p-1, - 0x1.569244p-1, - 0x1.595ea6p-1, - 0x1.5c2348p-1, - 0x1.5ee02ep-1, - 0x1.619556p-1, - 0x1.6442c0p-1, - 0x1.66e86ep-1, - 0x1.69865ep-1, - 0x1.6c1c98p-1, - 0x1.6eab18p-1, - 0x1.7131e6p-1, - 0x1.73b102p-1, - 0x1.762870p-1, - 0x1.789836p-1, - 0x1.7b0058p-1, - 0x1.7d60d8p-1, - 0x1.7fb9c0p-1, - 0x1.820b12p-1, - 0x1.8454d6p-1, - 0x1.869712p-1, - 0x1.88d1cep-1, - 0x1.8b050ep-1, - 0x1.8d30dep-1, - 0x1.8f5544p-1, - 0x1.91724ap-1, - 0x1.9387f6p-1, - 
0x1.959652p-1, - 0x1.979d68p-1, - 0x1.999d42p-1, - 0x1.9b95e8p-1, - 0x1.9d8768p-1, - 0x1.9f71cap-1, - 0x1.a1551ap-1, - 0x1.a33162p-1, - 0x1.a506b0p-1, - 0x1.a6d50cp-1, - 0x1.a89c86p-1, - 0x1.aa5d26p-1, - 0x1.ac16fcp-1, - 0x1.adca14p-1, - 0x1.af767ap-1, - 0x1.b11c3cp-1, - 0x1.b2bb68p-1, - 0x1.b4540ap-1, - 0x1.b5e630p-1, - 0x1.b771e8p-1, - 0x1.b8f742p-1, - 0x1.ba764ap-1, - 0x1.bbef10p-1, - 0x1.bd61a2p-1, - 0x1.bece0ep-1, - 0x1.c03464p-1, - 0x1.c194b2p-1, - 0x1.c2ef08p-1, - 0x1.c44376p-1, - 0x1.c5920ap-1, - 0x1.c6dad2p-1, - 0x1.c81de2p-1, - 0x1.c95b46p-1, - 0x1.ca930ep-1, - 0x1.cbc54cp-1, - 0x1.ccf20cp-1, - 0x1.ce1962p-1, - 0x1.cf3b5cp-1, - 0x1.d0580cp-1, - 0x1.d16f7ep-1, - 0x1.d281c4p-1, - 0x1.d38ef0p-1, - 0x1.d49710p-1, - 0x1.d59a34p-1, - 0x1.d6986cp-1, - 0x1.d791cap-1, - 0x1.d8865ep-1, - 0x1.d97636p-1, - 0x1.da6162p-1, - 0x1.db47f4p-1, - 0x1.dc29fcp-1, - 0x1.dd0788p-1, - 0x1.dde0aap-1, - 0x1.deb570p-1, - 0x1.df85eap-1, - 0x1.e0522ap-1, - 0x1.e11a3ep-1, - 0x1.e1de36p-1, - 0x1.e29e22p-1, - 0x1.e35a12p-1, - 0x1.e41214p-1, - 0x1.e4c638p-1, - 0x1.e5768cp-1, - 0x1.e62322p-1, - 0x1.e6cc08p-1, - 0x1.e7714ap-1, - 0x1.e812fcp-1, - 0x1.e8b12ap-1, - 0x1.e94be4p-1, - 0x1.e9e336p-1, - 0x1.ea7730p-1, - 0x1.eb07e2p-1, - 0x1.eb9558p-1, - 0x1.ec1fa2p-1, - 0x1.eca6ccp-1, - 0x1.ed2ae6p-1, - 0x1.edabfcp-1, - 0x1.ee2a1ep-1, - 0x1.eea556p-1, - 0x1.ef1db4p-1, - 0x1.ef9344p-1, - 0x1.f00614p-1, - 0x1.f07630p-1, - 0x1.f0e3a6p-1, - 0x1.f14e82p-1, - 0x1.f1b6d0p-1, - 0x1.f21ca0p-1, - 0x1.f27ff8p-1, - 0x1.f2e0eap-1, - 0x1.f33f7ep-1, - 0x1.f39bc2p-1, - 0x1.f3f5c2p-1, - 0x1.f44d88p-1, - 0x1.f4a31ep-1, - 0x1.f4f694p-1, - 0x1.f547f2p-1, - 0x1.f59742p-1, - 0x1.f5e490p-1, - 0x1.f62fe8p-1, - 0x1.f67952p-1, - 0x1.f6c0dcp-1, - 0x1.f7068cp-1, - 0x1.f74a6ep-1, - 0x1.f78c8cp-1, - 0x1.f7cceep-1, - 0x1.f80ba2p-1, - 0x1.f848acp-1, - 0x1.f8841ap-1, - 0x1.f8bdf2p-1, - 0x1.f8f63ep-1, - 0x1.f92d08p-1, - 0x1.f96256p-1, - 0x1.f99634p-1, - 0x1.f9c8a8p-1, - 0x1.f9f9bap-1, - 0x1.fa2974p-1, - 0x1.fa57dep-1, - 
0x1.fa84fep-1, - 0x1.fab0dep-1, - 0x1.fadb84p-1, - 0x1.fb04f6p-1, - 0x1.fb2d40p-1, - 0x1.fb5464p-1, - 0x1.fb7a6cp-1, - 0x1.fb9f60p-1, - 0x1.fbc344p-1, - 0x1.fbe61ep-1, - 0x1.fc07fap-1, - 0x1.fc28d8p-1, - 0x1.fc48c2p-1, - 0x1.fc67bcp-1, - 0x1.fc85d0p-1, - 0x1.fca2fep-1, - 0x1.fcbf52p-1, - 0x1.fcdaccp-1, - 0x1.fcf576p-1, - 0x1.fd0f54p-1, - 0x1.fd286ap-1, - 0x1.fd40bep-1, - 0x1.fd5856p-1, - 0x1.fd6f34p-1, - 0x1.fd8562p-1, - 0x1.fd9ae2p-1, - 0x1.fdafb8p-1, - 0x1.fdc3e8p-1, - 0x1.fdd77ap-1, - 0x1.fdea6ep-1, - 0x1.fdfcccp-1, - 0x1.fe0e96p-1, - 0x1.fe1fd0p-1, - 0x1.fe3080p-1, - 0x1.fe40a6p-1, - 0x1.fe504cp-1, - 0x1.fe5f70p-1, - 0x1.fe6e18p-1, - 0x1.fe7c46p-1, - 0x1.fe8a00p-1, - 0x1.fe9748p-1, - 0x1.fea422p-1, - 0x1.feb090p-1, - 0x1.febc96p-1, - 0x1.fec836p-1, - 0x1.fed374p-1, - 0x1.fede52p-1, - 0x1.fee8d4p-1, - 0x1.fef2fep-1, - 0x1.fefccep-1, - 0x1.ff064cp-1, - 0x1.ff0f76p-1, - 0x1.ff1852p-1, - 0x1.ff20e0p-1, - 0x1.ff2924p-1, - 0x1.ff3120p-1, - 0x1.ff38d6p-1, - 0x1.ff4048p-1, - 0x1.ff4778p-1, - 0x1.ff4e68p-1, - 0x1.ff551ap-1, - 0x1.ff5b90p-1, - 0x1.ff61ccp-1, - 0x1.ff67d0p-1, - 0x1.ff6d9ep-1, - 0x1.ff7338p-1, - 0x1.ff789ep-1, - 0x1.ff7dd4p-1, - 0x1.ff82dap-1, - 0x1.ff87b2p-1, - 0x1.ff8c5cp-1, - 0x1.ff90dcp-1, - 0x1.ff9532p-1, - 0x1.ff9960p-1, - 0x1.ff9d68p-1, - 0x1.ffa14ap-1, - 0x1.ffa506p-1, - 0x1.ffa8a0p-1, - 0x1.ffac18p-1, - 0x1.ffaf6ep-1, - 0x1.ffb2a6p-1, - 0x1.ffb5bep-1, - 0x1.ffb8b8p-1, - 0x1.ffbb98p-1, - 0x1.ffbe5ap-1, - 0x1.ffc102p-1, - 0x1.ffc390p-1, - 0x1.ffc606p-1, - 0x1.ffc862p-1, - 0x1.ffcaa8p-1, - 0x1.ffccd8p-1, - 0x1.ffcef4p-1, - 0x1.ffd0fap-1, - 0x1.ffd2eap-1, - 0x1.ffd4cap-1, - 0x1.ffd696p-1, - 0x1.ffd84ep-1, - 0x1.ffd9f8p-1, - 0x1.ffdb90p-1, - 0x1.ffdd18p-1, - 0x1.ffde90p-1, - 0x1.ffdffap-1, - 0x1.ffe154p-1, - 0x1.ffe2a2p-1, - 0x1.ffe3e2p-1, - 0x1.ffe514p-1, - 0x1.ffe63cp-1, - 0x1.ffe756p-1, - 0x1.ffe866p-1, - 0x1.ffe96ap-1, - 0x1.ffea64p-1, - 0x1.ffeb54p-1, - 0x1.ffec3ap-1, - 0x1.ffed16p-1, - 0x1.ffedeap-1, - 0x1.ffeeb4p-1, - 0x1.ffef76p-1, - 
0x1.fff032p-1, - 0x1.fff0e4p-1, - 0x1.fff18ep-1, - 0x1.fff232p-1, - 0x1.fff2d0p-1, - 0x1.fff366p-1, - 0x1.fff3f6p-1, - 0x1.fff480p-1, - 0x1.fff504p-1, - 0x1.fff582p-1, - 0x1.fff5fcp-1, - 0x1.fff670p-1, - 0x1.fff6dep-1, - 0x1.fff74ap-1, - 0x1.fff7aep-1, - 0x1.fff810p-1, - 0x1.fff86cp-1, - 0x1.fff8c6p-1, - 0x1.fff91cp-1, - 0x1.fff96cp-1, - 0x1.fff9bap-1, - 0x1.fffa04p-1, - 0x1.fffa4cp-1, - 0x1.fffa90p-1, - 0x1.fffad0p-1, - 0x1.fffb0ep-1, - 0x1.fffb4ap-1, - 0x1.fffb82p-1, - 0x1.fffbb8p-1, - 0x1.fffbecp-1, - 0x1.fffc1ep-1, - 0x1.fffc4ep-1, - 0x1.fffc7ap-1, - 0x1.fffca6p-1, - 0x1.fffccep-1, - 0x1.fffcf6p-1, - 0x1.fffd1ap-1, - 0x1.fffd3ep-1, - 0x1.fffd60p-1, - 0x1.fffd80p-1, - 0x1.fffda0p-1, - 0x1.fffdbep-1, - 0x1.fffddap-1, - 0x1.fffdf4p-1, - 0x1.fffe0ep-1, - 0x1.fffe26p-1, - 0x1.fffe3ep-1, - 0x1.fffe54p-1, - 0x1.fffe68p-1, - 0x1.fffe7ep-1, - 0x1.fffe90p-1, - 0x1.fffea2p-1, - 0x1.fffeb4p-1, - 0x1.fffec4p-1, - 0x1.fffed4p-1, - 0x1.fffee4p-1, - 0x1.fffef2p-1, - 0x1.ffff00p-1, - 0x1.ffff0cp-1, - 0x1.ffff18p-1, - 0x1.ffff24p-1, - 0x1.ffff30p-1, - 0x1.ffff3ap-1, - 0x1.ffff44p-1, - 0x1.ffff4ep-1, - 0x1.ffff56p-1, - 0x1.ffff60p-1, - 0x1.ffff68p-1, - 0x1.ffff70p-1, - 0x1.ffff78p-1, - 0x1.ffff7ep-1, - 0x1.ffff84p-1, - 0x1.ffff8cp-1, - 0x1.ffff92p-1, - 0x1.ffff98p-1, - 0x1.ffff9cp-1, - 0x1.ffffa2p-1, - 0x1.ffffa6p-1, - 0x1.ffffacp-1, - 0x1.ffffb0p-1, - 0x1.ffffb4p-1, - 0x1.ffffb8p-1, - 0x1.ffffbcp-1, - 0x1.ffffc0p-1, - 0x1.ffffc4p-1, - 0x1.ffffc6p-1, - 0x1.ffffcap-1, - 0x1.ffffccp-1, - 0x1.ffffd0p-1, - 0x1.ffffd2p-1, - 0x1.ffffd4p-1, - 0x1.ffffd6p-1, - 0x1.ffffd8p-1, - 0x1.ffffdcp-1, - 0x1.ffffdep-1, - 0x1.ffffdep-1, - 0x1.ffffe0p-1, - 0x1.ffffe2p-1, - 0x1.ffffe4p-1, - 0x1.ffffe6p-1, - 0x1.ffffe8p-1, - 0x1.ffffe8p-1, - 0x1.ffffeap-1, - 0x1.ffffeap-1, - 0x1.ffffecp-1, - 0x1.ffffeep-1, - 0x1.ffffeep-1, - 0x1.fffff0p-1, - 0x1.fffff0p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff2p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff4p-1, - 0x1.fffff6p-1, - 0x1.fffff6p-1, - 
0x1.fffff6p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffff8p-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffap-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffcp-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.fffffep-1, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - 0x1.000000p+0, - }, - .scale = { 0x1.20dd76p+0, - 0x1.20d8f2p+0, - 0x1.20cb68p+0, - 0x1.20b4d8p+0, - 0x1.209546p+0, - 0x1.206cb4p+0, - 0x1.203b26p+0, - 0x1.2000a0p+0, - 0x1.1fbd28p+0, - 0x1.1f70c4p+0, - 0x1.1f1b7ap+0, - 0x1.1ebd56p+0, - 0x1.1e565cp+0, - 0x1.1de698p+0, - 0x1.1d6e14p+0, - 0x1.1cecdcp+0, - 0x1.1c62fap+0, - 0x1.1bd07cp+0, - 0x1.1b3572p+0, - 0x1.1a91e6p+0, - 0x1.19e5eap+0, - 0x1.19318cp+0, - 0x1.1874dep+0, - 0x1.17aff0p+0, - 0x1.16e2d8p+0, - 0x1.160da4p+0, - 0x1.153068p+0, - 0x1.144b3cp+0, - 0x1.135e30p+0, - 0x1.12695ep+0, - 0x1.116cd8p+0, - 0x1.1068bap+0, - 0x1.0f5d16p+0, - 0x1.0e4a08p+0, - 0x1.0d2fa6p+0, - 0x1.0c0e0ap+0, - 0x1.0ae550p+0, - 0x1.09b590p+0, - 0x1.087ee4p+0, - 0x1.07416cp+0, - 0x1.05fd3ep+0, - 0x1.04b27cp+0, - 0x1.036140p+0, - 0x1.0209a6p+0, - 0x1.00abd0p+0, - 0x1.fe8fb0p-1, - 0x1.fbbbbep-1, - 0x1.f8dc0ap-1, - 0x1.f5f0cep-1, - 0x1.f2fa4cp-1, - 0x1.eff8c4p-1, - 0x1.ecec78p-1, - 0x1.e9d5a8p-1, - 0x1.e6b498p-1, - 0x1.e38988p-1, - 0x1.e054bep-1, - 0x1.dd167cp-1, - 0x1.d9cf06p-1, - 0x1.d67ea2p-1, - 0x1.d32592p-1, - 0x1.cfc41ep-1, - 0x1.cc5a8ap-1, - 0x1.c8e91cp-1, - 0x1.c5701ap-1, - 0x1.c1efcap-1, - 0x1.be6872p-1, - 0x1.bada5ap-1, - 0x1.b745c6p-1, - 
0x1.b3aafcp-1, - 0x1.b00a46p-1, - 0x1.ac63e8p-1, - 0x1.a8b828p-1, - 0x1.a5074ep-1, - 0x1.a1519ep-1, - 0x1.9d9762p-1, - 0x1.99d8dap-1, - 0x1.961650p-1, - 0x1.925008p-1, - 0x1.8e8646p-1, - 0x1.8ab950p-1, - 0x1.86e96ap-1, - 0x1.8316d6p-1, - 0x1.7f41dcp-1, - 0x1.7b6abcp-1, - 0x1.7791b8p-1, - 0x1.73b714p-1, - 0x1.6fdb12p-1, - 0x1.6bfdf0p-1, - 0x1.681ff2p-1, - 0x1.644156p-1, - 0x1.60625cp-1, - 0x1.5c8342p-1, - 0x1.58a446p-1, - 0x1.54c5a6p-1, - 0x1.50e79ep-1, - 0x1.4d0a68p-1, - 0x1.492e42p-1, - 0x1.455366p-1, - 0x1.417a0cp-1, - 0x1.3da26ep-1, - 0x1.39ccc2p-1, - 0x1.35f940p-1, - 0x1.32281ep-1, - 0x1.2e5992p-1, - 0x1.2a8dcep-1, - 0x1.26c508p-1, - 0x1.22ff72p-1, - 0x1.1f3d3cp-1, - 0x1.1b7e98p-1, - 0x1.17c3b6p-1, - 0x1.140cc4p-1, - 0x1.1059eep-1, - 0x1.0cab62p-1, - 0x1.09014cp-1, - 0x1.055bd6p-1, - 0x1.01bb2cp-1, - 0x1.fc3ee6p-2, - 0x1.f511aap-2, - 0x1.edeeeep-2, - 0x1.e6d700p-2, - 0x1.dfca26p-2, - 0x1.d8c8aap-2, - 0x1.d1d2d0p-2, - 0x1.cae8dap-2, - 0x1.c40b08p-2, - 0x1.bd3998p-2, - 0x1.b674c8p-2, - 0x1.afbcd4p-2, - 0x1.a911f0p-2, - 0x1.a27456p-2, - 0x1.9be438p-2, - 0x1.9561c8p-2, - 0x1.8eed36p-2, - 0x1.8886b2p-2, - 0x1.822e66p-2, - 0x1.7be47ap-2, - 0x1.75a91ap-2, - 0x1.6f7c6ap-2, - 0x1.695e8cp-2, - 0x1.634fa6p-2, - 0x1.5d4fd4p-2, - 0x1.575f34p-2, - 0x1.517de6p-2, - 0x1.4bac00p-2, - 0x1.45e99cp-2, - 0x1.4036d0p-2, - 0x1.3a93b2p-2, - 0x1.350052p-2, - 0x1.2f7cc4p-2, - 0x1.2a0916p-2, - 0x1.24a554p-2, - 0x1.1f518ap-2, - 0x1.1a0dc6p-2, - 0x1.14da0ap-2, - 0x1.0fb662p-2, - 0x1.0aa2d0p-2, - 0x1.059f5ap-2, - 0x1.00ac00p-2, - 0x1.f79184p-3, - 0x1.edeb40p-3, - 0x1.e46530p-3, - 0x1.daff4ap-3, - 0x1.d1b982p-3, - 0x1.c893cep-3, - 0x1.bf8e1cp-3, - 0x1.b6a856p-3, - 0x1.ade26cp-3, - 0x1.a53c42p-3, - 0x1.9cb5bep-3, - 0x1.944ec2p-3, - 0x1.8c0732p-3, - 0x1.83deeap-3, - 0x1.7bd5c8p-3, - 0x1.73eba4p-3, - 0x1.6c2056p-3, - 0x1.6473b6p-3, - 0x1.5ce596p-3, - 0x1.5575c8p-3, - 0x1.4e241ep-3, - 0x1.46f066p-3, - 0x1.3fda6cp-3, - 0x1.38e1fap-3, - 0x1.3206dcp-3, - 0x1.2b48dap-3, - 0x1.24a7b8p-3, - 
0x1.1e233ep-3, - 0x1.17bb2cp-3, - 0x1.116f48p-3, - 0x1.0b3f52p-3, - 0x1.052b0cp-3, - 0x1.fe6460p-4, - 0x1.f2a902p-4, - 0x1.e72372p-4, - 0x1.dbd32ap-4, - 0x1.d0b7a0p-4, - 0x1.c5d04ap-4, - 0x1.bb1c98p-4, - 0x1.b09bfcp-4, - 0x1.a64de6p-4, - 0x1.9c31c6p-4, - 0x1.92470ap-4, - 0x1.888d1ep-4, - 0x1.7f036cp-4, - 0x1.75a960p-4, - 0x1.6c7e64p-4, - 0x1.6381e2p-4, - 0x1.5ab342p-4, - 0x1.5211ecp-4, - 0x1.499d48p-4, - 0x1.4154bcp-4, - 0x1.3937b2p-4, - 0x1.31458ep-4, - 0x1.297dbap-4, - 0x1.21df9ap-4, - 0x1.1a6a96p-4, - 0x1.131e14p-4, - 0x1.0bf97ep-4, - 0x1.04fc3ap-4, - 0x1.fc4b5ep-5, - 0x1.eeea8cp-5, - 0x1.e1d4d0p-5, - 0x1.d508fap-5, - 0x1.c885e0p-5, - 0x1.bc4a54p-5, - 0x1.b05530p-5, - 0x1.a4a54ap-5, - 0x1.99397ap-5, - 0x1.8e109cp-5, - 0x1.83298ep-5, - 0x1.78832cp-5, - 0x1.6e1c58p-5, - 0x1.63f3f6p-5, - 0x1.5a08e8p-5, - 0x1.505a18p-5, - 0x1.46e66cp-5, - 0x1.3dacd2p-5, - 0x1.34ac36p-5, - 0x1.2be38cp-5, - 0x1.2351c2p-5, - 0x1.1af5d2p-5, - 0x1.12ceb4p-5, - 0x1.0adb60p-5, - 0x1.031ad6p-5, - 0x1.f7182ap-6, - 0x1.e85c44p-6, - 0x1.da0006p-6, - 0x1.cc0180p-6, - 0x1.be5ecep-6, - 0x1.b1160ap-6, - 0x1.a4255ap-6, - 0x1.978ae8p-6, - 0x1.8b44e6p-6, - 0x1.7f5188p-6, - 0x1.73af0cp-6, - 0x1.685bb6p-6, - 0x1.5d55ccp-6, - 0x1.529b9ep-6, - 0x1.482b84p-6, - 0x1.3e03d8p-6, - 0x1.3422fep-6, - 0x1.2a875cp-6, - 0x1.212f62p-6, - 0x1.181984p-6, - 0x1.0f443ep-6, - 0x1.06ae14p-6, - 0x1.fcab14p-7, - 0x1.ec7262p-7, - 0x1.dcaf36p-7, - 0x1.cd5ecap-7, - 0x1.be7e5ap-7, - 0x1.b00b38p-7, - 0x1.a202bep-7, - 0x1.94624ep-7, - 0x1.87275ep-7, - 0x1.7a4f6ap-7, - 0x1.6dd7fep-7, - 0x1.61beaep-7, - 0x1.56011cp-7, - 0x1.4a9cf6p-7, - 0x1.3f8ff6p-7, - 0x1.34d7dcp-7, - 0x1.2a727ap-7, - 0x1.205dacp-7, - 0x1.169756p-7, - 0x1.0d1d6ap-7, - 0x1.03ede2p-7, - 0x1.f60d8ap-8, - 0x1.e4cc4ap-8, - 0x1.d4143ap-8, - 0x1.c3e1a6p-8, - 0x1.b430ecp-8, - 0x1.a4fe84p-8, - 0x1.9646f4p-8, - 0x1.8806d8p-8, - 0x1.7a3adep-8, - 0x1.6cdfccp-8, - 0x1.5ff276p-8, - 0x1.536fc2p-8, - 0x1.4754acp-8, - 0x1.3b9e40p-8, - 0x1.30499cp-8, - 0x1.2553eep-8, - 
0x1.1aba78p-8, - 0x1.107a8cp-8, - 0x1.06918cp-8, - 0x1.f9f9d0p-9, - 0x1.e77448p-9, - 0x1.d58da6p-9, - 0x1.c4412cp-9, - 0x1.b38a3ap-9, - 0x1.a36454p-9, - 0x1.93cb12p-9, - 0x1.84ba30p-9, - 0x1.762d84p-9, - 0x1.682100p-9, - 0x1.5a90b0p-9, - 0x1.4d78bcp-9, - 0x1.40d564p-9, - 0x1.34a306p-9, - 0x1.28de12p-9, - 0x1.1d8318p-9, - 0x1.128ebap-9, - 0x1.07fdb4p-9, - 0x1.fb99b8p-10, - 0x1.e7f232p-10, - 0x1.d4fed8p-10, - 0x1.c2b9d0p-10, - 0x1.b11d70p-10, - 0x1.a02436p-10, - 0x1.8fc8c8p-10, - 0x1.8005f0p-10, - 0x1.70d6a4p-10, - 0x1.6235fcp-10, - 0x1.541f34p-10, - 0x1.468daep-10, - 0x1.397ceep-10, - 0x1.2ce898p-10, - 0x1.20cc76p-10, - 0x1.15246ep-10, - 0x1.09ec86p-10, - 0x1.fe41cep-11, - 0x1.e97ba4p-11, - 0x1.d57f52p-11, - 0x1.c245d4p-11, - 0x1.afc85ep-11, - 0x1.9e0058p-11, - 0x1.8ce75ep-11, - 0x1.7c7744p-11, - 0x1.6caa0ep-11, - 0x1.5d79ecp-11, - 0x1.4ee142p-11, - 0x1.40daa4p-11, - 0x1.3360ccp-11, - 0x1.266ea8p-11, - 0x1.19ff46p-11, - 0x1.0e0de8p-11, - 0x1.0295f0p-11, - 0x1.ef25d4p-12, - 0x1.da0110p-12, - 0x1.c5b542p-12, - 0x1.b23a5ap-12, - 0x1.9f8894p-12, - 0x1.8d986ap-12, - 0x1.7c629ap-12, - 0x1.6be022p-12, - 0x1.5c0a38p-12, - 0x1.4cda54p-12, - 0x1.3e4a24p-12, - 0x1.305390p-12, - 0x1.22f0b4p-12, - 0x1.161be4p-12, - 0x1.09cfa4p-12, - 0x1.fc0d56p-13, - 0x1.e577bcp-13, - 0x1.cfd4a6p-13, - 0x1.bb1a96p-13, - 0x1.a74068p-13, - 0x1.943d4ap-13, - 0x1.8208bcp-13, - 0x1.709a8ep-13, - 0x1.5feadap-13, - 0x1.4ff208p-13, - 0x1.40a8c2p-13, - 0x1.3207fcp-13, - 0x1.2408eap-13, - 0x1.16a502p-13, - 0x1.09d5f8p-13, - 0x1.fb2b7ap-14, - 0x1.e3bcf4p-14, - 0x1.cd5528p-14, - 0x1.b7e946p-14, - 0x1.a36eecp-14, - 0x1.8fdc1cp-14, - 0x1.7d2738p-14, - 0x1.6b4702p-14, - 0x1.5a329cp-14, - 0x1.49e178p-14, - 0x1.3a4b60p-14, - 0x1.2b6876p-14, - 0x1.1d3120p-14, - 0x1.0f9e1cp-14, - 0x1.02a868p-14, - 0x1.ec929ap-15, - 0x1.d4f4b4p-15, - 0x1.be6abcp-15, - 0x1.a8e8ccp-15, - 0x1.94637ep-15, - 0x1.80cfdcp-15, - 0x1.6e2368p-15, - 0x1.5c540cp-15, - 0x1.4b581cp-15, - 0x1.3b2652p-15, - 0x1.2bb5ccp-15, - 0x1.1cfe02p-15, - 
0x1.0ef6c4p-15, - 0x1.019842p-15, - 0x1.e9b5e8p-16, - 0x1.d16f58p-16, - 0x1.ba4f04p-16, - 0x1.a447b8p-16, - 0x1.8f4cccp-16, - 0x1.7b5224p-16, - 0x1.684c22p-16, - 0x1.562facp-16, - 0x1.44f21ep-16, - 0x1.34894ap-16, - 0x1.24eb72p-16, - 0x1.160f44p-16, - 0x1.07ebd2p-16, - 0x1.f4f12ep-17, - 0x1.db5ad0p-17, - 0x1.c304f0p-17, - 0x1.abe09ep-17, - 0x1.95df98p-17, - 0x1.80f43ap-17, - 0x1.6d1178p-17, - 0x1.5a2ae0p-17, - 0x1.483488p-17, - 0x1.372310p-17, - 0x1.26eb9ep-17, - 0x1.1783cep-17, - 0x1.08e1bap-17, - 0x1.f5f7d8p-18, - 0x1.db92b6p-18, - 0x1.c282cep-18, - 0x1.aab7acp-18, - 0x1.94219cp-18, - 0x1.7eb1a2p-18, - 0x1.6a5972p-18, - 0x1.570b6ap-18, - 0x1.44ba86p-18, - 0x1.335a62p-18, - 0x1.22df2ap-18, - 0x1.133d96p-18, - 0x1.046aeap-18, - 0x1.ecb9d0p-19, - 0x1.d21398p-19, - 0x1.b8d094p-19, - 0x1.a0df10p-19, - 0x1.8a2e26p-19, - 0x1.74adc8p-19, - 0x1.604ea8p-19, - 0x1.4d0232p-19, - 0x1.3aba86p-19, - 0x1.296a70p-19, - 0x1.190562p-19, - 0x1.097f62p-19, - 0x1.f59a20p-20, - 0x1.d9c736p-20, - 0x1.bf716cp-20, - 0x1.a6852cp-20, - 0x1.8eefd8p-20, - 0x1.789fb8p-20, - 0x1.6383f8p-20, - 0x1.4f8c96p-20, - 0x1.3caa62p-20, - 0x1.2acee2p-20, - 0x1.19ec60p-20, - 0x1.09f5d0p-20, - 0x1.f5bd96p-21, - 0x1.d9371ep-21, - 0x1.be41dep-21, - 0x1.a4c89ep-21, - 0x1.8cb738p-21, - 0x1.75fa8ep-21, - 0x1.608078p-21, - 0x1.4c37c0p-21, - 0x1.39100ep-21, - 0x1.26f9e0p-21, - 0x1.15e682p-21, - 0x1.05c804p-21, - 0x1.ed2254p-22, - 0x1.d06ad6p-22, - 0x1.b551c8p-22, - 0x1.9bc0a0p-22, - 0x1.83a200p-22, - 0x1.6ce1aap-22, - 0x1.576c72p-22, - 0x1.43302cp-22, - 0x1.301ba2p-22, - 0x1.1e1e86p-22, - 0x1.0d2966p-22, - 0x1.fa5b50p-23, - 0x1.dc3ae4p-23, - 0x1.bfd756p-23, - 0x1.a517dap-23, - 0x1.8be4f8p-23, - 0x1.74287ep-23, - 0x1.5dcd66p-23, - 0x1.48bfd4p-23, - 0x1.34ecf8p-23, - 0x1.224310p-23, - 0x1.10b148p-23, - }, -}; diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h index 23963b5f8e..01fbb4d4c0 100644 --- a/sysdeps/aarch64/fpu/sv_expf_inline.h +++ b/sysdeps/aarch64/fpu/sv_expf_inline.h 
@@ -24,52 +24,41 @@ struct sv_expf_data { - float poly[5]; - float inv_ln2, ln2_hi, ln2_lo, shift; + float ln2_hi, ln2_lo, c1, null; + float inv_ln2, shift; }; -/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for - compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */ +/* Shift is 1.5*2^17 + 127. */ #define SV_EXPF_DATA \ { \ - .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \ - 0x1.0e4020p-7f }, \ - \ - .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ + .c1 = 0.5f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \ } -#define C(i) sv_f32 (d->poly[i]) - static inline svfloat32_t expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) { /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - /* Load some constants in quad-word chunks to minimise memory access. */ - svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_hi); /* n = round(x/(ln2/N)). */ - svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1); + svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift); svfloat32_t n = svsub_x (pg, z, d->shift); /* r = x - n*ln2/N. */ - svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2); - r = svmls_lane (r, n, c4_invln2_and_ln2, 3); + svfloat32_t r = x; + r = svmls_lane (r, n, lane_consts, 0); + r = svmls_lane (r, n, lane_consts, 1); /* scale = 2^(n/N). */ - svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z)); + svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ - svfloat32_t p12 = svmla_x (pg, C (1), C (2), r); - svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0); - svfloat32_t r2 = svmul_f32_x (pg, r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_f32_x (pg, r, C (0)); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); + /* poly(r) = exp(r) - 1 ~= r + 0.5 r^2. */ + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); + svfloat32_t poly = svmla_lane (r, r2, lane_consts, 2); return svmla_x (pg, scale, scale, poly); } - #endif diff --git a/sysdeps/aarch64/fpu/sv_expm1f_inline.h b/sysdeps/aarch64/fpu/sv_expm1f_inline.h index 5b72451222..e46ddda543 100644 --- a/sysdeps/aarch64/fpu/sv_expm1f_inline.h +++ b/sysdeps/aarch64/fpu/sv_expm1f_inline.h @@ -27,21 +27,18 @@ struct sv_expm1f_data /* These 4 are grouped together so they can be loaded as one quadword, then used with _lane forms of svmla/svmls. */ float32_t c2, c4, ln2_hi, ln2_lo; - float32_t c0, c1, c3, inv_ln2, shift; + float c0, inv_ln2, c1, c3, special_bound; }; /* Coefficients generated using fpminimax. */ #define SV_EXPM1F_DATA \ { \ - .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, \ - .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .inv_ln2 = 0x1.715476p+0f, \ + .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7, \ \ - .shift = 0x1.8p23f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ - .ln2_lo = 0x1.7f7d1cp-20f, \ + .c4 = 0x1.6b55a2p-10, .ln2_lo = 0x1.7f7d1cp-20f, .ln2_hi = 0x1.62e4p-1f, \ } -#define C(i) sv_f32 (d->c##i) - static inline svfloat32_t expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d) { @@ -55,9 +52,8 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d) and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 where 2^i is exact because i is an integer. 
*/ - svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2); - j = svsub_x (pg, j, d->shift); - svint32_t i = svcvt_s32_x (pg, j); + svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2); + j = svrinta_x (pg, j); svfloat32_t f = svmls_lane (x, j, lane_constants, 2); f = svmls_lane (f, j, lane_constants, 3); @@ -67,18 +63,18 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d) x + ax^2 + bx^3 + cx^4 .... So we calculate the polynomial P(f) = a + bf + cf^2 + ... and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0); - svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1); - svfloat32_t f2 = svmul_x (pg, f, f); + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1); + svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f); svfloat32_t p = svmla_x (pg, p12, f2, p34); - p = svmla_x (pg, C (0), f, p); + p = svmla_x (pg, sv_f32 (d->c0), f, p); p = svmla_x (pg, f, f2, p); /* Assemble the result. expm1(x) ~= 2^i * (p + 1) - 1 Let t = 2^i. 
*/ - svfloat32_t t = svscale_x (pg, sv_f32 (1), i); - return svmla_x (pg, svsub_x (pg, t, 1), p, t); + svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j)); + return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t); } #endif diff --git a/sysdeps/aarch64/fpu/sv_log1p_inline.h b/sysdeps/aarch64/fpu/sv_log1p_inline.h index da019674f9..a9ecd75d19 100644 --- a/sysdeps/aarch64/fpu/sv_log1p_inline.h +++ b/sysdeps/aarch64/fpu/sv_log1p_inline.h @@ -21,11 +21,12 @@ #define AARCH64_FPU_SV_LOG1P_INLINE_H #include "sv_math.h" -#include "poly_sve_f64.h" static const struct sv_log1p_data { - double poly[19], ln2[2]; + double c0, c2, c4, c6, c8, c10, c12, c14, c16; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; + double ln2_lo, ln2_hi; uint64_t hf_rt2_top; uint64_t one_m_hf_rt2_top; uint32_t bottom_mask; @@ -33,15 +34,30 @@ static const struct sv_log1p_data } sv_log1p_data = { /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */ - .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, - 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, - -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, - 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, - -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, - 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, - -0x1.cfa7385bdb37ep-6 }, - .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, + .c0 = -0x1.ffffffffffffbp-2, + .c1 = 0x1.55555555551a9p-2, + .c2 = -0x1.00000000008e3p-2, + .c3 = 0x1.9999999a32797p-3, + .c4 = -0x1.555555552fecfp-3, + .c5 = 0x1.249248e071e5ap-3, + .c6 = -0x1.ffffff8bf8482p-4, + .c7 = 0x1.c71c8f07da57ap-4, + .c8 = -0x1.9999ca4ccb617p-4, + .c9 = 0x1.7459ad2e1dfa3p-4, + .c10 = -0x1.554d2680a3ff2p-4, + .c11 = 0x1.3b4c54d487455p-4, + .c12 = -0x1.2548a9ffe80e6p-4, + .c13 = 0x1.0f389a24b2e07p-4, + .c14 = -0x1.eee4db15db335p-5, + .c15 = 0x1.e95b494d4a5ddp-5, + .c16 = 
-0x1.15fdf07cb7c73p-4, + .c17 = 0x1.0310b70800fcfp-4, + .c18 = -0x1.cfa7385bdb37ep-6, + .ln2_lo = 0x1.62e42fefa3800p-1, + .ln2_hi = 0x1.ef35793c76730p-45, + /* top32(asuint64(sqrt(2)/2)) << 32. */ .hf_rt2_top = 0x3fe6a09e00000000, + /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */ .one_m_hf_rt2_top = 0x00095f6200000000, .bottom_mask = 0xffffffff, .one_top = 0x3ff @@ -51,14 +67,14 @@ static inline svfloat64_t sv_log1p_inline (svfloat64_t x, const svbool_t pg) { /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which - differs from v_log1p_2u5.c by: + differs from advsimd/log1p.c by: - No special-case handling - this should be dealt with by the caller. - Pairwise Horner polynomial evaluation for improved accuracy. - Optionally simulate the shortcut for k=0, used in the scalar routine, using svsel, for improved accuracy when the argument to log1p is close to 0. This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1 in the source of the caller before including this file. - See sv_log1p_2u1.c for details of the algorithm. */ + See sve/log1p.c for details of the algorithm. */ const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data); svfloat64_t m = svadd_x (pg, x, 1); svuint64_t mi = svreinterpret_u64 (m); @@ -79,7 +95,7 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg) svfloat64_t cm; #ifndef WANT_SV_LOG1P_K0_SHORTCUT -#error \ +#error \ "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" #elif WANT_SV_LOG1P_K0_SHORTCUT /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is @@ -96,14 +112,46 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg) #endif /* Approximate log1p(f) on the reduced input using a polynomial. 
*/ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly); + svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f), + f4 = svmul_x (svptrue_b64 (), f2, f2), + f8 = svmul_x (svptrue_b64 (), f4, f4), + f16 = svmul_x (svptrue_b64 (), f8, f8); + + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17); + + /* Order-18 Estrin scheme. */ + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, f2, p23); + svfloat64_t p47 = svmla_x (pg, p45, f2, p67); + svfloat64_t p07 = svmla_x (pg, p03, f4, p47); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1); + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1); + + svfloat64_t p811 = svmla_x (pg, p89, f2, p1011); + svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415); + svfloat64_t p815 = svmla_x (pg, p811, f4, p1215); + + svfloat64_t p015 = svmla_x (pg, p07, f8, p815); + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0); + svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1); + svfloat64_t p = svmla_x (pg, p015, f16, p1618); /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. 
*/ - svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]); - svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]); + svfloat64_t ln2_lo_hi = svld1rq (svptrue_b64 (), &d->ln2_lo); + svfloat64_t ylo = svmla_lane (cm, k, ln2_lo_hi, 0); + svfloat64_t yhi = svmla_lane (f, k, ln2_lo_hi, 1); - return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p); + return svmad_x (pg, p, f2, svadd_x (pg, ylo, yhi)); } - #endif diff --git a/sysdeps/aarch64/fpu/sv_log1pf_inline.h b/sysdeps/aarch64/fpu/sv_log1pf_inline.h index b94b2da055..850297d615 100644 --- a/sysdeps/aarch64/fpu/sv_log1pf_inline.h +++ b/sysdeps/aarch64/fpu/sv_log1pf_inline.h @@ -22,55 +22,76 @@ #include "sv_math.h" #include "vecmath_config.h" -#include "poly_sve_f32.h" + +#define SignExponentMask 0xff800000 static const struct sv_log1pf_data { - float32_t poly[9]; - float32_t ln2; - float32_t scale_back; + float c0, c2, c4, c6; + float c1, c3, c5, c7; + float ln2, exp_bias, quarter; + uint32_t four, three_quarters; } sv_log1pf_data = { - /* Polynomial generated using FPMinimax in [-0.25, 0.5]. */ - .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f, - -0x1.6f0d5ep-5f }, - .scale_back = 0x1.0p-23f, - .ln2 = 0x1.62e43p-1f, + /* Do not store first term of polynomial, which is -0.5, as + this can be fmov-ed directly instead of including it in + the main load-and-mla polynomial schedule. 
*/ + .c0 = 0x1.5555aap-2f, .c1 = -0x1.000038p-2f, .c2 = 0x1.99675cp-3f, + .c3 = -0x1.54ef78p-3f, .c4 = 0x1.28a1f4p-3f, .c5 = -0x1.0da91p-3f, + .c6 = 0x1.abcb6p-4f, .c7 = -0x1.6f0d5ep-5f, .ln2 = 0x1.62e43p-1f, + .exp_bias = 0x1p-23f, .quarter = 0x1p-2f, .four = 0x40800000, + .three_quarters = 0x3f400000, }; -static inline svfloat32_t -eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg) -{ - svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1])); - svfloat32_t m2 = svmul_x (pg, m, m); - svfloat32_t q = svmla_x (pg, m, m2, p_12); - svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2); - p = svmul_x (pg, m2, p); - - return svmla_x (pg, q, m2, p); -} - static inline svfloat32_t sv_log1pf_inline (svfloat32_t x, svbool_t pg) { const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data); - svfloat32_t m = svadd_x (pg, x, 1.0f); - - svint32_t ks = svsub_x (pg, svreinterpret_s32 (m), - svreinterpret_s32 (svdup_f32 (0.75f))); - ks = svand_x (pg, ks, 0xff800000); - svuint32_t k = svreinterpret_u32 (ks); - svfloat32_t s = svreinterpret_f32 ( - svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k)); - - svfloat32_t m_scale - = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k)); - m_scale - = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s)); - svfloat32_t p = eval_poly (m_scale, d->poly, pg); - svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->scale_back); - return svmla_x (pg, p, scale_back, d->ln2); + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ + svfloat32_t m = svadd_x (pg, x, 1); + + /* Choose k to scale x to the range [-1/4, 1/2]. 
*/ + svint32_t k + = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters), + sv_s32 (SignExponentMask)); + + /* Scale x by exponent manipulation. */ + svfloat32_t m_scale = svreinterpret_f32 ( + svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k))); + + /* Scale up to ensure that the scale factor is representable as normalised + fp32 number, and scale m down accordingly. */ + svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four)); + svfloat32_t fconst = svld1rq_f32 (svptrue_b32 (), &d->ln2); + m_scale = svadd_x (pg, m_scale, svmla_lane_f32 (sv_f32 (-1), s, fconst, 2)); + + /* Evaluate polynomial on reduced interval. */ + svfloat32_t ms2 = svmul_x (svptrue_b32 (), m_scale, m_scale); + + svfloat32_t c1357 = svld1rq_f32 (svptrue_b32 (), &d->c1); + svfloat32_t p01 = svmla_lane_f32 (sv_f32 (d->c0), m_scale, c1357, 0); + svfloat32_t p23 = svmla_lane_f32 (sv_f32 (d->c2), m_scale, c1357, 1); + svfloat32_t p45 = svmla_lane_f32 (sv_f32 (d->c4), m_scale, c1357, 2); + svfloat32_t p67 = svmla_lane_f32 (sv_f32 (d->c6), m_scale, c1357, 3); + + svfloat32_t p = svmla_x (pg, p45, p67, ms2); + p = svmla_x (pg, p23, p, ms2); + p = svmla_x (pg, p01, p, ms2); + + p = svmad_x (pg, m_scale, p, -0.5); + p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p)); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ + svfloat32_t scale_back = svmul_lane_f32 (svcvt_f32_x (pg, k), fconst, 1); + return svmla_lane_f32 (p, scale_back, fconst, 0); } #endif diff --git a/sysdeps/aarch64/fpu/sv_math.h b/sysdeps/aarch64/fpu/sv_math.h index 41a2013929..c2dae543a1 100644 --- a/sysdeps/aarch64/fpu/sv_math.h +++ b/sysdeps/aarch64/fpu/sv_math.h @@ -24,11 +24,29 @@ #include "vecmath_config.h" +#if !defined(__ARM_FEATURE_SVE_BITS) || __ARM_FEATURE_SVE_BITS == 0 +/* If not specified by -msve-vector-bits, assume maximum vector length. 
*/ +# define SVE_VECTOR_BYTES 256 +#else +# define SVE_VECTOR_BYTES (__ARM_FEATURE_SVE_BITS / 8) +#endif +#define SVE_NUM_FLTS (SVE_VECTOR_BYTES / sizeof (float)) +#define SVE_NUM_DBLS (SVE_VECTOR_BYTES / sizeof (double)) +/* Predicate is stored as one bit per byte of VL so requires VL / 64 bytes. */ +#define SVE_NUM_PG_BYTES (SVE_VECTOR_BYTES / sizeof (uint64_t)) + #define SV_NAME_F1(fun) _ZGVsMxv_##fun##f #define SV_NAME_D1(fun) _ZGVsMxv_##fun #define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f #define SV_NAME_D2(fun) _ZGVsMxvv_##fun +static inline void +svstr_p (uint8_t *dst, svbool_t p) +{ + /* Predicate STR does not currently have an intrinsic. */ + __asm__("str %0, [%x1]\n" : : "Upa"(p), "r"(dst) : "memory"); +} + /* Double precision. */ static inline svint64_t sv_s64 (int64_t x) @@ -51,33 +69,35 @@ sv_f64 (double x) static inline svfloat64_t sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp) { - svbool_t p = svpfirst (cmp, svpfalse ()); - while (svptest_any (cmp, p)) + double tmp[SVE_NUM_DBLS]; + uint8_t pg_bits[SVE_NUM_PG_BYTES]; + svstr_p (pg_bits, cmp); + svst1 (svptrue_b64 (), tmp, svsel (cmp, x, y)); + + for (int i = 0; i < svcntd (); i++) { - double elem = svclastb_n_f64 (p, 0, x); - elem = (*f) (elem); - svfloat64_t y2 = svdup_n_f64 (elem); - y = svsel_f64 (p, y2, y); - p = svpnext_b64 (cmp, p); + if (pg_bits[i] & 1) + tmp[i] = f (tmp[i]); } - return y; + return svld1 (svptrue_b64 (), tmp); } static inline svfloat64_t sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2, svfloat64_t y, svbool_t cmp) { - svbool_t p = svpfirst (cmp, svpfalse ()); - while (svptest_any (cmp, p)) + double tmp1[SVE_NUM_DBLS], tmp2[SVE_NUM_DBLS]; + uint8_t pg_bits[SVE_NUM_PG_BYTES]; + svstr_p (pg_bits, cmp); + svst1 (svptrue_b64 (), tmp1, svsel (cmp, x1, y)); + svst1 (cmp, tmp2, x2); + + for (int i = 0; i < svcntd (); i++) { - double elem1 = svclastb_n_f64 (p, 0, x1); - double elem2 = svclastb_n_f64 (p, 0, x2); - double ret = (*f) 
(elem1, elem2); - svfloat64_t y2 = svdup_n_f64 (ret); - y = svsel_f64 (p, y2, y); - p = svpnext_b64 (cmp, p); + if (pg_bits[i] & 1) + tmp1[i] = f (tmp1[i], tmp2[i]); } - return y; + return svld1 (svptrue_b64 (), tmp1); } static inline svuint64_t @@ -109,33 +129,40 @@ sv_f32 (float x) static inline svfloat32_t sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp) { - svbool_t p = svpfirst (cmp, svpfalse ()); - while (svptest_any (cmp, p)) + float tmp[SVE_NUM_FLTS]; + uint8_t pg_bits[SVE_NUM_PG_BYTES]; + svstr_p (pg_bits, cmp); + svst1 (svptrue_b32 (), tmp, svsel (cmp, x, y)); + + for (int i = 0; i < svcntd (); i++) { - float elem = svclastb_n_f32 (p, 0, x); - elem = f (elem); - svfloat32_t y2 = svdup_n_f32 (elem); - y = svsel_f32 (p, y2, y); - p = svpnext_b32 (cmp, p); + uint8_t p = pg_bits[i]; + if (p & 1) + tmp[i * 2] = f (tmp[i * 2]); + if (p & (1 << 4)) + tmp[i * 2 + 1] = f (tmp[i * 2 + 1]); } - return y; + return svld1 (svptrue_b32 (), tmp); } static inline svfloat32_t sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) { - svbool_t p = svpfirst (cmp, svpfalse ()); - while (svptest_any (cmp, p)) + float tmp1[SVE_NUM_FLTS], tmp2[SVE_NUM_FLTS]; + uint8_t pg_bits[SVE_NUM_PG_BYTES]; + svstr_p (pg_bits, cmp); + svst1 (svptrue_b32 (), tmp1, svsel (cmp, x1, y)); + svst1 (cmp, tmp2, x2); + + for (int i = 0; i < svcntd (); i++) { - float elem1 = svclastb_n_f32 (p, 0, x1); - float elem2 = svclastb_n_f32 (p, 0, x2); - float ret = f (elem1, elem2); - svfloat32_t y2 = svdup_n_f32 (ret); - y = svsel_f32 (p, y2, y); - p = svpnext_b32 (cmp, p); + uint8_t p = pg_bits[i]; + if (p & 1) + tmp1[i * 2] = f (tmp1[i * 2], tmp2[i * 2]); + if (p & (1 << 4)) + tmp1[i * 2 + 1] = f (tmp1[i * 2 + 1], tmp2[i * 2 + 1]); } - return y; + return svld1 (svptrue_b32 (), tmp1); } - #endif diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c index d56a102dd1..c6a5a17126 100644 --- 
a/sysdeps/aarch64/fpu/tan_advsimd.c +++ b/sysdeps/aarch64/fpu/tan_advsimd.c @@ -25,9 +25,7 @@ static const struct data float64x2_t poly[9]; double half_pi[2]; float64x2_t two_over_pi, shift; -#if !WANT_SIMD_EXCEPT float64x2_t range_val; -#endif } data = { /* Coefficients generated using FPMinimax. */ .poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3), @@ -38,20 +36,17 @@ static const struct data .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 }, .two_over_pi = V2 (0x1.45f306dc9c883p-1), .shift = V2 (0x1.8p52), -#if !WANT_SIMD_EXCEPT .range_val = V2 (0x1p23), -#endif }; #define RangeVal 0x4160000000000000 /* asuint64(0x1p23). */ #define TinyBound 0x3e50000000000000 /* asuint64(2^-26). */ -#define Thresh 0x310000000000000 /* RangeVal - TinyBound. */ /* Special cases (fall back to scalar calls). */ static float64x2_t VPCS_ATTR NOINLINE -special_case (float64x2_t x) +special_case (float64x2_t x, float64x2_t n, float64x2_t d, uint64x2_t special) { - return v_call_f64 (tan, x, x, v_u64 (-1)); + return v_call_f64 (tan, x, vdivq_f64 (n, d), special); } /* Vector approximation for double-precision tan. @@ -65,14 +60,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) very large inputs. Fall back to scalar routine for all lanes if any are too large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny input to avoid underflow. */ -#if WANT_SIMD_EXCEPT - uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); - /* iax - tiny_bound > range_val - tiny_bound. */ - uint64x2_t special - = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh)); - if (__glibc_unlikely (v_any_u64 (special))) - return special_case (x); -#endif /* q = nearest integer to 2 * x / pi. */ float64x2_t q @@ -81,9 +68,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. 
*/ - float64x2_t r = x; float64x2_t half_pi = vld1q_f64 (dat->half_pi); - r = vfmsq_laneq_f64 (r, q, half_pi, 0); + float64x2_t r = vfmsq_laneq_f64 (x, q, half_pi, 0); r = vfmsq_laneq_f64 (r, q, half_pi, 1); /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle formula. */ @@ -114,12 +100,13 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1)); -#if !WANT_SIMD_EXCEPT uint64x2_t special = vcageq_f64 (x, dat->range_val); + float64x2_t swap = vbslq_f64 (no_recip, n, vnegq_f64 (d)); + d = vbslq_f64 (no_recip, d, n); + n = swap; + if (__glibc_unlikely (v_any_u64 (special))) - return special_case (x); -#endif + return special_case (x, n, d, special); - return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)), - vbslq_f64 (no_recip, d, n)); + return vdivq_f64 (n, d); } diff --git a/sysdeps/aarch64/fpu/tan_sve.c b/sysdeps/aarch64/fpu/tan_sve.c index b2e4447316..a7318fd417 100644 --- a/sysdeps/aarch64/fpu/tan_sve.c +++ b/sysdeps/aarch64/fpu/tan_sve.c @@ -22,24 +22,38 @@ static const struct data { - double poly[9]; - double half_pi_hi, half_pi_lo, inv_half_pi, range_val, shift; + double c2, c4, c6, c8; + double poly_1357[4]; + double c0, inv_half_pi; + double half_pi_hi, half_pi_lo, range_val; } data = { /* Polynomial generated with FPMinimax. 
*/ - .poly = { 0x1.5555555555556p-2, 0x1.1111111110a63p-3, 0x1.ba1ba1bb46414p-5, - 0x1.664f47e5b5445p-6, 0x1.226e5e5ecdfa3p-7, 0x1.d6c7ddbf87047p-9, - 0x1.7ea75d05b583ep-10, 0x1.289f22964a03cp-11, - 0x1.4e4fd14147622p-12, }, + .c2 = 0x1.ba1ba1bb46414p-5, + .c4 = 0x1.226e5e5ecdfa3p-7, + .c6 = 0x1.7ea75d05b583ep-10, + .c8 = 0x1.4e4fd14147622p-12, + .poly_1357 = { 0x1.1111111110a63p-3, 0x1.664f47e5b5445p-6, + 0x1.d6c7ddbf87047p-9, 0x1.289f22964a03cp-11 }, + .c0 = 0x1.5555555555556p-2, + .inv_half_pi = 0x1.45f306dc9c883p-1, .half_pi_hi = 0x1.921fb54442d18p0, .half_pi_lo = 0x1.1a62633145c07p-54, - .inv_half_pi = 0x1.45f306dc9c883p-1, .range_val = 0x1p23, - .shift = 0x1.8p52, }; static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg, + svbool_t special) { + svbool_t use_recip = svcmpeq ( + pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0); + + svfloat64_t n = svmad_x (pg, p, p, -1); + svfloat64_t d = svmul_x (svptrue_b64 (), p, 2); + svfloat64_t swap = n; + n = svneg_m (n, use_recip, d); + d = svsel (use_recip, swap, d); + svfloat64_t y = svdiv_x (svnot_z (pg, special), n, d); return sv_call_f64 (tan, x, y, special); } @@ -50,15 +64,10 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special) svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) { const struct data *dat = ptr_barrier (&data); - - /* Invert condition to catch NaNs and Infs as well as large values. */ - svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val)); - + svfloat64_t half_pi_c0 = svld1rq (svptrue_b64 (), &dat->c0); /* q = nearest integer to 2 * x / pi. 
*/ - svfloat64_t shift = sv_f64 (dat->shift); - svfloat64_t q = svmla_x (pg, shift, x, dat->inv_half_pi); - q = svsub_x (pg, q, shift); - svint64_t qi = svcvt_s64_x (pg, q); + svfloat64_t q = svmul_lane (x, half_pi_c0, 1); + q = svrinta_x (pg, q); /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. */ @@ -68,7 +77,7 @@ svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) r = svmls_lane (r, q, half_pi, 1); /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle formula. */ - r = svmul_x (pg, r, 0.5); + r = svmul_x (svptrue_b64 (), r, 0.5); /* Approximate tan(r) using order 8 polynomial. tan(x) is odd, so polynomial has the form: @@ -76,29 +85,51 @@ svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg) Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ... Then compute the approximation by: tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t r4 = svmul_x (pg, r2, r2); - svfloat64_t r8 = svmul_x (pg, r4, r4); + + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t r4 = svmul_x (svptrue_b64 (), r2, r2); + svfloat64_t r8 = svmul_x (svptrue_b64 (), r4, r4); /* Use offset version coeff array by 1 to evaluate from C1 onwards. */ - svfloat64_t p = sv_estrin_7_f64_x (pg, r2, r4, r8, dat->poly + 1); - p = svmad_x (pg, p, r2, dat->poly[0]); - p = svmla_x (pg, r, r2, svmul_x (pg, p, r)); + svfloat64_t C_24 = svld1rq (svptrue_b64 (), &dat->c2); + svfloat64_t C_68 = svld1rq (svptrue_b64 (), &dat->c6); + + /* Use offset version coeff array by 1 to evaluate from C1 onwards. 
*/ + svfloat64_t p01 = svmla_lane (sv_f64 (dat->poly_1357[0]), r2, C_24, 0); + svfloat64_t p23 = svmla_lane_f64 (sv_f64 (dat->poly_1357[1]), r2, C_24, 1); + svfloat64_t p03 = svmla_x (pg, p01, p23, r4); + + svfloat64_t p45 = svmla_lane (sv_f64 (dat->poly_1357[2]), r2, C_68, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (dat->poly_1357[3]), r2, C_68, 1); + svfloat64_t p47 = svmla_x (pg, p45, p67, r4); + + svfloat64_t p = svmla_x (pg, p03, p47, r8); + + svfloat64_t z = svmul_x (svptrue_b64 (), p, r); + z = svmul_x (svptrue_b64 (), r2, z); + z = svmla_lane (z, r, half_pi_c0, 0); + p = svmla_x (pg, r, r2, z); /* Recombination uses double-angle formula: tan(2x) = 2 * tan(x) / (1 - (tan(x))^2) and reciprocity around pi/2: tan(x) = 1 / (tan(pi/2 - x)) to assemble result using change-of-sign and conditional selection of - numerator/denominator dependent on odd/even-ness of q (hence quadrant). */ - svbool_t use_recip - = svcmpeq (pg, svand_x (pg, svreinterpret_u64 (qi), 1), 0); + numerator/denominator dependent on odd/even-ness of q (quadrant). */ + + /* Invert condition to catch NaNs and Infs as well as large values. 
*/ + svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val)); + + if (__glibc_unlikely (svptest_any (pg, special))) + { + return special_case (x, p, q, pg, special); + } + svbool_t use_recip = svcmpeq ( + pg, svand_x (pg, svreinterpret_u64 (svcvt_s64_x (pg, q)), 1), 0); svfloat64_t n = svmad_x (pg, p, p, -1); - svfloat64_t d = svmul_x (pg, p, 2); + svfloat64_t d = svmul_x (svptrue_b64 (), p, 2); svfloat64_t swap = n; n = svneg_m (n, use_recip, d); d = svsel (use_recip, swap, d); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svdiv_x (svnot_z (pg, special), n, d), special); return svdiv_x (pg, n, d); } diff --git a/sysdeps/aarch64/fpu/tanf_sve.c b/sysdeps/aarch64/fpu/tanf_sve.c index f342583241..e850fb4882 100644 --- a/sysdeps/aarch64/fpu/tanf_sve.c +++ b/sysdeps/aarch64/fpu/tanf_sve.c @@ -60,21 +60,16 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - /* Determine whether input is too large to perform fast regression. */ - svbool_t cmp = svacge (pg, x, d->range_val); - svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1); svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->pio2_1); /* n = rint(x/(pi/2)). */ - svfloat32_t q = svmla_lane (sv_f32 (d->shift), x, pi_vals, 3); - svfloat32_t n = svsub_x (pg, q, d->shift); + svfloat32_t n = svrintn_x (pg, svmul_lane (x, pi_vals, 3)); /* n is already a signed integer, simply convert it. */ svint32_t in = svcvt_s32_x (pg, n); /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ svint32_t alt = svand_x (pg, in, 1); svbool_t pred_alt = svcmpne (pg, alt, 0); - /* r = x - n * (pi/2) (range reduction into 0 .. pi/4). */ svfloat32_t r; r = svmls_lane (x, n, pi_vals, 0); @@ -93,7 +88,7 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg) /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4], using Estrin on z^2. 
*/ - svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); @@ -106,13 +101,14 @@ svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg) svfloat32_t y = svmla_x (pg, z, p, svmul_x (pg, z, z2)); - /* Transform result back, if necessary. */ - svfloat32_t inv_y = svdivr_x (pg, y, 1.0f); - /* No need to pass pg to specialcase here since cmp is a strict subset, guaranteed by the cmpge above. */ + + /* Determine whether input is too large to perform fast regression. */ + svbool_t cmp = svacge (pg, x, d->range_val); if (__glibc_unlikely (svptest_any (pg, cmp))) - return special_case (x, svsel (pred_alt, inv_y, y), cmp); + return special_case (x, svdivr_x (pg, y, 1.0f), cmp); + svfloat32_t inv_y = svdivr_x (pg, y, 1.0f); return svsel (pred_alt, inv_y, y); } diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c index d25e011cea..805669845d 100644 --- a/sysdeps/aarch64/fpu/tanh_sve.c +++ b/sysdeps/aarch64/fpu/tanh_sve.c @@ -18,83 +18,117 @@ . */ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[11]; - float64_t inv_ln2, ln2_hi, ln2_lo, shift; - uint64_t thresh, tiny_bound; + double ln2_hi, ln2_lo; + double c2, c4; + double c0, c1, c3; + double two_over_ln2, shift; + uint64_t tiny_bound; + double large_bound, fexpa_bound; + uint64_t e2xm1_data[20]; } data = { - /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. 
*/ - .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, - 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, - 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16, - 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, - 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, - - .inv_ln2 = 0x1.71547652b82fep0, - .ln2_hi = -0x1.62e42fefa39efp-1, - .ln2_lo = -0x1.abc9e3b39803fp-56, - .shift = 0x1.8p52, - + /* Generated using Remez, in [-log(2)/128, log(2)/128]. */ + .c0 = 0x1p-1, + .c1 = 0x1.55555555548f9p-3, + .c2 = 0x1.5555555554c22p-5, + .c3 = 0x1.111123aaa2fb2p-7, + .c4 = 0x1.6c16d77d98e5bp-10, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .two_over_ln2 = 0x1.71547652b82fep+1, + .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */ .tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27). */ - /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */ - .thresh = 0x01f241bf835f9d5f, + .large_bound = 0x1.30fc1931f09cap+4, /* arctanh(1 - 2^-54). */ + .fexpa_bound = 0x1.a56ef8ec924ccp-4, /* 19/64 * ln2/2. */ + /* Table lookup of 2^(i/64) - 1, for values of i from 0..19. */ + .e2xm1_data = { + 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901, + 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb, + 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2, + 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a, + 0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7, + }, }; +/* An expm1 inspired, FEXPA based helper function that returns an + accurate estimate for e^2x - 1. With no special case or support for + negative inputs of x. */ static inline svfloat64_t -expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d) -{ - /* Helper routine for calculating exp(x) - 1. Vector port of the helper from - the scalar variant of tanh. */ - - /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. 
*/ - svfloat64_t j - = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift); - svint64_t i = svcvt_s64_x (pg, j); - svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi); - f = svmla_x (pg, f, j, d->ln2_lo); - - /* Approximate expm1(f) using polynomial. */ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t f4 = svmul_x (pg, f2, f2); - svfloat64_t p = svmla_x ( - pg, f, f2, - sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly)); - - /* t = 2 ^ i. */ - svfloat64_t t = svscale_x (pg, sv_f64 (1), i); - /* expm1(x) = p * t + (t - 1). */ - return svmla_x (pg, svsub_x (pg, t, 1), p, t); -} - -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d) { - return sv_call_f64 (tanh, x, y, special); + svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->two_over_ln2); + svuint64_t u = svreinterpret_u64 (z); + svfloat64_t n = svsub_x (pg, z, d->shift); + + /* r = x - n * ln2/2, r is in [-ln2/(2N), ln2/(2N)]. */ + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); + svfloat64_t r = svadd_x (pg, x, x); + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); + + /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); + + svfloat64_t p; + svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + p = svmad_x (pg, c34, r2, c12); + p = svmad_x (pg, p, r, sv_f64 (d->c0)); + p = svmad_x (pg, p, r2, r); + + svfloat64_t scale = svexpa (u); + + /* We want to construct e2xm1(x) = (scale - 1) + scale * poly. + However, for values of scale close to 1, scale-1 causes large ULP errors + due to cancellation. + + This can be circumvented by using a small lookup for scale-1 + when our input is below a certain bound, otherwise we can use FEXPA. 
*/ + svbool_t is_small = svaclt (pg, x, d->fexpa_bound); + + /* Index via the input of FEXPA, but we only care about the lower 5 bits. */ + svuint64_t base_idx = svand_x (pg, u, 0x1f); + + /* Compute scale - 1 from FEXPA, and lookup values where this fails. */ + svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0)); + svuint64_t scalem1_lookup + = svld1_gather_index (is_small, d->e2xm1_data, base_idx); + + /* Select the appropriate scale - 1 value based on x. */ + svfloat64_t scalem1 + = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate); + return svmla_x (pg, scalem1, scale, p); } -/* SVE approximation for double-precision tanh(x), using a simplified - version of expm1. The greatest observed error is 2.77 ULP: - _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3 - want -0x1.bd6a21a163624p-3. */ +/* SVE approximation for double-precision tanh(x), using a modified version of + FEXPA expm1 to calculate e^2x - 1. + The greatest observed error is 2.79 + 0.5 ULP: + _ZGVsMxv_tanh (0x1.fff868eb3c223p-9) got 0x1.fff7be486cae6p-9 + want 0x1.fff7be486cae9p-9. */ svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x)); + svbool_t large = svacge (pg, x, d->large_bound); - /* Trigger special-cases for tiny, boring and infinity/NaN. */ - svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh); + /* We can use tanh(x) = (e^2x - 1) / (e^2x + 1) to approximate tanh. + As an additional optimisation, we can ensure more accurate values of e^x + by only using positive inputs. So we calculate tanh(|x|), and restore the + sign of the input before returning. 
*/ + svfloat64_t ax = svabs_x (pg, x); + svuint64_t sign_bit + = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax)); - svfloat64_t u = svadd_x (pg, x, x); + svfloat64_t p = e2xm1_inline (pg, ax, d); + svfloat64_t q = svadd_x (pg, p, 2); - /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ - svfloat64_t q = expm1_inline (u, pg, d); - svfloat64_t qp2 = svadd_x (pg, q, 2); + /* For sufficiently high inputs, the result of tanh(|x|) is 1 when correctly + rounded, at this point we can return 1 directly, with sign correction. + This will also act as a guard against our approximation overflowing. */ + svfloat64_t y = svsel (large, sv_f64 (1.0), svdiv_x (pg, p, q)); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svdiv_x (pg, q, qp2), special); - return svdiv_x (pg, q, qp2); + return svreinterpret_f64 (svorr_x (pg, sign_bit, svreinterpret_u64 (y))); } diff --git a/sysdeps/aarch64/fpu/tanhf_advsimd.c b/sysdeps/aarch64/fpu/tanhf_advsimd.c index 50defd6ef0..3ced9b7a41 100644 --- a/sysdeps/aarch64/fpu/tanhf_advsimd.c +++ b/sysdeps/aarch64/fpu/tanhf_advsimd.c @@ -28,13 +28,16 @@ static const struct data /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). 
*/ .boring_bound = V4 (0x41102cb3), .large_bound = V4 (0x7f800000), - .onef = V4 (0x3f800000), }; static float32x4_t NOINLINE VPCS_ATTR -special_case (float32x4_t x, float32x4_t y, uint32x4_t special) +special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring, + float32x4_t q, uint32x4_t special) { - return v_call_f32 (tanhf, x, y, special); + return v_call_f32 ( + tanhf, x, + vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))), + special); } /* Approximation for single-precision vector tanh(x), using a simplified @@ -50,7 +53,9 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) uint32x4_t iax = vreinterpretq_u32_f32 (ax); uint32x4_t sign = veorq_u32 (ix, iax); uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound); - float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef)); + /* expm1 exponent bias is 1.0f reinterpreted to int. */ + float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 ( + sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias))); #if WANT_SIMD_EXCEPT /* If fp exceptions are to be triggered properly, set all special and boring @@ -66,10 +71,12 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x) /* tanh(x) = (e^2x - 1) / (e^2x + 1). 
*/ float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts); - float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); + if (__glibc_unlikely (v_any_u32 (special))) - return special_case (vreinterpretq_f32_u32 (ix), - vbslq_f32 (is_boring, boring, y), special); + return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q, + special); + + float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0))); return vbslq_f32 (is_boring, boring, y); } libmvec_hidden_def (V_NAME_F1 (tanh)) diff --git a/sysdeps/aarch64/fpu/tanhf_sve.c b/sysdeps/aarch64/fpu/tanhf_sve.c index 0b94523cf5..80dd679346 100644 --- a/sysdeps/aarch64/fpu/tanhf_sve.c +++ b/sysdeps/aarch64/fpu/tanhf_sve.c @@ -19,20 +19,27 @@ #include "sv_expm1f_inline.h" +/* Largest value of x for which tanhf(x) rounds to 1 (or -1 for negative). */ +#define BoringBound 0x1.205966p+3f + static const struct data { struct sv_expm1f_data expm1f_consts; - uint32_t boring_bound, onef; + uint32_t onef, special_bound; + float boring_bound; } data = { .expm1f_consts = SV_EXPM1F_DATA, - /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). 
*/ - .boring_bound = 0x41102cb3, .onef = 0x3f800000, + .special_bound = 0x7f800000, + .boring_bound = BoringBound, }; static svfloat32_t NOINLINE -special_case (svfloat32_t x, svfloat32_t y, svbool_t special) +special_case (svfloat32_t x, svbool_t pg, svbool_t is_boring, + svfloat32_t boring, svfloat32_t q, svbool_t special) { + svfloat32_t y + = svsel_f32 (is_boring, boring, svdiv_x (pg, q, svadd_x (pg, q, 2.0))); return sv_call_f32 (tanhf, x, y, special); } @@ -47,15 +54,16 @@ svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg) svfloat32_t ax = svabs_x (pg, x); svuint32_t iax = svreinterpret_u32 (ax); svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax); - svbool_t is_boring = svcmpgt (pg, iax, d->boring_bound); svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef)); - - svbool_t special = svcmpgt (pg, iax, 0x7f800000); + svbool_t special = svcmpgt (pg, iax, d->special_bound); + svbool_t is_boring = svacgt (pg, x, d->boring_bound); /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ - svfloat32_t q = expm1f_inline (svmul_x (pg, x, 2.0), pg, &d->expm1f_consts); - svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0)); + svfloat32_t q = expm1f_inline (svmul_x (svptrue_b32 (), x, 2.0), pg, + &d->expm1f_consts); + if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svsel_f32 (is_boring, boring, y), special); + return special_case (x, pg, is_boring, boring, q, special); + svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0)); return svsel_f32 (is_boring, boring, y); } diff --git a/sysdeps/aarch64/fpu/v_expf_inline.h b/sysdeps/aarch64/fpu/v_expf_inline.h index 08b06e0a6b..eacd2af241 100644 --- a/sysdeps/aarch64/fpu/v_expf_inline.h +++ b/sysdeps/aarch64/fpu/v_expf_inline.h @@ -24,50 +24,45 @@ struct v_expf_data { - float32x4_t poly[5]; - float32x4_t shift; - float invln2_and_ln2[4]; + float ln2_hi, ln2_lo, c0, c2; + float32x4_t inv_ln2, c1, c3, c4; + /* asuint(1.0f). 
*/ + uint32x4_t exponent_bias; }; /* maxerr: 1.45358 +0.5 ulp. */ #define V_EXPF_DATA \ { \ - .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), \ - V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, \ - .shift = V4 (0x1.8p23f), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ + .c0 = 0x1.0e4020p-7f, .c1 = V4 (0x1.573e2ep-5f), .c2 = 0x1.555e66p-3f, \ + .c3 = V4 (0x1.fffdb6p-2f), .c4 = V4 (0x1.ffffecp-1f), \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ + .inv_ln2 = V4 (0x1.715476p+0f), .exponent_bias = V4 (0x3f800000), \ } -#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f). */ -#define C(i) d->poly[i] - static inline float32x4_t v_expf_inline (float32x4_t x, const struct v_expf_data *d) { - /* Helper routine for calculating exp(x). + /* Helper routine for calculating exp(ax). Copied from v_expf.c, with all special-case handling removed - the calling routine should handle special values if required. */ - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ - float32x4_t n, r, z; - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - z = vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0); - n = vsubq_f32 (z, d->shift); - r = vfmsq_laneq_f32 (x, n, invln2_and_ln2, 1); - r = vfmsq_laneq_f32 (r, n, invln2_and_ln2, 2); - uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); - float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); + /* exp(ax) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + ax = ln2*n + r, with r in [-ln2/2, ln2/2]. 
*/ + float32x4_t ax = vabsq_f32 (x); + float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi); + float32x4_t n = vrndaq_f32 (vmulq_f32 (ax, d->inv_ln2)); + float32x4_t r = vfmsq_laneq_f32 (ax, n, ln2_c02, 0); + r = vfmsq_laneq_f32 (r, n, ln2_c02, 1); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23); + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); /* Custom order-4 Estrin avoids building high order monomial. */ float32x4_t r2 = vmulq_f32 (r, r); - float32x4_t p, q, poly; - p = vfmaq_f32 (C (1), C (0), r); - q = vfmaq_f32 (C (3), C (2), r); + float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2); + float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3); q = vfmaq_f32 (q, p, r2); - p = vmulq_f32 (C (4), r); - poly = vfmaq_f32 (p, q, r2); + p = vmulq_f32 (d->c4, r); + float32x4_t poly = vfmaq_f32 (p, q, r2); return vfmaq_f32 (scale, poly, scale); } - #endif diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h index 59b552da6b..1daedfdd51 100644 --- a/sysdeps/aarch64/fpu/v_expm1f_inline.h +++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h @@ -21,48 +21,47 @@ #define AARCH64_FPU_V_EXPM1F_INLINE_H #include "v_math.h" -#include "poly_advsimd_f32.h" +#include "math_config.h" struct v_expm1f_data { - float32x4_t poly[5]; - float invln2_and_ln2[4]; - float32x4_t shift; + float32x4_t c0, c2; int32x4_t exponent_bias; + float c1, c3, inv_ln2, c4; + float ln2_hi, ln2_lo; }; /* Coefficients generated using fpminimax with degree=5 in [-log(2)/2, - log(2)/2]. Exponent bias is asuint(1.0f). - invln2_and_ln2 Stores constants: invln2, ln2_lo, ln2_hi, 0. */ + log(2)/2]. Exponent bias is asuint(1.0f). 
*/ #define V_EXPM1F_DATA \ { \ - .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \ - V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \ - .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \ - .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \ + .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5), \ + .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \ + .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f, \ + .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \ } static inline float32x4_t expm1f_inline (float32x4_t x, const struct v_expm1f_data *d) { - /* Helper routine for calculating exp(x) - 1. - Copied from v_expm1f_1u6.c, with all special-case handling removed - the - calling routine should handle special values if required. */ + /* Helper routine for calculating exp(x) - 1. */ + + float32x2_t ln2 = vld1_f32 (&d->ln2_hi); + float32x4_t lane_consts = vld1q_f32 (&d->c1); /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2); - float32x4_t j - = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift); + float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2)); int32x4_t i = vcvtq_s32_f32 (j); - float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1); - f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2); + float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0); + f = vfmsq_lane_f32 (f, j, ln2, 1); - /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). - Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses - Horner. */ + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). 
*/ float32x4_t f2 = vmulq_f32 (f, f); float32x4_t f4 = vmulq_f32 (f2, f2); - float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1); + float32x4_t p = vfmaq_f32 (p01, f2, p23); + p = vfmaq_laneq_f32 (p, f4, lane_consts, 3); p = vfmaq_f32 (f, f2, p); /* t = 2^i. */ diff --git a/sysdeps/aarch64/fpu/v_log1p_inline.h b/sysdeps/aarch64/fpu/v_log1p_inline.h index 242e43b6ee..834ff65adf 100644 --- a/sysdeps/aarch64/fpu/v_log1p_inline.h +++ b/sysdeps/aarch64/fpu/v_log1p_inline.h @@ -21,29 +21,30 @@ #define AARCH64_FPU_V_LOG1P_INLINE_H #include "v_math.h" -#include "poly_advsimd_f64.h" struct v_log1p_data { - float64x2_t poly[19], ln2[2]; + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16; uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask; int64x2_t one_top; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; + double ln2[2]; }; /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. 
*/ #define V_LOG1P_CONSTANTS_TABLE \ { \ - .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), \ - V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), \ - V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), \ - V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), \ - V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), \ - V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), \ - V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), \ - V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), \ - V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), \ - V2 (-0x1.cfa7385bdb37ep-6) }, \ - .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, \ + .c0 = V2 (-0x1.ffffffffffffbp-2), .c1 = 0x1.55555555551a9p-2, \ + .c2 = V2 (-0x1.00000000008e3p-2), .c3 = 0x1.9999999a32797p-3, \ + .c4 = V2 (-0x1.555555552fecfp-3), .c5 = 0x1.249248e071e5ap-3, \ + .c6 = V2 (-0x1.ffffff8bf8482p-4), .c7 = 0x1.c71c8f07da57ap-4, \ + .c8 = V2 (-0x1.9999ca4ccb617p-4), .c9 = 0x1.7459ad2e1dfa3p-4, \ + .c10 = V2 (-0x1.554d2680a3ff2p-4), .c11 = 0x1.3b4c54d487455p-4, \ + .c12 = V2 (-0x1.2548a9ffe80e6p-4), .c13 = 0x1.0f389a24b2e07p-4, \ + .c14 = V2 (-0x1.eee4db15db335p-5), .c15 = 0x1.e95b494d4a5ddp-5, \ + .c16 = V2 (-0x1.15fdf07cb7c73p-4), .c17 = 0x1.0310b70800fcfp-4, \ + .c18 = -0x1.cfa7385bdb37ep-6, \ + .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, \ .hf_rt2_top = V2 (0x3fe6a09e00000000), \ .one_m_hf_rt2_top = V2 (0x00095f6200000000), \ .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff) \ @@ -51,19 +52,45 @@ struct v_log1p_data #define BottomMask v_u64 (0xffffffff) +static inline float64x2_t +eval_poly (float64x2_t m, float64x2_t m2, const struct v_log1p_data *d) +{ + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. 
*/ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1718 = vld1q_f64 (&d->c17); + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, m, c1718, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, m, c1315, 1); + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, m, c1315, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, m, c911, 1); + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, m, c911, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, m, c57, 1); + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, m, c57, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, m, c13, 1); + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, m, c13, 0); + float64x2_t p = vfmaq_laneq_f64 (p1617, m2, c1718, 1); + p = vfmaq_f64 (p1415, m2, p); + p = vfmaq_f64 (p1213, m2, p); + p = vfmaq_f64 (p1011, m2, p); + p = vfmaq_f64 (p89, m2, p); + p = vfmaq_f64 (p67, m2, p); + p = vfmaq_f64 (p45, m2, p); + p = vfmaq_f64 (p23, m2, p); + return vfmaq_f64 (p01, m2, p); +} + static inline float64x2_t log1p_inline (float64x2_t x, const struct v_log1p_data *d) { - /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several - modifications: + /* Helper for calculating log(x + 1): - No special-case handling - this should be dealt with by the caller. - - Pairwise Horner polynomial evaluation for improved accuracy. - Optionally simulate the shortcut for k=0, used in the scalar routine, - using v_sel, for improved accuracy when the argument to log1p is close to - 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in - the source of the caller before including this file. - See v_log1pf_2u1.c for details of the algorithm. */ - float64x2_t m = vaddq_f64 (x, v_f64 (1)); + using v_sel, for improved accuracy when the argument to log1p is close + to 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 + in the source of the caller before including this file. 
*/ + float64x2_t m = vaddq_f64 (x, v_f64 (1.0)); uint64x2_t mi = vreinterpretq_u64_f64 (m); uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top); @@ -74,14 +101,14 @@ log1p_inline (float64x2_t x, const struct v_log1p_data *d) /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top); uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask)); - float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1)); + float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1.0)); /* Correction term c/m. */ - float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m); + float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1.0))), m); #ifndef WANT_V_LOG1P_K0_SHORTCUT -#error \ - "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" +# error \ + "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" #elif WANT_V_LOG1P_K0_SHORTCUT /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is that the approximation is solely the polynomial. */ @@ -92,11 +119,12 @@ log1p_inline (float64x2_t x, const struct v_log1p_data *d) /* Approximate log1p(f) on the reduced input using a polynomial. */ float64x2_t f2 = vmulq_f64 (f, f); - float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly); + float64x2_t p = eval_poly (f, f2, d); /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. 
*/ - float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]); - float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]); + float64x2_t ln2 = vld1q_f64 (&d->ln2[0]); + float64x2_t ylo = vfmaq_laneq_f64 (cm, k, ln2, 1); + float64x2_t yhi = vfmaq_laneq_f64 (f, k, ln2, 0); return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p); } diff --git a/sysdeps/aarch64/fpu/v_log1pf_inline.h b/sysdeps/aarch64/fpu/v_log1pf_inline.h index 643a6cdcfc..73e45a942e 100644 --- a/sysdeps/aarch64/fpu/v_log1pf_inline.h +++ b/sysdeps/aarch64/fpu/v_log1pf_inline.h @@ -25,54 +25,81 @@ struct v_log1pf_data { - float32x4_t poly[8], ln2; uint32x4_t four; int32x4_t three_quarters; + float c0, c3, c5, c7; + float32x4_t c4, c6, c1, c2, ln2; }; /* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients (1, -0.5) are not stored as they can be generated more efficiently. */ #define V_LOG1PF_CONSTANTS_TABLE \ { \ - .poly \ - = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \ - V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \ - V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \ - .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ - .three_quarters = V4 (0x3f400000) \ + .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f), \ + .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f, \ + .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f, \ + .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f, \ + .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \ + .three_quarters = V4 (0x3f400000) \ } static inline float32x4_t -eval_poly (float32x4_t m, const float32x4_t *c) +eval_poly (float32x4_t m, const struct v_log1pf_data *d) { - /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine - uses split Estrin, but this way reduces register pressure in the calling - routine). */ - float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]); + /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. 
*/ + float32x4_t c0357 = vld1q_f32 (&d->c0); + float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0); float32x4_t m2 = vmulq_f32 (m, m); - q = vfmaq_f32 (m, m2, q); - float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1); + float32x4_t p = vfmaq_f32 (p45, m2, p67); + p = vfmaq_f32 (p23, m2, p); + p = vfmaq_f32 (d->c1, m, p); p = vmulq_f32 (m2, p); - return vfmaq_f32 (q, m2, p); + p = vfmaq_f32 (m, m2, p); + return vfmaq_f32 (p, m2, q); } static inline float32x4_t -log1pf_inline (float32x4_t x, const struct v_log1pf_data d) +log1pf_inline (float32x4_t x, const struct v_log1pf_data *d) { - /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no - special-case handling. See that file for details of the algorithm. */ + /* Helper for calculating log(x + 1). */ + + /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m + is in [-0.25, 0.5]): + log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). + + We approximate log1p(m) with a polynomial, then scale by + k*log(2). Instead of doing this directly, we use an intermediate + scale factor s = 4*k*log(2) to ensure the scale is representable + as a normalised fp32 number. */ float32x4_t m = vaddq_f32 (x, v_f32 (1.0f)); + + /* Choose k to scale x to the range [-1/4, 1/2]. */ int32x4_t k - = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters), + = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters), v_s32 (0xff800000)); uint32x4_t ku = vreinterpretq_u32_s32 (k); - float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku)); + + /* Scale up to ensure that the scale factor is representable as normalised + fp32 number, and scale m down accordingly. */ + float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku)); + + /* Scale x by exponent manipulation. 
*/ float32x4_t m_scale = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku)); m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s)); - float32x4_t p = eval_poly (m_scale, d.poly); + + /* Evaluate polynomial on the reduced interval. */ + float32x4_t p = eval_poly (m_scale, d); + + /* The scale factor to be applied back at the end - by multiplying float(k) + by 2^-23 we get the unbiased exponent of k. */ float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f)); - return vfmaq_f32 (p, scale_back, d.ln2); + + /* Apply the scaling back. */ + return vfmaq_f32 (p, scale_back, d->ln2); } #endif diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h index 7f0a8aa5f2..862eefaf8f 100644 --- a/sysdeps/aarch64/fpu/vecmath_config.h +++ b/sysdeps/aarch64/fpu/vecmath_config.h @@ -75,49 +75,37 @@ extern const struct v_log10_data } table[1 << V_LOG10_TABLE_BITS]; } __v_log10_data attribute_hidden; -extern const struct erff_data +extern const struct v_erff_data { struct { float erf, scale; } tab[513]; -} __erff_data attribute_hidden; +} __v_erff_data attribute_hidden; -extern const struct sv_erff_data -{ - float erf[513]; - float scale[513]; -} __sv_erff_data attribute_hidden; - -extern const struct erf_data +extern const struct v_erf_data { struct { double erf, scale; } tab[769]; -} __erf_data attribute_hidden; - -extern const struct sv_erf_data -{ - double erf[769]; - double scale[769]; -} __sv_erf_data attribute_hidden; +} __v_erf_data attribute_hidden; -extern const struct erfc_data +extern const struct v_erfc_data { struct { double erfc, scale; } tab[3488]; -} __erfc_data attribute_hidden; +} __v_erfc_data attribute_hidden; -extern const struct erfcf_data +extern const struct v_erfcf_data { struct { float erfc, scale; } tab[645]; -} __erfcf_data attribute_hidden; +} __v_erfcf_data attribute_hidden; /* Some data for AdvSIMD and SVE pow's internal exp and log. 
*/ #define V_POW_EXP_TABLE_BITS 8 diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 6c96304611..b76c38dac2 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -1460,7 +1460,7 @@ float: 2 ldouble: 1 Function: "log_sve": -double: 1 +double: 2 float: 3 Function: "log_towardzero": diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index 7ef77ee8c9..71814d0b2f 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2024 Free Software Foundation, Inc. +/* Generic optimized memset using SIMD. + Copyright (C) 2012-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -17,7 +18,6 @@ . */ #include -#include "memset-reg.h" #ifndef MEMSET # define MEMSET memset @@ -25,167 +25,117 @@ /* Assumptions: * - * ARMv8-a, AArch64, unaligned accesses + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ -ENTRY (MEMSET) +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 +#define off x3 +#define dstend2 x5 +ENTRY (MEMSET) PTR_ARG (0) SIZE_ARG (2) dup v0.16B, valw + cmp count, 16 + b.lo L(set_small) + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) - cmp count, 96 - b.hi L(set_long) - cmp count, 16 - b.hs L(set_medium) - mov val, v0.D[0] + /* Set 16..63 bytes. */ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off + str q0, [dstin] + str q0, [dstin, off] + str q0, [dstend2, -16] + str q0, [dstend, -16] + ret + .p2align 4 /* Set 0..15 bytes. */ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] - ret - nop -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] +L(set_small): + add dstend, dstin, count + cmp count, 4 + b.lo 2f + lsr off, count, 3 + sub dstend2, dstend, off, lsl 2 + str s0, [dstin] + str s0, [dstin, off, lsl 2] + str s0, [dstend2, -4] + str s0, [dstend, -4] ret + + /* Set 0..3 bytes. 
*/ 2: cbz count, 3f + lsr off, count, 1 strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] + strb valw, [dstin, off] + strb valw, [dstend, -1] 3: ret - /* Set 17..96 bytes. */ -L(set_medium): - str q0, [dstin] - tbnz count, 6, L(set96) - str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret - .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. */ -L(set96): - str q0, [dstin, 16] +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret - .p2align 3 - nop + .p2align 4 L(set_long): - and valw, valw, 255 - bic dst, dstin, 15 str q0, [dstin] - cmp count, 256 - ccmp valw, 0, 0, cs - b.eq L(try_zva) -L(no_zva): - sub count, dstend, dst /* Count is 16 too large. */ - sub dst, dst, 16 /* Dst is biased by -32. */ - sub count, count, 64 + 16 /* Adjust count and bias for loop. */ -1: stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64]! -L(tail64): - subs count, count, 64 - b.hi 1b -2: stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret - -L(try_zva): + str q0, [dst, 16] + tst valw, 255 + b.ne L(no_zva) #ifndef ZVA64_ONLY - .p2align 3 - mrs tmp1, dczid_el0 - tbnz tmp1w, 4, L(no_zva) - and tmp1w, tmp1w, 15 - cmp tmp1w, 4 /* ZVA size is 64 bytes. */ - b.ne L(zva_128) - nop + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne L(no_zva) #endif - /* Write the first and last 64 byte aligned block using stp rather - than using DC ZVA. This is faster on some cores. - */ - .p2align 4 -L(zva_64): - str q0, [dst, 16] - stp q0, q0, [dst, 32] - bic dst, dst, 63 - stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+64+64 /* Adjust count and bias for loop. 
*/ - add dst, dst, 128 -1: dc zva, dst - add dst, dst, 64 - subs count, count, 64 - b.hi 1b - stp q0, q0, [dst, 0] stp q0, q0, [dst, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 64 + 64 /* Adjust count and bias for loop. */ + + /* Write last bytes before ZVA loop. */ stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] + + .p2align 4 +L(zva64_loop): + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi L(zva64_loop) ret -#ifndef ZVA64_ONLY .p2align 3 -L(zva_128): - cmp tmp1w, 5 /* ZVA size is 128 bytes. */ - b.ne L(zva_other) - - str q0, [dst, 16] +L(no_zva): + sub count, dstend, dst /* Count is 32 too large. */ + sub count, count, 64 + 32 /* Adjust count and bias for loop. */ +L(no_zva_loop): stp q0, q0, [dst, 32] stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - bic dst, dst, 127 - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+128 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst - add dst, dst, 128 - subs count, count, 128 - b.hi 1b - stp q0, q0, [dstend, -128] - stp q0, q0, [dstend, -96] + add dst, dst, 64 + subs count, count, 64 + b.hi L(no_zva_loop) stp q0, q0, [dstend, -64] stp q0, q0, [dstend, -32] ret -L(zva_other): - mov tmp2w, 4 - lsl zva_lenw, tmp2w, tmp1w - add tmp1, zva_len, 64 /* Max alignment bytes written. */ - cmp count, tmp1 - blo L(no_zva) - - sub tmp2, zva_len, 1 - add tmp1, dst, zva_len - add dst, dst, 16 - subs count, tmp1, dst /* Actual alignment bytes to write. */ - bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */ - beq 2f -1: stp q0, q0, [dst], 64 - stp q0, q0, [dst, -32] - subs count, count, 64 - b.hi 1b -2: mov dst, tmp1 - sub count, dstend, tmp1 /* Remaining bytes to write. */ - subs count, count, zva_len - b.lo 4f -3: dc zva, dst - add dst, dst, zva_len - subs count, count, zva_len - b.hs 3b -4: add count, count, zva_len - sub dst, dst, 32 /* Bias dst for tail loop. 
*/ - b L(tail64) -#endif - END (MEMSET) libc_hidden_builtin_def (MEMSET) diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index 3e251cc234..6880ebc035 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -16,6 +16,7 @@ sysdep_routines += \ memset_kunpeng \ memset_mops \ memset_oryon1 \ + memset_sve_zva64 \ memset_zva64 \ strlen_asimd \ strlen_generic \ diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index b2fda541f9..1f101a719b 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -61,6 +61,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) #if HAVE_AARCH64_SVE_ASM IFUNC_IMPL_ADD (array, i, memset, sve && !bti && zva_size == 256, __memset_a64fx) + IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 64, __memset_sve_zva64) #endif IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic)) diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c index bd063c16c9..bb1e865c97 100644 --- a/sysdeps/aarch64/multiarch/memset.c +++ b/sysdeps/aarch64/multiarch/memset.c @@ -36,6 +36,7 @@ extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; extern __typeof (__redirect_memset) __memset_generic attribute_hidden; extern __typeof (__redirect_memset) __memset_mops attribute_hidden; extern __typeof (__redirect_memset) __memset_oryon1 attribute_hidden; +extern __typeof (__redirect_memset) __memset_sve_zva64 attribute_hidden; static inline __typeof (__redirect_memset) * select_memset_ifunc (void) @@ -49,6 +50,9 @@ select_memset_ifunc (void) { if (IS_A64FX (midr) && zva_size == 256) return __memset_a64fx; + + if (prefer_sve_ifuncs && zva_size == 64) + return __memset_sve_zva64; } if (IS_ORYON1 (midr) && zva_size == 64) diff 
--git a/sysdeps/aarch64/multiarch/memset_sve_zva64.S b/sysdeps/aarch64/multiarch/memset_sve_zva64.S new file mode 100644 index 0000000000..7fb40fdd9e --- /dev/null +++ b/sysdeps/aarch64/multiarch/memset_sve_zva64.S @@ -0,0 +1,123 @@ +/* Optimized memset for SVE. + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +/* Assumptions: + * + * ARMv8-a, AArch64, Advanced SIMD, SVE, unaligned accesses. + * ZVA size is 64. + */ + +#if HAVE_AARCH64_SVE_ASM + +.arch armv8.2-a+sve + +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 +#define vlen x5 +#define off x3 +#define dstend2 x5 + +ENTRY (__memset_sve_zva64) + dup v0.16B, valw + cmp count, 16 + b.lo L(set_16) + + add dstend, dstin, count + cmp count, 64 + b.hs L(set_128) + + /* Set 16..63 bytes. 
*/ + mov off, 16 + and off, off, count, lsr 1 + sub dstend2, dstend, off + str q0, [dstin] + str q0, [dstin, off] + str q0, [dstend2, -16] + str q0, [dstend, -16] + ret + + .p2align 4 +L(set_16): + whilelo p0.b, xzr, count + st1b z0.b, p0, [dstin] + ret + + .p2align 4 +L(set_128): + bic dst, dstin, 15 + cmp count, 128 + b.hi L(set_long) + stp q0, q0, [dstin] + stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + + .p2align 4 +L(set_long): + cmp count, 256 + b.lo L(no_zva) + tst valw, 255 + b.ne L(no_zva) + + str q0, [dstin] + str q0, [dst, 16] + bic dst, dstin, 31 + stp q0, q0, [dst, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + sub x8, dstend, 1 /* Write last bytes before ZVA loop. */ + bic x8, x8, 15 + stp q0, q0, [x8, -48] + str q0, [x8, -16] + str q0, [dstend, -16] + + .p2align 4 +L(zva64_loop): + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi L(zva64_loop) + ret + +L(no_zva): + str q0, [dstin] + sub count, dstend, dst /* Count is 16 too large. */ + sub count, count, 64 + 16 /* Adjust count and bias for loop. 
*/ +L(no_zva_loop): + stp q0, q0, [dst, 16] + stp q0, q0, [dst, 48] + add dst, dst, 64 + subs count, count, 64 + b.hi L(no_zva_loop) + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +END (__memset_sve_zva64) +#endif diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure index 19657b627b..e1b772c586 100644 --- a/sysdeps/aarch64/preconfigure +++ b/sysdeps/aarch64/preconfigure @@ -3,5 +3,6 @@ aarch64*) base_machine=aarch64 machine=aarch64 mtls_descriptor=desc + mtls_traditional=trad ;; esac diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S index 43fdb1b2fb..2fa0dc768d 100644 --- a/sysdeps/aarch64/setjmp.S +++ b/sysdeps/aarch64/setjmp.S @@ -37,6 +37,12 @@ ENTRY (__sigsetjmp) PTR_ARG (0) 1: + +#if IS_IN(libc) + /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */ + CALL_LIBC_ARM_ZA_DISABLE +#endif + stp x19, x20, [x0, #JB_X19<<3] stp x21, x22, [x0, #JB_X21<<3] stp x23, x24, [x0, #JB_X23<<3] diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S index ab2a576cdb..352fb40d3a 100644 --- a/sysdeps/aarch64/strlen.S +++ b/sysdeps/aarch64/strlen.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2024 Free Software Foundation, Inc. +/* Generic optimized strlen using SIMD. + Copyright (C) 2012-2024 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -56,36 +57,50 @@ ENTRY (STRLEN) shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov synd, dend lsr synd, synd, shift - cbz synd, L(loop) + cbz synd, L(next16) rbit synd, synd clz result, synd lsr result, result, 2 ret +L(next16): + ldr data, [src, 16] + cmeq vhas_nul.16b, vdata.16b, 0 + shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ + fmov synd, dend + cbz synd, L(loop) + add src, src, 16 +#ifndef __AARCH64EB__ + rbit synd, synd +#endif + sub result, src, srcin + clz tmp, synd + add result, result, tmp, lsr 2 + ret + .p2align 5 L(loop): - ldr data, [src, 16] + ldr data, [src, 32]! 
cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbnz synd, L(loop_end) - ldr data, [src, 32]! + ldr data, [src, 16] cmeq vhas_nul.16b, vdata.16b, 0 - umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + addhn vend.8b, vhas_nul.8h, vhas_nul.8h fmov synd, dend cbz synd, L(loop) - sub src, src, 16 + add src, src, 16 L(loop_end): - shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ - sub result, src, srcin - fmov synd, dend + sub result, shift, src, lsl 2 /* (srcin - src) << 2. */ #ifndef __AARCH64EB__ rbit synd, synd + sub result, result, 3 #endif - add result, result, 16 clz tmp, synd - add result, result, tmp, lsr 2 + sub result, tmp, result + lsr result, result, 2 ret END (STRLEN) diff --git a/sysdeps/aarch64/tst-sme-clone.c b/sysdeps/aarch64/tst-sme-clone.c new file mode 100644 index 0000000000..b6ad54fa37 --- /dev/null +++ b/sysdeps/aarch64/tst-sme-clone.c @@ -0,0 +1,54 @@ +/* Test that ZA state of SME is cleared in both parent and child + when clone() syscall is used. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-sme-skeleton.c" + +#include +#include + +static int +fun (void * const arg) +{ + printf ("in child: %s\n", (const char *)arg); + /* Check that ZA state of SME was disabled in child. 
*/ + check_sme_za_state ("after clone in child", /* Clear. */ true); + return 0; +} + +static char __attribute__((aligned(16))) +stack[1024 * 1024]; + +static void +run (struct blk *ptr) +{ + char *syscall_name = (char *)"clone"; + printf ("in parent: before %s\n", syscall_name); + + /* Enabled ZA state so that effect of disabling be observable. */ + enable_sme_za_state (ptr); + check_sme_za_state ("before clone", /* Clear. */ false); + + pid_t pid = xclone (fun, syscall_name, stack, sizeof (stack), + CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD); + + /* Check that ZA state of SME was disabled in parent. */ + check_sme_za_state ("after clone in parent", /* Clear. */ true); + + TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid); +} diff --git a/sysdeps/aarch64/tst-sme-clone3.c b/sysdeps/aarch64/tst-sme-clone3.c new file mode 100644 index 0000000000..f420d5984d --- /dev/null +++ b/sysdeps/aarch64/tst-sme-clone3.c @@ -0,0 +1,84 @@ +/* Test that ZA state of SME is cleared in both parent and child + when clone3() syscall is used. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-sme-skeleton.c" + +#include + +#include +#include +#include + +/* Since clone3 is not a public symbol, we link this test explicitly + with clone3.o and have to provide this declaration. 
*/ +int __clone3 (struct clone_args *cl_args, size_t size, + int (*func)(void *arg), void *arg); + +static int +fun (void * const arg) +{ + printf ("in child: %s\n", (const char *)arg); + /* Check that ZA state of SME was disabled in child. */ + check_sme_za_state ("after clone3 in child", /* Clear. */ true); + return 0; +} + +static char __attribute__((aligned(16))) +stack[1024 * 1024]; + +/* Required by __arm_za_disable.o and provided by the startup code + as a hidden symbol. */ +uint64_t _dl_hwcap2; + +static void +run (struct blk *ptr) +{ + _dl_hwcap2 = getauxval (AT_HWCAP2); + + char *syscall_name = (char *)"clone3"; + struct clone_args args = { + .flags = CLONE_VM | CLONE_VFORK, + .exit_signal = SIGCHLD, + .stack = (uintptr_t) stack, + .stack_size = sizeof (stack), + }; + printf ("in parent: before %s\n", syscall_name); + + /* Enabled ZA state so that effect of disabling be observable. */ + enable_sme_za_state (ptr); + check_sme_za_state ("before clone3", /* Clear. */ false); + + pid_t pid = __clone3 (&args, sizeof (args), fun, syscall_name); + + /* Check that ZA state of SME was disabled in parent. */ + check_sme_za_state ("after clone3 in parent", /* Clear. */ true); + + printf ("%s child pid: %d\n", syscall_name, pid); + + xwaitpid (pid, NULL, 0); + printf ("in parent: after %s\n", syscall_name); +} + +/* Workaround to simplify linking with clone3.o. */ +void __syscall_error(int code) +{ + int err = -code; + fprintf (stderr, "syscall error %d (%s)\n", err, strerror (err)); + exit (err); +} diff --git a/sysdeps/aarch64/tst-sme-fork.c b/sysdeps/aarch64/tst-sme-fork.c new file mode 100644 index 0000000000..b003b08884 --- /dev/null +++ b/sysdeps/aarch64/tst-sme-fork.c @@ -0,0 +1,43 @@ +/* Test that ZA state of SME is cleared in both parent and child + when fork() function is used. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-sme-skeleton.c" + +static void +run (struct blk *blk) +{ + /* Enabled ZA state so that effect of disabling be observable. */ + enable_sme_za_state (blk); + check_sme_za_state ("before fork", /* Clear. */ false); + fflush (stdout); + + pid_t pid = xfork (); + + if (pid == 0) + { + /* Check that ZA state of SME was disabled in child. */ + check_sme_za_state ("after fork in child", /* Clear. */ true); + exit (0); + } + + /* Check that ZA state of SME was disabled in parent. */ + check_sme_za_state ("after fork in parent", /* Clear. */ true); + + TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid); +} diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h new file mode 100644 index 0000000000..ab9c503e45 --- /dev/null +++ b/sysdeps/aarch64/tst-sme-helper.h @@ -0,0 +1,94 @@ +/* Utility functions for SME tests. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +struct blk { + void *za_save_buffer; + uint16_t num_za_save_slices; + char __reserved[6]; +}; + +/* Read SVCR to get SM (bit0) and ZA (bit1) state. */ +static unsigned long +get_svcr (void) +{ + register unsigned long x0 asm ("x0"); + asm volatile ( + ".inst 0xd53b4240 /* mrs x0, svcr */\n" + : "=r" (x0)); + return x0; +} + +/* Returns tpidr2. */ +static void * +get_tpidr2 (void) +{ + register unsigned long x0 asm ("x0"); + asm volatile ( + ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n" + : "=r"(x0) :: "memory"); + return (void *) x0; +} + +/* Obtains current streaming SVE vector register size. */ +static unsigned long +get_svl (void) +{ + register unsigned long x0 asm ("x0"); + asm volatile ( + ".inst 0x04bf5820 /* rdsvl x0, 1 */\n" + : "=r" (x0)); + return x0; +} + +/* PSTATE.ZA = 1, set ZA state to active. */ +static void +start_za (void) +{ + asm volatile ( + ".inst 0xd503457f /* smstart za */"); +} + +/* Load SVL slices of data into ZA from BUF, one slice per iteration. */ +static void __attribute__ ((noinline)) +load_za (const void *buf, unsigned long svl) +{ + register unsigned long x15 asm ("x15") = 0; + register unsigned long x16 asm ("x16") = (unsigned long)buf; + register unsigned long x17 asm ("x17") = svl; + + asm volatile ( + ".inst 0xd503437f /* smstart sm */\n" + ".L_ldr_loop:\n" + ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n" + "add w15, w15, 1\n" + ".inst 0x04305030 /* addvl x16, x16, 1 */\n" + "cmp w15, w17\n" + "bne .L_ldr_loop\n" + ".inst 0xd503427f /* smstop sm */\n" + : "+r"(x15), "+r"(x16), "+r"(x17)); +} + +/* Set tpidr2 to BLK.
*/ +static void +set_tpidr2 (struct blk *blk) +{ + register unsigned long x0 asm ("x0") = (unsigned long)blk; + asm volatile ( + ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n" + :: "r"(x0) : "memory"); +} diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c index 35769340e7..0c1d1a4ff8 100644 --- a/sysdeps/aarch64/tst-sme-jmp.c +++ b/sysdeps/aarch64/tst-sme-jmp.c @@ -27,87 +27,15 @@ #include #include -struct blk { - void *za_save_buffer; - uint16_t num_za_save_slices; - char __reserved[6]; -}; +#include "tst-sme-helper.h" +/* Streaming SVE vector register size. */ static unsigned long svl; + static uint8_t *za_orig; static uint8_t *za_dump; static uint8_t *za_save; -static unsigned long -get_svl (void) -{ - register unsigned long x0 asm ("x0"); - asm volatile ( - ".inst 0x04bf5820 /* rdsvl x0, 1 */\n" - : "=r" (x0)); - return x0; -} - -/* PSTATE.ZA = 1, set ZA state to active. */ -static void -start_za (void) -{ - asm volatile ( - ".inst 0xd503457f /* smstart za */"); -} - -/* Read SVCR to get SM (bit0) and ZA (bit1) state. */ -static unsigned long -get_svcr (void) -{ - register unsigned long x0 asm ("x0"); - asm volatile ( - ".inst 0xd53b4240 /* mrs x0, svcr */\n" - : "=r" (x0)); - return x0; -} - -/* Load data into ZA byte by byte from p. */ -static void __attribute__ ((noinline)) -load_za (const void *p) -{ - register unsigned long x15 asm ("x15") = 0; - register unsigned long x16 asm ("x16") = (unsigned long)p; - register unsigned long x17 asm ("x17") = svl; - - asm volatile ( - ".inst 0xd503437f /* smstart sm */\n" - ".L_ldr_loop:\n" - ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n" - "add w15, w15, 1\n" - ".inst 0x04305030 /* addvl x16, x16, 1 */\n" - "cmp w15, w17\n" - "bne .L_ldr_loop\n" - ".inst 0xd503427f /* smstop sm */\n" - : "+r"(x15), "+r"(x16), "+r"(x17)); -} - -/* Set tpidr2 to BLK. 
*/ -static void -set_tpidr2 (struct blk *blk) -{ - register unsigned long x0 asm ("x0") = (unsigned long)blk; - asm volatile ( - ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n" - :: "r"(x0) : "memory"); -} - -/* Returns tpidr2. */ -static void * -get_tpidr2 (void) -{ - register unsigned long x0 asm ("x0"); - asm volatile ( - ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n" - : "=r"(x0) :: "memory"); - return (void *) x0; -} - static void print_data(const char *msg, void *p) { @@ -157,7 +85,7 @@ longjmp_test (void) FAIL_EXIT1 ("svcr != 0: %lu", svcr); set_tpidr2 (&blk); start_za (); - load_za (za_orig); + load_za (za_orig, svl); print_data ("za save space", za_save); p = get_tpidr2 (); @@ -168,8 +96,8 @@ longjmp_test (void) { p = get_tpidr2 (); printf ("before longjmp: tp2 = %p\n", p); - if (p != &blk) - FAIL_EXIT1 ("tpidr2 is clobbered"); + if (p != NULL) + FAIL_EXIT1 ("tpidr2 has not been reset to null"); do_longjmp (env); FAIL_EXIT1 ("longjmp returned"); } @@ -206,7 +134,7 @@ setcontext_test (void) FAIL_EXIT1 ("svcr != 0: %lu", svcr); set_tpidr2 (&blk); start_za (); - load_za (za_orig); + load_za (za_orig, svl); print_data ("za save space", za_save); p = get_tpidr2 (); diff --git a/sysdeps/aarch64/tst-sme-signal.c b/sysdeps/aarch64/tst-sme-signal.c new file mode 100644 index 0000000000..b4b07bcc44 --- /dev/null +++ b/sysdeps/aarch64/tst-sme-signal.c @@ -0,0 +1,115 @@ +/* Test handling of SME state in a signal handler. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-sme-skeleton.c" + +#include + +static struct _aarch64_ctx * +extension (void *p) +{ + return p; +} + +#ifndef TPIDR2_MAGIC +#define TPIDR2_MAGIC 0x54504902 +#endif + +#ifndef ZA_MAGIC +#define ZA_MAGIC 0x54366345 +#endif + +#ifndef ZT_MAGIC +#define ZT_MAGIC 0x5a544e01 +#endif + +#ifndef EXTRA_MAGIC +#define EXTRA_MAGIC 0x45585401 +#endif + +/* We use a pipe to make sure that the final check of the SME state + happens after signal handler finished. */ +static int pipefd[2]; + +#define WRITE(msg) xwrite (1, msg, sizeof (msg)); + +static void +handler (int signo, siginfo_t *si, void *ctx) +{ + TEST_VERIFY (signo == SIGUSR1); + WRITE ("in the handler\n"); + check_sme_za_state ("during signal", true /* State is clear. */); + ucontext_t *uc = ctx; + void *p = uc->uc_mcontext.__reserved; + unsigned int found = 0; + uint32_t m; + while ((m = extension (p)->magic)) + { + if (m == TPIDR2_MAGIC) + { + WRITE ("found TPIDR2_MAGIC\n"); + found += 1; + } + if (m == ZA_MAGIC) + { + WRITE ("found ZA_MAGIC\n"); + found += 1; + } + if (m == ZT_MAGIC) + { + WRITE ("found ZT_MAGIC\n"); + found += 1; + } + if (m == EXTRA_MAGIC) + { + WRITE ("found EXTRA_MAGIC\n"); + struct { struct _aarch64_ctx h; uint64_t data; } *e = p; + p = (char *)e->data; + continue; + } + p = (char *)p + extension (p)->size; + } + TEST_COMPARE (found, 3); + + /* Signal that the wait is over (see below). */ + char message = '\0'; + xwrite (pipefd[1], &message, 1); +} + +static void +run (struct blk *blk) +{ + xpipe (pipefd); + + struct sigaction sigact; + sigemptyset (&sigact.sa_mask); + sigact.sa_flags = 0; + sigact.sa_flags |= SA_SIGINFO; + sigact.sa_sigaction = handler; + xsigaction (SIGUSR1, &sigact, NULL); + + enable_sme_za_state (blk); + check_sme_za_state ("before signal", false /* State is not clear. 
*/); + xraise (SIGUSR1); + + /* Wait for signal handler to complete. */ + char response; + xread (pipefd[0], &response, 1); + + check_sme_za_state ("after signal", false /* State is not clear. */); +} diff --git a/sysdeps/aarch64/tst-sme-skeleton.c b/sysdeps/aarch64/tst-sme-skeleton.c new file mode 100644 index 0000000000..ba84dda1cb --- /dev/null +++ b/sysdeps/aarch64/tst-sme-skeleton.c @@ -0,0 +1,103 @@ +/* Template for SME tests. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "tst-sme-helper.h" + +/* Streaming SVE vector register size. */ +static unsigned long svl; + +static uint8_t *state; + +/* Activate ZA, point TPIDR2 at BLK and load ZA from BLK's save buffer + (not from BLK itself, which is only a small descriptor). */ +static void +enable_sme_za_state (struct blk *blk) +{ + start_za (); + set_tpidr2 (blk); + load_za (blk->za_save_buffer, svl); +} + +/* Check if SME state is disabled (when CLEAR is true) or + enabled (when CLEAR is false).
*/ +static void +check_sme_za_state (const char msg[], bool clear) +{ + unsigned long svcr = get_svcr (); + void *tpidr2 = get_tpidr2 (); + printf ("[%s]\n", msg); + printf ("svcr = %016lx\n", svcr); + printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2); + if (clear) + { + TEST_VERIFY (svcr == 0); + TEST_VERIFY (tpidr2 == NULL); + } + else + { + TEST_VERIFY (svcr != 0); + TEST_VERIFY (tpidr2 != NULL); + } +} + +/* Should be defined in actual test that includes this + skeleton file. */ +static void +run (struct blk *ptr); + +static int +do_test (void) +{ + unsigned long hwcap2 = getauxval (AT_HWCAP2); + if ((hwcap2 & HWCAP2_SME) == 0) + return EXIT_UNSUPPORTED; + + /* Get current streaming SVE vector length in bytes. */ + svl = get_svl (); + printf ("svl: %lu\n", svl); + + TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16))); + + /* Initialise buffer for ZA state of SME. */ + state = xmalloc (svl * svl); + memset (state, 1, svl * svl); + struct blk blk = { + .za_save_buffer = state, + .num_za_save_slices = svl, + .__reserved = {0}, + }; + + run (&blk); + + free (state); + return 0; +} + +#include diff --git a/sysdeps/aarch64/tst-sme-vfork.c b/sysdeps/aarch64/tst-sme-vfork.c new file mode 100644 index 0000000000..3feea065e5 --- /dev/null +++ b/sysdeps/aarch64/tst-sme-vfork.c @@ -0,0 +1,43 @@ +/* Test that ZA state of SME is cleared in both parent and child + when vfork() function is used. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-sme-skeleton.c" + +static void +run (struct blk *blk) +{ + /* Enabled ZA state so that effect of disabling be observable. */ + enable_sme_za_state (blk); + check_sme_za_state ("before vfork", /* Clear. */ false); + fflush (stdout); + + pid_t pid = vfork (); + + if (pid == 0) + { + /* Check that ZA state of SME was disabled in child. */ + check_sme_za_state ("after vfork in child", /* Clear. */ true); + _exit (0); + } + + /* Check that ZA state of SME was disabled in parent. */ + check_sme_za_state ("after vfork in parent", /* Clear. */ true); + + TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid); +} diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c new file mode 100644 index 0000000000..00118ef506 --- /dev/null +++ b/sysdeps/aarch64/tst-sme-za-state.c @@ -0,0 +1,52 @@ +/* Test for SME ZA state being cleared on setjmp and longjmp. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "tst-sme-skeleton.c" + +#include + +static void +run (struct blk *ptr) +{ + jmp_buf buf; + int ret; + + check_sme_za_state ("initial state", /* Clear. 
*/ true); + + /* Enabled ZA state so that effect of disabling be observable. */ + enable_sme_za_state (ptr); + check_sme_za_state ("before setjmp", /* Clear. */ false); + + if ((ret = setjmp (buf)) == 0) + { + check_sme_za_state ("after setjmp", /* Clear. */ true); + + /* Enabled ZA state so that effect of disabling be observable. */ + enable_sme_za_state (ptr); + check_sme_za_state ("before longjmp", /* Clear. */ false); + + longjmp (buf, 42); + + /* Unreachable. */ + TEST_VERIFY (false); + __builtin_unreachable (); + } + + TEST_COMPARE (ret, 42); + check_sme_za_state ("after longjmp", /* Clear. */ true); +} diff --git a/sysdeps/arm/find_exidx.c b/sysdeps/arm/find_exidx.c index a924d59b9f..4257c26838 100644 --- a/sysdeps/arm/find_exidx.c +++ b/sysdeps/arm/find_exidx.c @@ -15,6 +15,7 @@ License along with the GNU C Library. If not, see . */ +#include #include /* Find the exception index table containing PC. */ @@ -23,7 +24,7 @@ _Unwind_Ptr __gnu_Unwind_Find_exidx (_Unwind_Ptr pc, int * pcount) { struct dl_find_object data; - if (__dl_find_object ((void *) pc, &data) < 0) + if (GLRO(dl_find_object) ((void *) pc, &data) < 0) return 0; *pcount = data.dlfo_eh_count; return (_Unwind_Ptr) data.dlfo_eh_frame; diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 656e8a3fa0..b4c6e6d2ca 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -351,7 +351,7 @@ struct rtld_global void (*free) (void *); } _ns_unique_sym_table; /* Keep track of changes to each namespace' list. */ - struct r_debug_extended _ns_debug; + struct r_debug_extended _ns_debug_unused; } _dl_ns[DL_NNS]; /* One higher than index of last used namespace. */ EXTERN size_t _dl_nns; @@ -1014,6 +1014,13 @@ extern void _dl_relocate_object (struct link_map *map, int reloc_mode, int consider_profiling) attribute_hidden; +/* Perform relocation, but do not apply RELRO. Does not check + L->relocated. Otherwise the same as _dl_relocate_object. 
*/ +void _dl_relocate_object_no_relro (struct link_map *map, + struct r_scope_elem *scope[], + int reloc_mode, int consider_profiling) + attribute_hidden; + /* Protect PT_GNU_RELRO area. */ extern void _dl_protect_relro (struct link_map *map) attribute_hidden; @@ -1062,15 +1069,29 @@ extern void _dl_debug_state (void); rtld_hidden_proto (_dl_debug_state) /* Initialize `struct r_debug_extended' for the namespace NS. LDBASE - is the run-time load address of the dynamic linker, to be put in the - `r_ldbase' member. Return the address of the structure. */ + is the run-time load address of the dynamic linker, to be put in + the `r_ldbase' member. + + Return the address of the r_debug structure for the namespace. + This is not merely a convenience or optimization, but it is + necessary for the LIBC_PROBE Systemtap/debugger probes to work + reliably: direct variable access can create probes that tools + cannot consume. */ extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) attribute_hidden; +/* This is called after relocation processing to handle a potential + copy relocation for _r_debug. */ +void _dl_debug_post_relocate (struct link_map *main_map) attribute_hidden; + /* Update the `r_map' member and return the address of `struct r_debug' of the namespace NS. */ extern struct r_debug *_dl_debug_update (Lmid_t ns) attribute_hidden; +/* Update R->r_state to STATE and notify the debugger by calling + _dl_debug_state. */ +void _dl_debug_change_state (struct r_debug *r, int state) attribute_hidden; + /* Initialize the basic data structure for the search paths. SOURCE is either "LD_LIBRARY_PATH" or "--library-path". 
GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to @@ -1200,10 +1221,8 @@ extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp); extern void _dl_allocate_static_tls (struct link_map *map) attribute_hidden; -/* These are internal entry points to the two halves of _dl_allocate_tls, - only used within rtld.c itself at startup time. */ extern void *_dl_allocate_tls_storage (void) attribute_hidden; -extern void *_dl_allocate_tls_init (void *, bool); +extern void *_dl_allocate_tls_init (void *result, bool main_thread); rtld_hidden_proto (_dl_allocate_tls_init) /* True if the TCB has been set up. */ @@ -1241,13 +1260,7 @@ extern void *_dl_open (const char *name, int mode, const void *caller, extern int _dl_scope_free (void *) attribute_hidden; -/* Add module to slot information data. If DO_ADD is false, only the - required memory is allocated. Must be called with GL - (dl_load_tls_lock) acquired. If the function has already been called - for the link map L with !do_add, then this function will not raise - an exception, otherwise it is possible that it encounters a memory - allocation failure. */ -extern void _dl_add_to_slotinfo (struct link_map *l, bool do_add) +extern bool _dl_add_to_slotinfo (struct link_map *l, bool do_add) attribute_hidden; /* Update slot information data for at least the generation of the diff --git a/sysdeps/generic/libc-tsd.h b/sysdeps/generic/libc-tsd.h deleted file mode 100644 index ac0e99e14b..0000000000 --- a/sysdeps/generic/libc-tsd.h +++ /dev/null @@ -1,60 +0,0 @@ -/* libc-internal interface for thread-specific data. Stub or TLS version. - Copyright (C) 1998-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#ifndef _GENERIC_LIBC_TSD_H -#define _GENERIC_LIBC_TSD_H 1 - -/* This file defines the following macros for accessing a small fixed - set of thread-specific `void *' data used only internally by libc. - - __libc_tsd_define(CLASS, TYPE, KEY) -- Define or declare a datum with TYPE - for KEY. CLASS can be `static' for - keys used in only one source file, - empty for global definitions, or - `extern' for global declarations. - __libc_tsd_address(TYPE, KEY) -- Return the `TYPE *' pointing to - the current thread's datum for KEY. - __libc_tsd_get(TYPE, KEY) -- Return the `TYPE' datum for KEY. - __libc_tsd_set(TYPE, KEY, VALUE) -- Set the datum for KEY to VALUE. - - The set of available KEY's will usually be provided as an enum, - and contains (at least): - _LIBC_TSD_KEY_MALLOC - _LIBC_TSD_KEY_DL_ERROR - _LIBC_TSD_KEY_RPC_VARS - All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros. - Some implementations may not provide any enum at all and instead - using string pasting in the macros. */ - -#include - -/* When full support for __thread variables is available, this interface is - just a trivial wrapper for it. Without TLS, this is the generic/stub - implementation for wholly single-threaded systems. - - We don't define an enum for the possible key values, because the KEYs - translate directly into variables by macro magic. 
*/ - -#define __libc_tsd_define(CLASS, TYPE, KEY) \ - CLASS __thread TYPE __libc_tsd_##KEY attribute_tls_model_ie; - -#define __libc_tsd_address(TYPE, KEY) (&__libc_tsd_##KEY) -#define __libc_tsd_get(TYPE, KEY) (__libc_tsd_##KEY) -#define __libc_tsd_set(TYPE, KEY, VALUE) (__libc_tsd_##KEY = (VALUE)) - -#endif /* libc-tsd.h */ diff --git a/sysdeps/generic/mremap-failure.h b/sysdeps/generic/mremap-failure.h new file mode 100644 index 0000000000..bc0d476368 --- /dev/null +++ b/sysdeps/generic/mremap-failure.h @@ -0,0 +1,25 @@ +/* mremap failure handling. Generic version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Return exit value on mremap failure with errno ERR. 
*/ + +static int +mremap_failure_exit (int err) +{ + return EXIT_FAILURE; +} diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile index a2e8c0b128..c0c017b899 100644 --- a/sysdeps/i386/Makefile +++ b/sysdeps/i386/Makefile @@ -30,7 +30,9 @@ stack-align-test-flags += -malign-double endif ifeq ($(subdir),elf) -sysdep-dl-routines += tlsdesc dl-tlsdesc +sysdep-dl-routines += \ + dl-tls-get-addr \ +# sysdep-dl-routines tests += tst-audit3 modules-names += tst-auditmod3a tst-auditmod3b @@ -58,6 +60,15 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so @echo "Checking ld.so for SSE register use. This will take a few seconds..." $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ $(evaluate-test) + +tests-special += $(objpfx)check-gnu-tls.out + +$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so + LC_ALL=C $(READELF) -V -W $< \ + | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ + | grep GLIBC_ABI_GNU_TLS > $@; \ + $(evaluate-test) +generated += check-gnu-tls.out else CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS)) endif diff --git a/sysdeps/i386/Versions b/sysdeps/i386/Versions index 36e23b466a..9c84c8ef04 100644 --- a/sysdeps/i386/Versions +++ b/sysdeps/i386/Versions @@ -28,6 +28,11 @@ libc { GLIBC_2.13 { __fentry__; } + GLIBC_ABI_GNU_TLS { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. + __placeholder_only_for_empty_version_map; + } } libm { GLIBC_2.1 { diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c new file mode 100644 index 0000000000..c97e5c57be --- /dev/null +++ b/sysdeps/i386/dl-tls-get-addr.c @@ -0,0 +1,68 @@ +/* Ifunc selector for ___tls_get_addr. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef SHARED +# define ___tls_get_addr __redirect____tls_get_addr +# include +# undef ___tls_get_addr +# undef __tls_get_addr + +# define SYMBOL_NAME ___tls_get_addr +# include + +extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (cpu_features->xsave_state_size != 0) + { + if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) + return OPTIMIZE (xsavec); + else + return OPTIMIZE (xsave); + } + else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) + return OPTIMIZE (fxsave); + return OPTIMIZE (fnsave); +} + +libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr, + IFUNC_SELECTOR ()); + +/* The special thing about the x86 TLS ABI is that we have two + variants of the __tls_get_addr function with different calling + conventions. The GNU version, which we are mostly concerned here, + takes the parameter in a register. The name is changed by adding + an additional underscore at the beginning. The Sun version uses + the normal calling convention. 
*/ + +rtld_hidden_proto (___tls_get_addr) +rtld_hidden_def (___tls_get_addr) + +void * +__tls_get_addr (tls_index *ti) +{ + return ___tls_get_addr (ti); +} +#endif diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h index f17286703d..2380ec1bd4 100644 --- a/sysdeps/i386/dl-tls.h +++ b/sysdeps/i386/dl-tls.h @@ -29,33 +29,13 @@ typedef struct dl_tls_index /* This is the prototype for the GNU version. */ extern void *___tls_get_addr (tls_index *ti) __attribute__ ((__regparm__ (1))); -extern void *___tls_get_addr_internal (tls_index *ti) - __attribute__ ((__regparm__ (1))) attribute_hidden; - # if IS_IN (rtld) -/* The special thing about the x86 TLS ABI is that we have two - variants of the __tls_get_addr function with different calling - conventions. The GNU version, which we are mostly concerned here, - takes the parameter in a register. The name is changed by adding - an additional underscore at the beginning. The Sun version uses - the normal calling convention. */ -void * -__tls_get_addr (tls_index *ti) -{ - return ___tls_get_addr_internal (ti); -} - - /* Prepare using the definition of __tls_get_addr in the generic version of this file. */ -# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr -strong_alias (___tls_get_addr, ___tls_get_addr_internal) -rtld_hidden_proto (___tls_get_addr) -rtld_hidden_def (___tls_get_addr) -#else - +# define __tls_get_addr \ + __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal +# else /* Users should get the better interface. */ -# define __tls_get_addr ___tls_get_addr - +# define __tls_get_addr ___tls_get_addr # endif #endif diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h index 3627028577..8a5952421e 100644 --- a/sysdeps/i386/dl-tlsdesc-dynamic.h +++ b/sysdeps/i386/dl-tlsdesc-dynamic.h @@ -16,34 +16,6 @@ License along with the GNU C Library; if not, see . 
*/ -#undef REGISTER_SAVE_AREA - -#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 -# error STATE_SAVE_ALIGNMENT must be multiple of 16 -#endif - -#if DL_RUNTIME_RESOLVE_REALIGN_STACK -# ifdef USE_FNSAVE -# error USE_FNSAVE shouldn't be defined -# endif -# ifdef USE_FXSAVE -/* Use fxsave to save all registers. */ -# define REGISTER_SAVE_AREA 512 -# endif -#else -# ifdef USE_FNSAVE -/* Use fnsave to save x87 FPU stack registers. */ -# define REGISTER_SAVE_AREA 108 -# else -# ifndef USE_FXSAVE -# error USE_FXSAVE must be defined -# endif -/* Use fxsave to save all registers. Add 12 bytes to align the stack - to 16 bytes. */ -# define REGISTER_SAVE_AREA (512 + 12) -# endif -#endif - .hidden _dl_tlsdesc_dynamic .global _dl_tlsdesc_dynamic .type _dl_tlsdesc_dynamic,@function @@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic: ret .p2align 4,,7 2: - cfi_adjust_cfa_offset (32) -#if DL_RUNTIME_RESOLVE_REALIGN_STACK - movl %ebx, -28(%esp) - movl %esp, %ebx - cfi_def_cfa_register(%ebx) - and $-STATE_SAVE_ALIGNMENT, %esp -#endif -#ifdef REGISTER_SAVE_AREA - subl $REGISTER_SAVE_AREA, %esp -# if !DL_RUNTIME_RESOLVE_REALIGN_STACK - cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) -# endif -#else -# if !DL_RUNTIME_RESOLVE_REALIGN_STACK -# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true -# endif - /* Allocate stack space of the required size to save the state. */ - LOAD_PIC_REG (cx) - subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp -#endif -#ifdef USE_FNSAVE - fnsave (%esp) -#elif defined USE_FXSAVE - fxsave (%esp) -#else - /* Save the argument for ___tls_get_addr in EAX. */ - movl %eax, %ecx - movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax - xorl %edx, %edx - /* Clear the XSAVE Header. 
*/ -# ifdef USE_XSAVE - movl %edx, (512)(%esp) - movl %edx, (512 + 4 * 1)(%esp) - movl %edx, (512 + 4 * 2)(%esp) - movl %edx, (512 + 4 * 3)(%esp) -# endif - movl %edx, (512 + 4 * 4)(%esp) - movl %edx, (512 + 4 * 5)(%esp) - movl %edx, (512 + 4 * 6)(%esp) - movl %edx, (512 + 4 * 7)(%esp) - movl %edx, (512 + 4 * 8)(%esp) - movl %edx, (512 + 4 * 9)(%esp) - movl %edx, (512 + 4 * 10)(%esp) - movl %edx, (512 + 4 * 11)(%esp) - movl %edx, (512 + 4 * 12)(%esp) - movl %edx, (512 + 4 * 13)(%esp) - movl %edx, (512 + 4 * 14)(%esp) - movl %edx, (512 + 4 * 15)(%esp) -# ifdef USE_XSAVE - xsave (%esp) -# else - xsavec (%esp) -# endif - /* Restore the argument for ___tls_get_addr in EAX. */ - movl %ecx, %eax -#endif - call HIDDEN_JUMPTARGET (___tls_get_addr) - /* Get register content back. */ -#ifdef USE_FNSAVE - frstor (%esp) -#elif defined USE_FXSAVE - fxrstor (%esp) -#else - /* Save and retore ___tls_get_addr return value stored in EAX. */ - movl %eax, %ecx - movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax - xorl %edx, %edx - xrstor (%esp) - movl %ecx, %eax -#endif -#if DL_RUNTIME_RESOLVE_REALIGN_STACK - mov %ebx, %esp - cfi_def_cfa_register(%esp) - movl -28(%esp), %ebx - cfi_restore(%ebx) -#else - addl $REGISTER_SAVE_AREA, %esp - cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) -#endif +#include "tls-get-addr-wrapper.h" jmp 1b cfi_endproc .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S index f002feee56..dec7049911 100644 --- a/sysdeps/i386/dl-tlsdesc.S +++ b/sysdeps/i386/dl-tlsdesc.S @@ -22,23 +22,6 @@ #include #include "tlsdesc.h" -#ifndef DL_STACK_ALIGNMENT -/* Due to GCC bug: - - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 - - __tls_get_addr may be called with 4-byte stack alignment. Although - this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume - that stack will be always aligned at 16 bytes. 
*/ -# define DL_STACK_ALIGNMENT 4 -#endif - -/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align - stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ -#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ - (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ - || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) - .text /* This function is used to compute the TP offset for symbols in diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h new file mode 100644 index 0000000000..0708e5ad1d --- /dev/null +++ b/sysdeps/i386/tls-get-addr-wrapper.h @@ -0,0 +1,127 @@ +/* Wrapper of i386 ___tls_get_addr to save and restore vector registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#undef REGISTER_SAVE_AREA + +#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +# error STATE_SAVE_ALIGNMENT must be multiple of 16 +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK +# ifdef USE_FNSAVE +# error USE_FNSAVE shouldn't be defined +# endif +# ifdef USE_FXSAVE +/* Use fxsave to save all registers. */ +# define REGISTER_SAVE_AREA 512 +# endif +#else +# ifdef USE_FNSAVE +/* Use fnsave to save x87 FPU stack registers. 
*/ +# define REGISTER_SAVE_AREA 108 +# else +# ifndef USE_FXSAVE +# error USE_FXSAVE must be defined +# endif +/* Use fxsave to save all registers. Add 12 bytes to align the stack + to 16 bytes. */ +# define REGISTER_SAVE_AREA (512 + 12) +# endif +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + movl %ebx, 28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + and $-STATE_SAVE_ALIGNMENT, %esp +#endif +#ifdef REGISTER_SAVE_AREA + subl $REGISTER_SAVE_AREA, %esp +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +# endif +#else +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +# endif + /* Allocate stack space of the required size to save the state. */ + LOAD_PIC_REG (cx) + subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \ + +XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +#endif +#ifdef USE_FNSAVE + fnsave (%esp) +#elif defined USE_FXSAVE + fxsave (%esp) +#else + /* Save the argument for ___tls_get_addr in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + /* Clear the XSAVE Header. */ +# ifdef USE_XSAVE + movl %edx, (512)(%esp) + movl %edx, (512 + 4 * 1)(%esp) + movl %edx, (512 + 4 * 2)(%esp) + movl %edx, (512 + 4 * 3)(%esp) +# endif + movl %edx, (512 + 4 * 4)(%esp) + movl %edx, (512 + 4 * 5)(%esp) + movl %edx, (512 + 4 * 6)(%esp) + movl %edx, (512 + 4 * 7)(%esp) + movl %edx, (512 + 4 * 8)(%esp) + movl %edx, (512 + 4 * 9)(%esp) + movl %edx, (512 + 4 * 10)(%esp) + movl %edx, (512 + 4 * 11)(%esp) + movl %edx, (512 + 4 * 12)(%esp) + movl %edx, (512 + 4 * 13)(%esp) + movl %edx, (512 + 4 * 14)(%esp) + movl %edx, (512 + 4 * 15)(%esp) +# ifdef USE_XSAVE + xsave (%esp) +# else + xsavec (%esp) +# endif + /* Restore the argument for ___tls_get_addr in EAX. */ + movl %ecx, %eax +#endif + call ___tls_get_addr_internal + /* Get register content back. 
*/ +#ifdef USE_FNSAVE + frstor (%esp) +#elif defined USE_FXSAVE + fxrstor (%esp) +#else + /* Save and restore ___tls_get_addr return value stored in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + xrstor (%esp) + movl %ecx, %eax +#endif +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl 28(%esp), %ebx + cfi_restore(%ebx) +#else + addl $REGISTER_SAVE_AREA, %esp + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +#endif + +#undef STATE_SAVE_ALIGNMENT diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S new file mode 100644 index 0000000000..7d143d8a23 --- /dev/null +++ b/sysdeps/i386/tls_get_addr.S @@ -0,0 +1,57 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + .
*/ + +#include +#include +#include +#include + + .text +#ifdef SHARED +# define USE_FNSAVE +# define MINIMUM_ALIGNMENT 4 +# define STATE_SAVE_ALIGNMENT 4 +# define ___tls_get_addr _____tls_get_addr_fnsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef MINIMUM_ALIGNMENT +# undef USE_FNSAVE + +# define MINIMUM_ALIGNMENT 16 + +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define ___tls_get_addr _____tls_get_addr_fxsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_FXSAVE + +# define USE_XSAVE +# define STATE_SAVE_ALIGNMENT 64 +# define ___tls_get_addr _____tls_get_addr_xsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_XSAVE + +# define USE_XSAVEC +# define STATE_SAVE_ALIGNMENT 64 +# define ___tls_get_addr _____tls_get_addr_xsavec +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_XSAVEC +#endif /* SHARED */ diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h new file mode 100644 index 0000000000..1825798724 --- /dev/null +++ b/sysdeps/i386/tls_get_addr.h @@ -0,0 +1,42 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + + .hidden ___tls_get_addr + .global ___tls_get_addr + .type ___tls_get_addr,@function + + /* This function is a wrapper of ___tls_get_addr_internal to + preserve caller-saved vector registers. */ + + cfi_startproc + .align 16 +___tls_get_addr: + /* Like all TLS resolvers, preserve call-clobbered registers. + We need two scratch regs anyway. */ + subl $32, %esp + cfi_adjust_cfa_offset (32) + movl %ecx, 20(%esp) + movl %edx, 24(%esp) +#include "tls-get-addr-wrapper.h" + movl 20(%esp), %ecx + movl 24(%esp), %edx + addl $32, %esp + cfi_adjust_cfa_offset (-32) + ret + cfi_endproc + .size ___tls_get_addr, .-___tls_get_addr diff --git a/sysdeps/ieee754/dbl-64/e_atanh.c b/sysdeps/ieee754/dbl-64/e_atanh.c index 11a2a45799..05ac0a1b30 100644 --- a/sysdeps/ieee754/dbl-64/e_atanh.c +++ b/sysdeps/ieee754/dbl-64/e_atanh.c @@ -44,6 +44,11 @@ static const double huge = 1e300; +#ifndef SECTION +# define SECTION +#endif + +SECTION double __ieee754_atanh (double x) { @@ -73,4 +78,7 @@ __ieee754_atanh (double x) return copysign (t, x); } + +#ifndef __ieee754_atanh libm_alias_finite (__ieee754_atanh, __atanh) +#endif diff --git a/sysdeps/ieee754/dbl-64/e_sinh.c b/sysdeps/ieee754/dbl-64/e_sinh.c index b4b5857ddd..3f787967f9 100644 --- a/sysdeps/ieee754/dbl-64/e_sinh.c +++ b/sysdeps/ieee754/dbl-64/e_sinh.c @@ -41,6 +41,11 @@ static char rcsid[] = "$NetBSD: e_sinh.c,v 1.7 1995/05/10 20:46:13 jtc Exp $"; static const double one = 1.0, shuge = 1.0e307; +#ifndef SECTION +# define SECTION +#endif + +SECTION double __ieee754_sinh (double x) { @@ -90,4 +95,7 @@ __ieee754_sinh (double x) /* |x| > overflowthresold, sinh(x) overflow */ return math_narrow_eval (x * shuge); } + +#ifndef __ieee754_sinh libm_alias_finite (__ieee754_sinh, __sinh) +#endif diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h index ef87cfa6be..05515fd95a 100644 --- a/sysdeps/ieee754/dbl-64/math_config.h +++ b/sysdeps/ieee754/dbl-64/math_config.h @@ -195,16 +195,18 @@ check_uflow 
(double x) extern const struct exp_data { double invln2N; - double shift; double negln2hiN; double negln2loN; double poly[4]; /* Last four coefficients. */ + double shift; + double exp2_shift; double exp2_poly[EXP2_POLY_ORDER]; - double invlog10_2N; + double neglog10_2hiN; double neglog10_2loN; double exp10_poly[5]; + double invlog10_2N; uint64_t tab[2*(1 << EXP_TABLE_BITS)]; } __exp_data attribute_hidden; diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index c5f5abdc68..79a3cd721d 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -244,6 +244,9 @@ __fma (double x, double y, double z) /* Reset rounding mode and test for inexact simultaneously. */ int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0; + /* Ensure value of a1 + u.d is not reused. */ + a1 = math_opt_barrier (a1); + if (__glibc_likely (adjust == 0)) { if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) diff --git a/sysdeps/ieee754/dbl-64/s_tanh.c b/sysdeps/ieee754/dbl-64/s_tanh.c index 673a97102d..13063db04e 100644 --- a/sysdeps/ieee754/dbl-64/s_tanh.c +++ b/sysdeps/ieee754/dbl-64/s_tanh.c @@ -46,6 +46,11 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7 1995/05/10 20:48:22 jtc Exp $"; static const double one = 1.0, two = 2.0, tiny = 1.0e-300; +#ifndef SECTION +# define SECTION +#endif + +SECTION double __tanh (double x) { diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index 729f22cd4f..dc07ebd459 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ b/sysdeps/ieee754/flt-32/math_config.h @@ -166,9 +166,9 @@ extern const struct exp2f_data uint64_t tab[1 << EXP2F_TABLE_BITS]; double shift_scaled; double poly[EXP2F_POLY_ORDER]; - double shift; double invln2_scaled; double poly_scaled[EXP2F_POLY_ORDER]; + double shift; } __exp2f_data attribute_hidden; #define LOGF_TABLE_BITS 4 diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure index 0d1e9ed8df..6726ab8302 100644 --- 
a/sysdeps/loongarch/preconfigure +++ b/sysdeps/loongarch/preconfigure @@ -44,6 +44,7 @@ loongarch*) base_machine=loongarch mtls_descriptor=desc + mtls_traditional=trad ;; esac diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac index 67e4357013..56402261df 100644 --- a/sysdeps/loongarch/preconfigure.ac +++ b/sysdeps/loongarch/preconfigure.ac @@ -41,6 +41,8 @@ loongarch*) AC_DEFINE_UNQUOTED([LOONGARCH_ABI_FRLEN], [$abi_flen]) base_machine=loongarch + mtls_descriptor=desc + mtls_traditional=trad ;; esac diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h index df54eef6f7..bccc2003ec 100644 --- a/sysdeps/nptl/bits/thread-shared-types.h +++ b/sysdeps/nptl/bits/thread-shared-types.h @@ -95,11 +95,12 @@ struct __pthread_cond_s { __atomic_wide_counter __wseq; __atomic_wide_counter __g1_start; - unsigned int __g_refs[2] __LOCK_ALIGNMENT; - unsigned int __g_size[2]; + unsigned int __g_size[2] __LOCK_ALIGNMENT; unsigned int __g1_orig_size; unsigned int __wrefs; unsigned int __g_signals[2]; + unsigned int __unused_initialized_1; + unsigned int __unused_initialized_2; }; typedef unsigned int __tss_t; diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c index 7803e19fd1..ed10185e37 100644 --- a/sysdeps/nptl/dl-tls_init_tp.c +++ b/sysdeps/nptl/dl-tls_init_tp.c @@ -23,6 +23,7 @@ #include #include #include +#include #define TUNABLE_NAMESPACE pthread #include diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h index 3d4f4a756c..e0f24418fe 100644 --- a/sysdeps/nptl/pthread.h +++ b/sysdeps/nptl/pthread.h @@ -152,7 +152,7 @@ enum /* Conditional variable handling. 
*/ -#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } } +#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0}, 0, 0 } } /* Cleanup buffers */ diff --git a/sysdeps/posix/libc_fatal.c b/sysdeps/posix/libc_fatal.c index f9e3425e04..089c47b04b 100644 --- a/sysdeps/posix/libc_fatal.c +++ b/sysdeps/posix/libc_fatal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -105,7 +106,8 @@ __libc_message_impl (const char *fmt, ...) { WRITEV_FOR_FATAL (fd, iov, iovcnt, total); - total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1); + total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1, + GLRO(dl_pagesize)); struct abort_msg_s *buf = __mmap (NULL, total, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); diff --git a/sysdeps/posix/tempname.c b/sysdeps/posix/tempname.c index c00fe0c181..fc30958a0c 100644 --- a/sysdeps/posix/tempname.c +++ b/sysdeps/posix/tempname.c @@ -117,6 +117,8 @@ random_bits (random_value *r, random_value s) succeed. */ #if !_LIBC *r = mix_random_values (v, clock ()); +#else + *r = v; #endif return false; } diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile index 5e6cb07ce6..5cdb64f29b 100644 --- a/sysdeps/powerpc/Makefile +++ b/sysdeps/powerpc/Makefile @@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so + +# The test checks if the __tls_get_addr does not clobber caller-saved +# register, so disable the powerpc specific optimization to force a +# __tls_get_addr call. 
+LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize endif ifneq (no,$(multi-arch)) diff --git a/sysdeps/powerpc/powerpc64/le/Makefile b/sysdeps/powerpc/powerpc64/le/Makefile index 9d568d4f44..b77775cf95 100644 --- a/sysdeps/powerpc/powerpc64/le/Makefile +++ b/sysdeps/powerpc/powerpc64/le/Makefile @@ -129,6 +129,10 @@ CFLAGS-tst-strtod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-round.c += $(type-float128-CFLAGS) CFLAGS-tst-strtod-nan-locale.c += $(type-float128-CFLAGS) CFLAGS-tst-wcstod-nan-locale.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod1i.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod3.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod4.c += $(type-float128-CFLAGS) +CFLAGS-tst-strtod5i.c += $(type-float128-CFLAGS) CFLAGS-tst-strtod6.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom.c += $(type-float128-CFLAGS) CFLAGS-tst-strfrom-locale.c += $(type-float128-CFLAGS) diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S deleted file mode 100644 index 53e5716d72..0000000000 --- a/sysdeps/powerpc/powerpc64/le/power10/memchr.S +++ /dev/null @@ -1,315 +0,0 @@ -/* Optimized memchr implementation for POWER10 LE. - Copyright (C) 2021-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#include - -# ifndef MEMCHR -# define MEMCHR __memchr -# endif -# define M_VREG_ZERO v20 -# define M_OFF_START_LOOP 256 -# define MEMCHR_SUBTRACT_VECTORS \ - vsububm v4,v4,v18; \ - vsububm v5,v5,v18; \ - vsububm v6,v6,v18; \ - vsububm v7,v7,v18; -# define M_TAIL(vreg,increment) \ - vctzlsbb r4,vreg; \ - cmpld r5,r4; \ - ble L(null); \ - addi r4,r4,increment; \ - add r3,r6,r4; \ - blr - -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. */ -#define M_VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define M_LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define CHECK16B(vreg,offset,addr,label) \ - lxv vreg+32,offset(addr); \ - vcmpequb. vreg,vreg,v18; \ - bne cr6,L(label); \ - cmpldi r5,16; \ - ble L(null); \ - addi r5,r5,-16; - -/* Load 4 quadwords, merge into one VR for speed and check for NULLs. r6 has # - of bytes already checked. */ -#define CHECK64B(offset,addr,label) \ - M_LXVP(v4+32,offset,addr); \ - M_LXVP(v6+32,offset+32,addr); \ - MEMCHR_SUBTRACT_VECTORS; \ - vminub v14,v4,v5; \ - vminub v15,v6,v7; \ - vminub v16,v14,v15; \ - vcmpequb. v0,v16,M_VREG_ZERO; \ - beq cr6,$+12; \ - li r7,offset; \ - b L(label); \ - cmpldi r5,64; \ - ble L(null); \ - addi r5,r5,-64 - -/* Implements the function - void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]). */ - - .machine power9 - -ENTRY_TOCLESS (MEMCHR) - CALL_MCOUNT 3 - - cmpldi r5,0 - beq L(null) - mr r0,r5 - xori r6,r4,0xff - - mtvsrd v18+32,r4 /* matching char in v18 */ - mtvsrd v19+32,r6 /* non matching char in v19 */ - - vspltb v18,v18,7 /* replicate */ - vspltb v19,v19,7 /* replicate */ - vspltisb M_VREG_ZERO,0 - - /* Next 16B-aligned address. Prepare address for L(aligned). 
*/ - addi r6,r3,16 - clrrdi r6,r6,4 - - /* Align data and fill bytes not loaded with non matching char. */ - lvx v0,0,r3 - lvsr v1,0,r3 - vperm v0,v19,v0,v1 - - vcmpequb. v6,v0,v18 - bne cr6,L(found) - sub r4,r6,r3 - cmpld r5,r4 - ble L(null) - sub r5,r5,r4 - - /* Test up to OFF_START_LOOP-16 bytes in 16B chunks. The main loop is - optimized for longer strings, so checking the first bytes in 16B - chunks benefits a lot small strings. */ - .p2align 5 -L(aligned): - cmpldi r5,0 - beq L(null) - - CHECK16B(v0,0,r6,tail1) - CHECK16B(v1,16,r6,tail2) - CHECK16B(v2,32,r6,tail3) - CHECK16B(v3,48,r6,tail4) - CHECK16B(v4,64,r6,tail5) - CHECK16B(v5,80,r6,tail6) - CHECK16B(v6,96,r6,tail7) - CHECK16B(v7,112,r6,tail8) - CHECK16B(v8,128,r6,tail9) - CHECK16B(v9,144,r6,tail10) - CHECK16B(v10,160,r6,tail11) - CHECK16B(v0,176,r6,tail12) - CHECK16B(v1,192,r6,tail13) - CHECK16B(v2,208,r6,tail14) - CHECK16B(v3,224,r6,tail15) - - cmpdi cr5,r4,0 /* Check if c == 0. This will be useful to - choose how we will perform the main loop. */ - - /* Prepare address for the loop. */ - addi r4,r3,M_OFF_START_LOOP - clrrdi r4,r4,6 - sub r6,r4,r3 - sub r5,r0,r6 - addi r6,r4,128 - - /* If c == 0, use the loop without the vsububm. */ - beq cr5,L(loop) - - /* This is very similar to the block after L(loop), the difference is - that here MEMCHR_SUBTRACT_VECTORS is not empty, and we subtract - each byte loaded by the char we are looking for, this way we can keep - using vminub to merge the results and checking for nulls. */ - .p2align 5 -L(memchr_loop): - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(memchr_loop) - /* Switch to a more aggressive approach checking 64B each time. 
Use 2 - pointers 128B apart and unroll the loop once to make the pointer - updates and usages separated enough to avoid stalls waiting for - address calculation. */ - .p2align 5 -L(loop): -#undef MEMCHR_SUBTRACT_VECTORS -#define MEMCHR_SUBTRACT_VECTORS /* nothing */ - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(loop) - - .p2align 5 -L(pre_tail_64b): - mr r6,r4 -L(tail_64b): - /* OK, we found a null byte. Let's look for it in the current 64-byte - block and mark it in its corresponding VR. lxvp vx,0(ry) puts the - low 16B bytes into vx+1, and the high into vx, so the order here is - v5, v4, v7, v6. */ - vcmpequb v1,v5,M_VREG_ZERO - vcmpequb v2,v4,M_VREG_ZERO - vcmpequb v3,v7,M_VREG_ZERO - vcmpequb v4,v6,M_VREG_ZERO - - /* Take into account the other 64B blocks we had already checked. */ - add r6,r6,r7 - /* Extract first bit of each byte. */ - M_VEXTRACTBM(r8,v1) - M_VEXTRACTBM(r9,v2) - M_VEXTRACTBM(r10,v3) - M_VEXTRACTBM(r11,v4) - - /* Shift each value into their corresponding position. */ - sldi r9,r9,16 - sldi r10,r10,32 - sldi r11,r11,48 - - /* Merge the results. */ - or r8,r8,r9 - or r9,r10,r11 - or r11,r9,r8 - - cnttzd r0,r11 /* Count trailing zeros before the match. */ - cmpld r5,r0 - ble L(null) - add r3,r6,r0 /* Compute final address. 
*/ - blr - - .p2align 5 -L(tail1): - M_TAIL(v0,0) - - .p2align 5 -L(tail2): - M_TAIL(v1,16) - - .p2align 5 -L(tail3): - M_TAIL(v2,32) - - .p2align 5 -L(tail4): - M_TAIL(v3,48) - - .p2align 5 -L(tail5): - M_TAIL(v4,64) - - .p2align 5 -L(tail6): - M_TAIL(v5,80) - - .p2align 5 -L(tail7): - M_TAIL(v6,96) - - .p2align 5 -L(tail8): - M_TAIL(v7,112) - - .p2align 5 -L(tail9): - M_TAIL(v8,128) - - .p2align 5 -L(tail10): - M_TAIL(v9,144) - - .p2align 5 -L(tail11): - M_TAIL(v10,160) - - .p2align 5 -L(tail12): - M_TAIL(v0,176) - - .p2align 5 -L(tail13): - M_TAIL(v1,192) - - .p2align 5 -L(tail14): - M_TAIL(v2,208) - - .p2align 5 -L(tail15): - M_TAIL(v3,224) - - .p2align 5 -L(found): - vctzlsbb r7,v6 - cmpld r5,r7 - ble L(null) - add r3,r3,r7 - blr - - .p2align 5 -L(null): - li r3,0 - blr - -END (MEMCHR) - -weak_alias (__memchr, memchr) -libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S index f0d6732a25..0d4a53317c 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S +++ b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S @@ -1,5 +1,5 @@ /* Optimized strcmp implementation for PowerPC64/POWER10. - Copyright (C) 2021-2024 Free Software Foundation, Inc. + Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -24,16 +24,6 @@ /* Implements the function int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]). */ -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. 
*/ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) #define COMPARE_16(vreg1,vreg2,offset) \ lxv vreg1+32,offset(r3); \ @@ -42,8 +32,8 @@ bne cr6,L(different); \ #define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ + lxvp vreg1+32,offset(r3); \ + lxvp vreg2+32,offset(r4); \ vcmpnezb. v7,vreg1+1,vreg2+1; \ bne cr6,L(label1); \ vcmpnezb. v7,vreg1,vreg2; \ @@ -62,120 +52,77 @@ lxvl 32+v5,reg2,r0; \ add reg1,reg1,len_reg; \ add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ + vcmpnezb. v7,v4,v5; \ vctzlsbb r6,v7; \ cmpld cr7,r6,len_reg; \ blt cr7,L(different); \ - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - .machine power9 + .machine power10 ENTRY_TOCLESS (STRCMP, 4) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 - cmpldi cr1,r8,4096-16 - bgt cr0,L(crosses) - bgt cr1,L(crosses) - COMPARE_16(v4,v5,0) - -L(crosses): - andi. r7,r3,15 - subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - subfic r5,r9,16 /* r5(nalign2) = 16 - (str2 & 15). */ - cmpld cr7,r7,r5 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) + li r11,16 + /* eq bit of cr1 used as swap status flag to indicate if + source pointers were swapped. */ + crclr 4*cr1+eq + andi. r7,r3,15 + sub r7,r11,r7 /* r7(nalign1) = 16 - (str1 & 15). */ + andi. r9,r4,15 + sub r5,r11,r9 /* r5(nalign2) = 16 - (str2 & 15). */ + cmpld cr7,r7,r5 + beq cr7,L(same_aligned) + blt cr7,L(nalign1_min) + /* Swap r3 and r4, and r7 and r5 such that r3 and r7 hold the + pointer which is closer to the next 16B boundary so that only + one CHECK_N_BYTES is needed before entering the loop below. */ + mr r8,r4 + mr r4,r3 + mr r3,r8 + mr r12,r7 + mr r7,r5 + mr r5,r12 + crset 4*cr1+eq /* Set bit on swapping source pointers. */ - /* nalign2 is minimum and s2 pointer is aligned. 
*/ - CHECK_N_BYTES(r3,r4,r5) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r3,63 /* Determine offset into 64B hunk. */ - andi. r8,r3,15 /* The offset into the 16B hunk. */ - neg r7,r3 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r3-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. */ + .p2align 5 L(nalign1_min): CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ - neg r7,r4 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) +L(s1_aligned): + /* r9 and r5 is number of bytes to be read after and before + page boundary correspondingly. */ + sub r5,r5,r7 + subfic r9,r5,16 + /* Now let r7 hold the count of quadwords which can be + checked without crossing a page boundary. quadword offset is + (str2>>4)&0xFF. */ + rlwinm r7,r4,28,0xFF + /* Below check is required only for first iteration. For second + iteration and beyond, the new loop counter is always 255. */ + cmpldi r7,255 + beq L(L3) + /* Get the initial loop count by 255-((str2>>4)&0xFF). */ + subfic r11,r7,255 - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. 
*/ -L(compare_64_pagecross): - li r11, 63 + .p2align 5 +L(L1): mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 - ble L(cross_3) - cmpldi r10,48 - ble L(cross_2) -L(cross_1): - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - b L(compare_64B_unaligned) -L(cross_2): - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - b L(compare_64B_unaligned) -L(cross_3): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) + + .p2align 5 +L(L2): + COMPARE_16(v4,v5,0) /* Load 16B blocks using lxv. */ addi r3,r3,16 addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 + bdnz L(L2) + /* Cross the page boundary of s2, carefully. */ + + .p2align 5 +L(L3): + CHECK_N_BYTES(r3,r4,r5) CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) + li r11,255 /* Load the new loop counter. */ + b L(L1) + .p2align 5 L(same_aligned): CHECK_N_BYTES(r3,r4,r7) /* Align s1 to 32B and adjust s2 address. @@ -208,26 +155,31 @@ L(16B_aligned_loop): /* Calculate and return the difference. */ L(different): - TAIL(v4,v5) + vctzlsbb r6,v7 + vextubrx r5,r6,v4 + vextubrx r4,r6,v5 + bt 4*cr1+eq,L(swapped) + subf r3,r4,r5 + blr + + /* If src pointers were swapped, then swap the + indices and calculate the return value. 
*/ +L(swapped): + subf r3,r5,r4 + blr .p2align 5 L(32B_aligned_loop): COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) + COMPARE_32(v14,v16,32,tail1,tail2) + COMPARE_32(v14,v16,64,tail1,tail2) + COMPARE_32(v14,v16,96,tail1,tail2) addi r3,r3,128 addi r4,r4,128 b L(32B_aligned_loop) L(tail1): TAIL(v15,v17) L(tail2): TAIL(v14,v16) -L(tail3): TAIL(v19,v21) -L(tail4): TAIL(v18,v20) -L(tail5): TAIL(v23,v25) -L(tail6): TAIL(v22,v24) -L(tail7): TAIL(v27,v29) -L(tail8): TAIL(v26,v28) END (STRCMP) libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S index 0bd794540f..574a24b586 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S +++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S @@ -31,7 +31,7 @@ # define FUNCNAME RAWMEMCHR # endif # define MCOUNT_NARGS 2 -# define VREG_ZERO v20 +# define VREG_ZERO v17 # define OFF_START_LOOP 256 # define RAWMEMCHR_SUBTRACT_VECTORS \ vsububm v4,v4,v18; \ diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S index d4ba76acae..6e09fcb7f2 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S +++ b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S @@ -1,5 +1,5 @@ /* Optimized strncmp implementation for PowerPC64/POWER10. - Copyright (C) 2024 Free Software Foundation, Inc. + Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -30,17 +30,6 @@ # define STRNCMP strncmp #endif -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. 
*/ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - #define COMPARE_16(vreg1,vreg2,offset) \ lxv vreg1+32,offset(r3); \ lxv vreg2+32,offset(r4); \ @@ -51,8 +40,8 @@ addi r5,r5,-16; #define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ + lxvp vreg1+32,offset(r3); \ + lxvp vreg2+32,offset(r4); \ vcmpnezb. v7,vreg1+1,vreg2+1; \ bne cr6,L(label1); \ vcmpnezb. v7,vreg1,vreg2; \ @@ -94,9 +83,7 @@ ble cr7,L(ret0); \ sub r5,r5,len_reg; \ - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - .machine power9 + .machine power10 ENTRY_TOCLESS (STRNCMP, 4) /* Check if size is 0. */ cmpdi cr0,r5,0 @@ -246,21 +233,15 @@ L(different): .p2align 5 L(32B_aligned_loop): COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) + COMPARE_32(v14,v16,32,tail1,tail2) + COMPARE_32(v14,v16,64,tail1,tail2) + COMPARE_32(v14,v16,96,tail1,tail2) addi r3,r3,128 addi r4,r4,128 b L(32B_aligned_loop) L(tail1): TAIL_FIRST_16B(v15,v17) L(tail2): TAIL_SECOND_16B(v14,v16) -L(tail3): TAIL_FIRST_16B(v19,v21) -L(tail4): TAIL_SECOND_16B(v18,v20) -L(tail5): TAIL_FIRST_16B(v23,v25) -L(tail6): TAIL_SECOND_16B(v22,v24) -L(tail7): TAIL_FIRST_16B(v27,v29) -L(tail8): TAIL_SECOND_16B(v26,v28) .p2align 5 L(ret0): diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index b847c19049..9072b0d23a 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -31,10 +31,10 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ strncase-power8 ifneq (,$(filter %le,$(config-machine))) -sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \ - memmove-power10 memset-power10 rawmemchr-power9 \ - 
rawmemchr-power10 strcmp-power9 strcmp-power10 \ - strncmp-power9 strncmp-power10 strcpy-power9 stpcpy-power9 \ +sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \ + rawmemchr-power9 rawmemchr-power10 \ + strcmp-power9 strcmp-power10 strncmp-power9 strncmp-power10 \ + strcpy-power9 stpcpy-power9 \ strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 2bb47d3527..14aa642675 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -229,12 +229,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ IFUNC_IMPL (i, name, memchr, -#ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, memchr, - hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX, - __memchr_power10) -#endif IFUNC_IMPL_ADD (array, i, memchr, hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC, diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/sysdeps/powerpc/powerpc64/multiarch/memchr.c index 57d23e7b18..b4655dfcaa 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memchr.c @@ -25,23 +25,15 @@ extern __typeof (__memchr) __memchr_ppc attribute_hidden; extern __typeof (__memchr) __memchr_power7 attribute_hidden; extern __typeof (__memchr) __memchr_power8 attribute_hidden; -# ifdef __LITTLE_ENDIAN__ -extern __typeof (__memchr) __memchr_power10 attribute_hidden; -# endif /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ libc_ifunc (__memchr, -# ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? 
__memchr_power10 : -# endif - (hwcap2 & PPC_FEATURE2_ARCH_2_07 - && hwcap & PPC_FEATURE_HAS_ALTIVEC) - ? __memchr_power8 : - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __memchr_power7 - : __memchr_ppc); + (hwcap2 & PPC_FEATURE2_ARCH_2_07 + && hwcap & PPC_FEATURE_HAS_ALTIVEC) + ? __memchr_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memchr_power7 + : __memchr_ppc); weak_alias (__memchr, memchr) libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S index 1a9f6069f5..a4ee7fb53c 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S +++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S @@ -1,5 +1,5 @@ /* Optimized strcmp implementation for POWER10/PPC64. - Copyright (C) 2021-2024 Free Software Foundation, Inc. + Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S index d7026c12e2..bb25bc75b8 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2024 Free Software Foundation, Inc. +/* Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile index 04ea56559e..830d9cb878 100644 --- a/sysdeps/pthread/Makefile +++ b/sysdeps/pthread/Makefile @@ -254,6 +254,7 @@ tests += \ tst-sem14 \ tst-sem15 \ tst-sem16 \ + tst-sem18 \ tst-setuid3 \ tst-signal1 \ tst-signal2 \ diff --git a/sysdeps/pthread/sem_open.c b/sysdeps/pthread/sem_open.c index e41236157a..dab734191a 100644 --- a/sysdeps/pthread/sem_open.c +++ b/sysdeps/pthread/sem_open.c @@ -76,6 +76,7 @@ __sem_open (const char *name, int oflag, ...) goto try_create; /* Return. errno is already set. */ + result = SEM_FAILED; } else /* Check whether we already have this semaphore mapped and diff --git a/sysdeps/pthread/tst-sem18.c b/sysdeps/pthread/tst-sem18.c new file mode 100644 index 0000000000..1be207bcbe --- /dev/null +++ b/sysdeps/pthread/tst-sem18.c @@ -0,0 +1,35 @@ +/* Test sem_open with missing file. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include + +#include + +int +do_test (void) +{ + sem_unlink ("/glibc-tst-sem18"); + errno = 0; + sem_t *s = sem_open ("/glibc-tst-sem18", 0); + TEST_VERIFY (s == SEM_FAILED); + TEST_COMPARE (errno, ENOENT); + return 0; +} + +#include diff --git a/sysdeps/pthread/tst-setuid3.c b/sysdeps/pthread/tst-setuid3.c index 83f42a0ae5..3845ab03d3 100644 --- a/sysdeps/pthread/tst-setuid3.c +++ b/sysdeps/pthread/tst-setuid3.c @@ -15,24 +15,19 @@ License along with the GNU C Library; if not, see . */ -#include #include #include #include #include +#include + /* The test must run under a non-privileged user ID. */ static const uid_t test_uid = 1; static pthread_barrier_t barrier1; static pthread_barrier_t barrier2; -#define FAIL(fmt, ...) \ - do { printf ("FAIL: " fmt "\n", __VA_ARGS__); _exit (1); } while (0) - -#define FAIL_ERR(fmt, ...) \ - do { printf ("FAIL: " fmt ": %m\n", __VA_ARGS__); _exit (1); } while (0) - /* True if x is not a successful return code from pthread_barrier_wait. 
*/ static inline bool is_invalid_barrier_ret (int x) @@ -45,10 +40,10 @@ thread_func (void *ctx __attribute__ ((unused))) { int ret = pthread_barrier_wait (&barrier1); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier1) (on thread): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier1) (on thread): %d", ret); ret = pthread_barrier_wait (&barrier2); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier2) (on thread): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier2) (on thread): %d", ret); return NULL; } @@ -59,13 +54,13 @@ setuid_failure (int phase) switch (ret) { case 0: - FAIL ("setuid succeeded unexpectedly in phase %d", phase); + FAIL_EXIT1 ("setuid succeeded unexpectedly in phase %d", phase); case -1: if (errno != EPERM) - FAIL_ERR ("setuid phase %d", phase); + FAIL_EXIT1 ("setuid phase %d: %m", phase); break; default: - FAIL ("invalid setuid return value in phase %d: %d", phase, ret); + FAIL_EXIT1 ("invalid setuid return value in phase %d: %d", phase, ret); } } @@ -74,42 +69,42 @@ do_test (void) { if (getuid () == 0) if (setuid (test_uid) != 0) - FAIL_ERR ("setuid (%u)", (unsigned) test_uid); + FAIL_EXIT1 ("setuid (%u): %m", (unsigned) test_uid); if (setuid (getuid ())) - FAIL_ERR ("setuid (%s)", "getuid ()"); + FAIL_EXIT1 ("setuid (%s): %m", "getuid ()"); setuid_failure (1); int ret = pthread_barrier_init (&barrier1, NULL, 2); if (ret != 0) - FAIL ("pthread_barrier_init (barrier1): %d", ret); + FAIL_EXIT1 ("pthread_barrier_init (barrier1): %d", ret); ret = pthread_barrier_init (&barrier2, NULL, 2); if (ret != 0) - FAIL ("pthread_barrier_init (barrier2): %d", ret); + FAIL_EXIT1 ("pthread_barrier_init (barrier2): %d", ret); pthread_t thread; ret = pthread_create (&thread, NULL, thread_func, NULL); if (ret != 0) - FAIL ("pthread_create: %d", ret); + FAIL_EXIT1 ("pthread_create: %d", ret); /* Ensure that the thread is running properly. 
*/ ret = pthread_barrier_wait (&barrier1); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier1): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier1): %d", ret); setuid_failure (2); /* Check success case. */ if (setuid (getuid ()) != 0) - FAIL_ERR ("setuid (%s)", "getuid ()"); + FAIL_EXIT1 ("setuid (%s): %m", "getuid ()"); /* Shutdown. */ ret = pthread_barrier_wait (&barrier2); if (is_invalid_barrier_ret (ret)) - FAIL ("pthread_barrier_wait (barrier2): %d", ret); + FAIL_EXIT1 ("pthread_barrier_wait (barrier2): %d", ret); ret = pthread_join (thread, NULL); if (ret != 0) - FAIL ("pthread_join: %d", ret); + FAIL_EXIT1 ("pthread_join: %d", ret); return 0; } diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 097b5a26fc..b7ff9c5293 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -206,6 +206,7 @@ tests += \ tst-getauxval \ tst-gettid \ tst-gettid-kill \ + tst-linux-mremap1 \ tst-memfd_create \ tst-misalign-clone \ tst-mlock2 \ @@ -226,6 +227,7 @@ tests += \ tst-scm_rights \ tst-sigtimedwait \ tst-sync_file_range \ + tst-syscall-restart \ tst-sysconf-iov_max \ tst-sysvmsg-linux \ tst-sysvsem-linux \ @@ -637,7 +639,15 @@ install-bin += \ # install-bin $(objpfx)pldd: $(objpfx)xmalloc.o + +test-internal-extras += tst-nolink-libc +ifeq ($(run-built-tests),yes) +tests-special += \ + $(objpfx)tst-nolink-libc-1.out \ + $(objpfx)tst-nolink-libc-2.out \ + # tests-special endif +endif # $(subdir) == elf ifeq ($(subdir),rt) CFLAGS-mq_send.c += -fexceptions diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S index 0e7ee24e68..4fb9f02792 100644 --- a/sysdeps/unix/sysv/linux/aarch64/clone.S +++ b/sysdeps/unix/sysv/linux/aarch64/clone.S @@ -51,6 +51,9 @@ ENTRY(__clone) and x1, x1, -16 cbz x1, .Lsyscall_error + /* Clear ZA state of SME. */ + CALL_LIBC_ARM_ZA_DISABLE + /* Do the system call. 
*/ /* X0:flags, x1:newsp, x2:parenttidptr, x3:newtls, x4:childtid. */ mov x0, x2 /* flags */ diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S index e28aaa5083..17a40af916 100644 --- a/sysdeps/unix/sysv/linux/aarch64/clone3.S +++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S @@ -50,6 +50,9 @@ ENTRY(__clone3) cbz x10, .Lsyscall_error /* No NULL cl_args pointer. */ cbz x2, .Lsyscall_error /* No NULL function pointer. */ + /* Clear ZA state of SME. */ + CALL_LIBC_ARM_ZA_DISABLE + /* Do the system call, the kernel expects: x8: system call number x0: cl_args diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c index c0b047bc0d..0ad55a0c7f 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c @@ -23,6 +23,7 @@ #include #include #include +#include #define DCZID_DZP_MASK (1 << 4) #define DCZID_BS_MASK (0xf) diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S index ba659438c5..1be9be2c27 100644 --- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S +++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S @@ -49,25 +49,8 @@ ENTRY (__setcontext) cbz x0, 1f b C_SYMBOL_NAME (__syscall_error) 1: - /* Disable ZA of SME. */ -#if HAVE_AARCH64_PAC_RET - PACIASP - cfi_window_save -#endif - stp x29, x30, [sp, -16]! - cfi_adjust_cfa_offset (16) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - mov x29, sp - bl __libc_arm_za_disable - ldp x29, x30, [sp], 16 - cfi_adjust_cfa_offset (-16) - cfi_restore (x29) - cfi_restore (x30) -#if HAVE_AARCH64_PAC_RET - AUTIASP - cfi_window_save -#endif + /* Clear ZA state of SME. */ + CALL_LIBC_ARM_ZA_DISABLE /* Restore the general purpose registers. 
*/ mov x0, x9 cfi_def_cfa (x0, 0) diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h index bbbe35723c..dd98242dad 100644 --- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h +++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h @@ -150,6 +150,19 @@ mov x8, SYS_ify (syscall_name); \ svc 0 +/* Clear ZA state of SME (ASM version). */ +/* The __libc_arm_za_disable function has special calling convention + that allows to call it without stack manipulation and preserving + most of the registers. */ + .macro CALL_LIBC_ARM_ZA_DISABLE + cfi_remember_state + mov x13, x30 + cfi_register(x30, x13) + bl __libc_arm_za_disable + mov x30, x13 + cfi_restore_state + .endm + #else /* not __ASSEMBLER__ */ # ifdef __LP64__ @@ -234,6 +247,32 @@ #undef HAVE_INTERNAL_BRK_ADDR_SYMBOL #define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1 +/* Clear ZA state of SME (C version). */ +/* The __libc_arm_za_disable function has special calling convention + that allows to call it without stack manipulation and preserving + most of the registers. */ +#define CALL_LIBC_ARM_ZA_DISABLE() \ +({ \ + unsigned long int __tmp; \ + asm volatile ( \ + " .cfi_remember_state\n" \ + " mov %0, x30\n" \ + " .cfi_register x30, %0\n" \ + " bl __libc_arm_za_disable\n" \ + " mov x30, %0\n" \ + " .cfi_restore_state\n" \ + : "=r" (__tmp) \ + : \ + : "x14", "x15", "x16", "x17", "x18", "memory" ); \ +}) + +/* Do clear ZA state of SME before making normal clone syscall. */ +#define INLINE_CLONE_SYSCALL(a0, a1, a2, a3, a4) \ +({ \ + CALL_LIBC_ARM_ZA_DISABLE (); \ + INLINE_SYSCALL_CALL (clone, a0, a1, a2, a3, a4); \ +}) + #endif /* __ASSEMBLER__ */ #endif /* linux/aarch64/sysdep.h */ diff --git a/sysdeps/unix/sysv/linux/aarch64/vfork.S b/sysdeps/unix/sysv/linux/aarch64/vfork.S index e71e492da3..65fa85ae89 100644 --- a/sysdeps/unix/sysv/linux/aarch64/vfork.S +++ b/sysdeps/unix/sysv/linux/aarch64/vfork.S @@ -27,6 +27,9 @@ ENTRY (__vfork) + /* Clear ZA state of SME. 
*/ + CALL_LIBC_ARM_ZA_DISABLE + mov x0, #0x4111 /* CLONE_VM | CLONE_VFORK | SIGCHLD */ mov x1, sp DO_CALL (clone, 2) diff --git a/sysdeps/unix/sysv/linux/arm/Makefile b/sysdeps/unix/sysv/linux/arm/Makefile index a73c897f43..e73ce4f811 100644 --- a/sysdeps/unix/sysv/linux/arm/Makefile +++ b/sysdeps/unix/sysv/linux/arm/Makefile @@ -1,5 +1,8 @@ ifeq ($(subdir),elf) sysdep-rtld-routines += aeabi_read_tp libc-do-syscall +# The test uses INTERNAL_SYSCALL_CALL. In thumb mode, this uses +# an undefined reference to __libc_do_syscall. +CFLAGS-tst-nolink-libc.c += -marm endif ifeq ($(subdir),misc) diff --git a/sysdeps/unix/sysv/linux/hppa/clone.S b/sysdeps/unix/sysv/linux/hppa/clone.S index a31afea429..e85e7f517f 100644 --- a/sysdeps/unix/sysv/linux/hppa/clone.S +++ b/sysdeps/unix/sysv/linux/hppa/clone.S @@ -59,16 +59,6 @@ .text ENTRY(__clone) - /* Prologue */ - stwm %r4, 64(%sp) - .cfi_def_cfa_offset -64 - .cfi_offset 4, 0 - stw %sp, -4(%sp) -#ifdef PIC - stw %r19, -32(%sp) - .cfi_offset 19, 32 -#endif - /* Sanity check arguments. */ comib,=,n 0,%arg0,.LerrorSanity /* no NULL function pointers */ comib,=,n 0,%arg1,.LerrorSanity /* no NULL stack pointers */ @@ -87,54 +77,33 @@ ENTRY(__clone) /* User stack pointer is in the correct register already */ /* Load args from stack... */ - ldw -116(%sp), %r24 /* Load parent_tidptr */ - ldw -120(%sp), %r23 /* Load newtls */ - ldw -124(%sp), %r22 /* Load child_tidptr */ - - /* Save the PIC register. */ -#ifdef PIC - copy %r19, %r4 /* parent */ -#endif + ldw -52(%sp), %r24 /* Load parent_tidptr */ + ldw -56(%sp), %r23 /* Load newtls */ + ldw -60(%sp), %r22 /* Load child_tidptr */ /* Do the system call */ ble 0x100(%sr2, %r0) ldi __NR_clone, %r20 ldi -4096, %r1 - comclr,>>= %r1, %ret0, %r0 /* Note: unsigned compare. */ - b,n .LerrorRest - - /* Restore the PIC register. */ -#ifdef PIC - copy %r4, %r19 /* parent */ -#endif - + comb,<<,n %r1, %ret0, .LerrorRest /* Note: unsigned compare. 
*/ comib,=,n 0, %ret0, .LthreadStart + bv,n %r0(%rp) - /* Successful return from the parent - No need to restore the PIC register, - since we return immediately. */ - - ldw -84(%sp), %rp - bv %r0(%rp) - ldwm -64(%sp), %r4 +.LerrorSanity: + /* Sanity checks failed, set errno to EINVAL. */ + ldi -EINVAL, %ret0 .LerrorRest: - /* Something bad happened -- no child created */ + /* Something bad happened -- no child created -- need a frame */ + ldo 64(%sp),%sp + .cfi_def_cfa_offset -64 bl __syscall_error, %rp sub %r0, %ret0, %arg0 ldw -84(%sp), %rp /* Return after setting errno, ret0 is set to -1 by __syscall_error. */ bv %r0(%rp) - ldwm -64(%sp), %r4 - -.LerrorSanity: - /* Sanity checks failed, return -1, and set errno to EINVAL. */ - bl __syscall_error, %rp - ldi EINVAL, %arg0 - ldw -84(%sp), %rp - bv %r0(%rp) - ldwm -64(%sp), %r4 + ldo -64(%sp), %sp .LthreadStart: /* Load up the arguments. */ @@ -144,20 +113,14 @@ ENTRY(__clone) /* $$dyncall fixes child's PIC register */ /* Call the user's function */ -#ifdef PIC - copy %r19, %r4 -#endif bl $$dyncall, %r31 copy %r31, %rp -#ifdef PIC - copy %r4, %r19 -#endif copy %r28, %r26 ble 0x100(%sr2, %r0) ldi __NR_exit, %r20 /* We should not return from exit. - We do not restore r4, or the stack state. */ + We do not restore the stack state. */ iitlbp %r0, (%sr0, %r0) PSEUDO_END(__clone) diff --git a/sysdeps/unix/sysv/linux/hppa/vfork.S b/sysdeps/unix/sysv/linux/hppa/vfork.S new file mode 100644 index 0000000000..5fd368f3cf --- /dev/null +++ b/sysdeps/unix/sysv/linux/hppa/vfork.S @@ -0,0 +1,53 @@ +/* Copyright (C) 1999-2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#define _ERRNO_H 1 +#include + +/* Clone the calling process, but without copying the whole address space. + The calling process is suspended until the new process exits or is + replaced by a call to `execve'. Return -1 for errors, 0 to the new process, + and the process ID of the new process to the old process. */ + +ENTRY (__vfork) + ble 0x100(%sr2, %r0) + ldi __NR_vfork, %r20 + + ldi -4096, %r1 + comclr,<< %r1, %ret0, %r0 /* Note: unsigned compare. */ + bv,n %r0(%rp) + + /* Something bad happened -- no child created -- we need a frame */ + ldo 64(%sp), %sp + .cfi_def_cfa_offset -64 + + /* Set errno */ + bl __syscall_error, %rp + sub %r0, %ret0, %arg0 + + /* ret0 is set to -1 by __syscall_error */ + ldw -84(%sp), %rp + bv %r0(%rp) + ldo -64(%sp), %sp + +PSEUDO_END (__vfork) +libc_hidden_def (__vfork) + +weak_alias (__vfork, vfork) +strong_alias (__vfork, __libc_vfork) diff --git a/sysdeps/unix/sysv/linux/mremap-failure.h b/sysdeps/unix/sysv/linux/mremap-failure.h new file mode 100644 index 0000000000..c99ab30ca9 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mremap-failure.h @@ -0,0 +1,30 @@ +/* mremap failure handling. Linux version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* Return exit value on mremap failure with errno ERR. */ + +static int +mremap_failure_exit (int err) +{ + if (err != EINVAL) + return EXIT_FAILURE; + + return EXIT_UNSUPPORTED; +} diff --git a/sysdeps/unix/sysv/linux/mremap.c b/sysdeps/unix/sysv/linux/mremap.c index 4f770799c4..1ada5c1f40 100644 --- a/sysdeps/unix/sysv/linux/mremap.c +++ b/sysdeps/unix/sysv/linux/mremap.c @@ -20,6 +20,12 @@ #include #include #include +#include + +#define MREMAP_KNOWN_BITS \ + (MREMAP_MAYMOVE \ + | MREMAP_FIXED \ + | MREMAP_DONTUNMAP) void * __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...) @@ -27,7 +33,13 @@ __mremap (void *addr, size_t old_len, size_t new_len, int flags, ...) va_list va; void *new_addr = NULL; - if (flags & MREMAP_FIXED) + if (flags & ~(MREMAP_KNOWN_BITS)) + { + __set_errno (EINVAL); + return MAP_FAILED; + } + + if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) { va_start (va, flags); new_addr = va_arg (va, void *); diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h index 7ea935b4ad..76de2b7ff0 100644 --- a/sysdeps/unix/sysv/linux/rseq-internal.h +++ b/sysdeps/unix/sysv/linux/rseq-internal.h @@ -51,11 +51,21 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) /* The initial implementation used only 20 bytes out of 32, but still expected size 32. */ size = RSEQ_AREA_SIZE_INITIAL; + + /* Initialize the whole rseq area to zero prior to registration. 
*/ + memset (&self->rseq_area, 0, size); + + /* Set the cpu_id field to RSEQ_CPU_ID_UNINITIALIZED, this is checked by + the kernel at registration when CONFIG_DEBUG_RSEQ is enabled. */ + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); + int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area, size, 0, RSEQ_SIG); if (!INTERNAL_SYSCALL_ERROR_P (ret)) return true; } + /* When rseq is disabled by tunables or the registration fails, inform + userspace by setting 'cpu_id' to RSEQ_CPU_ID_REGISTRATION_FAILED. */ THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED); return false; } diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S index 748d25fcfe..c84244f56b 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/clone.S @@ -28,6 +28,13 @@ .text ENTRY (__clone) save %sp,-96,%sp + save %sp,-96,%sp +#ifdef __sparcv9 + flushw +#else + ta 3 +#endif + restore cfi_def_cfa_register(%fp) cfi_window_save cfi_register(%o7, %i7) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h index d2d68f5312..c2ffbb5c8f 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h @@ -107,6 +107,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x10;" \ "bcc 1f;" \ " nop;" \ @@ -114,7 +115,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S index e5ff2cf1a0..370d51fda2 100644 --- 
a/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/clone.S @@ -32,6 +32,9 @@ ENTRY (__clone) save %sp, -192, %sp + save %sp, -192, %sp + flushw + restore cfi_def_cfa_register(%fp) cfi_window_save cfi_register(%o7, %i7) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h index 96047424e9..5598fab08a 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h @@ -106,6 +106,7 @@ ENTRY(name); \ #else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ + "mov %[scn], %%g1;" \ "ta 0x6d;" \ "bcc,pt %%xcc, 1f;" \ " nop;" \ @@ -113,7 +114,7 @@ ENTRY(name); \ "1:" #define __SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "g1", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ diff --git a/sysdeps/unix/sysv/linux/sparc/sysdep.h b/sysdeps/unix/sysv/linux/sparc/sysdep.h index dcabb57fe2..c287740a8c 100644 --- a/sysdeps/unix/sysv/linux/sparc/sysdep.h +++ b/sysdeps/unix/sysv/linux/sparc/sysdep.h @@ -50,97 +50,109 @@ #undef INTERNAL_SYSCALL_NCS #define INTERNAL_SYSCALL_NCS(name, nr, args...) \ - internal_syscall##nr(__SYSCALL_STRING, name, args) + _internal_syscall##nr(__SYSCALL_STRING, "p", name, args) -#define internal_syscall0(string,name,dummy...) \ +#define _internal_syscall0(string,nc,name,dummy...) \ ({ \ - register long int __g1 __asm__ ("g1") = (name); \ register long __o0 __asm__ ("o0"); \ + long int _name = (long int) (name); \ __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall0(string,name,args...) 
\ + _internal_syscall0(string, "i", name, args) -#define internal_syscall1(string,name,arg1) \ +#define _internal_syscall1(string,nc,name,arg1) \ ({ \ long int _arg1 = (long int) (arg1); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall1(string,name,args...) \ + _internal_syscall1(string, "i", name, args) -#define internal_syscall2(string,name,arg1,arg2) \ +#define _internal_syscall2(string,nc,name,arg1,arg2) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1) : \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall2(string,name,args...) 
\ + _internal_syscall2(string, "i", name, args) -#define internal_syscall3(string,name,arg1,arg2,arg3) \ +#define _internal_syscall3(string,nc,name,arg1,arg2,arg3) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall3(string,name,args...) \ + _internal_syscall3(string, "i", name, args) -#define internal_syscall4(string,name,arg1,arg2,arg3,arg4) \ +#define _internal_syscall4(string,nc,name,arg1,arg2,arg3,arg4) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall4(string,name,args...) 
\ + _internal_syscall4(string, "i", name, args) -#define internal_syscall5(string,name,arg1,arg2,arg3,arg4,arg5) \ +#define _internal_syscall5(string,nc,name,arg1,arg2,arg3,arg4,arg5) \ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall5(string,name,args...) \ + _internal_syscall5(string, "i", name, args) -#define internal_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6) \ +#define _internal_syscall6(string,nc,name,arg1,arg2,arg3,arg4,arg5,arg6)\ ({ \ long int _arg1 = (long int) (arg1); \ long int _arg2 = (long int) (arg2); \ @@ -148,20 +160,22 @@ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ long int _arg6 = (long int) (arg6); \ - register long int __g1 __asm__("g1") = (name); \ + long int _name = (long int) (name); \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ register long int __o5 __asm__ ("o5") = _arg6; \ - __asm __volatile (string : "=r" (__o0) : \ - "r" (__g1), "0" (__o0), "r" (__o1), \ + __asm __volatile (string : "+r" (__o0) : \ + [scn] nc (_name), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4), 
\ "r" (__o5) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) +#define internal_syscall6(string,name,args...) \ + _internal_syscall6(string, "i", name, args) #define INLINE_CLONE_SYSCALL(arg1,arg2,arg3,arg4,arg5) \ ({ \ @@ -170,15 +184,15 @@ long int _arg3 = (long int) (arg3); \ long int _arg4 = (long int) (arg4); \ long int _arg5 = (long int) (arg5); \ + long int _name = __NR_clone; \ register long int __o0 __asm__ ("o0") = _arg1; \ register long int __o1 __asm__ ("o1") = _arg2; \ register long int __o2 __asm__ ("o2") = _arg3; \ register long int __o3 __asm__ ("o3") = _arg4; \ register long int __o4 __asm__ ("o4") = _arg5; \ - register long int __g1 __asm__ ("g1") = __NR_clone; \ __asm __volatile (__SYSCALL_STRING : \ "=r" (__o0), "=r" (__o1) : \ - "r" (__g1), "0" (__o0), "1" (__o1), \ + [scn] "i" (_name), "0" (__o0), "1" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ if (__glibc_unlikely ((unsigned long int) (__o0) > -4096UL)) \ diff --git a/sysdeps/unix/sysv/linux/tst-linux-mremap1.c b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c new file mode 100644 index 0000000000..408e8af2ab --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-linux-mremap1.c @@ -0,0 +1,63 @@ +/* Test mremap with MREMAP_DONTUNMAP. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + size_t old_size = getpagesize (); + size_t new_size = old_size; + char *old_addr = xmmap (NULL, old_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + old_addr[0] = 1; + old_addr[old_size - 1] = 2; + + /* Create an available 64-page mmap region. */ + size_t fixed_size = old_size * 64; + char *fixed_addr = xmmap (NULL, fixed_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + xmunmap (fixed_addr, fixed_size); + + /* Add 3 * pagesize. */ + fixed_size += 3 * old_size; + + /* Test MREMAP_DONTUNMAP. It should return FIXED_ADDR created above. */ + char *new_addr = mremap (old_addr, old_size, new_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, + fixed_addr); + if (new_addr == MAP_FAILED) + return mremap_failure_exit (errno); + TEST_VERIFY_EXIT (fixed_addr == new_addr); + old_addr[0] = 3; + old_addr[old_size - 1] = 4; + new_addr[0] = 1; + new_addr[new_size - 1] = 2; + xmunmap (new_addr, new_size); + xmunmap (old_addr, old_size); + + return 0; +} + +#include diff --git a/sysdeps/unix/sysv/linux/tst-nolink-libc.c b/sysdeps/unix/sysv/linux/tst-nolink-libc.c new file mode 100644 index 0000000000..817f37784b --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-nolink-libc.c @@ -0,0 +1,25 @@ +/* Test program not linked against libc.so and not using any glibc functions. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +void +_start (void) +{ + INTERNAL_SYSCALL_CALL (exit_group, 0); +} diff --git a/sysdeps/unix/sysv/linux/tst-syscall-restart.c b/sysdeps/unix/sysv/linux/tst-syscall-restart.c new file mode 100644 index 0000000000..0ee7dc8517 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-syscall-restart.c @@ -0,0 +1,112 @@ +/* Test if a syscall is correctly restarted. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +static int +check_pid (pid_t pid) +{ + /* Wait until the child has called pause and is blocking in the kernel. */ + support_process_state_wait (pid, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGSTOP), 0); + + /* Adding process_state_tracing_stop ('t') allows the test to work under + trace programs such as ptrace.
*/ + support_process_state_wait (pid, support_process_state_stopped + | support_process_state_tracing_stop); + + TEST_COMPARE (kill (pid, SIGCONT), 0); + + enum support_process_state state + = support_process_state_wait (pid, support_process_state_sleeping + | support_process_state_zombie); + + TEST_COMPARE (state, support_process_state_sleeping); + + TEST_COMPARE (kill (pid, SIGTERM), 0); + + siginfo_t info; + TEST_COMPARE (waitid (P_PID, pid, &info, WEXITED), 0); + TEST_COMPARE (info.si_signo, SIGCHLD); + TEST_COMPARE (info.si_code, CLD_KILLED); + TEST_COMPARE (info.si_status, SIGTERM); + TEST_COMPARE (info.si_pid, pid); + + return 0; +} + +static void * +tf (void *closure) +{ + pause (); + return NULL; +} + +static void +child_mt (void) +{ + /* Let only the created thread handle signals. */ + sigset_t set; + sigfillset (&set); + xpthread_sigmask (SIG_BLOCK, &set, NULL); + + sigdelset (&set, SIGSTOP); + sigdelset (&set, SIGCONT); + sigdelset (&set, SIGTERM); + + pthread_attr_t attr; + xpthread_attr_init (&attr); + TEST_COMPARE (pthread_attr_setsigmask_np (&attr, &set), 0); + + xpthread_join (xpthread_create (&attr, tf, NULL)); +} + +static void +do_test_syscall (bool multithread) +{ + pid_t pid = xfork (); + if (pid == 0) + { + if (multithread) + child_mt (); + else + pause (); + _exit (127); + } + + check_pid (pid); +} + +static int +do_test (void) +{ + /* Check for both single and multi thread, since they use different syscall + mechanisms.
*/ + do_test_syscall (false); + do_test_syscall (true); + + return 0; +} + +#include diff --git a/sysdeps/unix/sysv/linux/x86_64/dl-cet.h b/sysdeps/unix/sysv/linux/x86_64/dl-cet.h index 1fe3133406..b4f7e6c9cd 100644 --- a/sysdeps/unix/sysv/linux/x86_64/dl-cet.h +++ b/sysdeps/unix/sysv/linux/x86_64/dl-cet.h @@ -92,9 +92,9 @@ dl_cet_ibt_enabled (void) # Pass GL(dl_x86_feature_1) to _dl_cet_setup_features.\n\ movl %edx, %edi\n\ # Align stack for the _dl_cet_setup_features call.\n\ - andq $-16, %rsp\n\ + and $-16, %" RSP_LP "\n\ call _dl_cet_setup_features\n\ # Restore %rax and %rsp from %r12 and %r13.\n\ - movq %r12, %rax\n\ - movq %r13, %rsp\n\ + mov %" R12_LP ", %" RAX_LP "\n\ + mov %" R13_LP ", %" RSP_LP "\n\ " diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h index 3040a47d72..df3e22236d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h @@ -155,6 +155,7 @@ #define __NR_lsm_set_self_attr 1073742284 #define __NR_lstat 1073741830 #define __NR_madvise 1073741852 +#define __NR_map_shadow_stack 1073742277 #define __NR_mbind 1073742061 #define __NR_membarrier 1073742148 #define __NR_memfd_create 1073742143 diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h b/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h index 98124169e6..47fa8af4ce 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/fixup-asm-unistd.h @@ -15,6 +15,10 @@ License along with the GNU C Library; if not, see . */ +#ifndef __NR_map_shadow_stack +# define __NR_map_shadow_stack 1073742277 +#endif + /* X32 uses the same 64-bit syscall interface for set_thread_area. 
*/ #ifndef __NR_set_thread_area # define __NR_set_thread_area 1073742029 diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 5311b594af..c814060e08 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -4,7 +4,13 @@ endif ifeq ($(subdir),elf) sysdep_routines += get-cpuid-feature-leaf -sysdep-dl-routines += dl-get-cpu-features +sysdep-dl-routines += \ + dl-get-cpu-features \ + dl-tlsdesc \ + tls_get_addr \ + tlsdesc \ +# sysdep-dl-routines + sysdep_headers += \ bits/platform/features.h \ bits/platform/x86.h \ @@ -21,6 +27,9 @@ tests += \ tst-cpu-features-supports-static \ tst-get-cpu-features \ tst-get-cpu-features-static \ + tst-gnu2-tls2-x86-noxsave \ + tst-gnu2-tls2-x86-noxsavec \ + tst-gnu2-tls2-x86-noxsavexsavec \ tst-hwcap-tunables \ # tests tests-static += \ @@ -91,6 +100,42 @@ CFLAGS-tst-gnu2-tls2.c += -msse CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell + +LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy +LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy +LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy + +# Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled +# via tunable. 
+tst-gnu2-tls2-x86-noxsave-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE +tst-gnu2-tls2-x86-noxsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC +tst-gnu2-tls2-x86-noxsavexsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE,-XSAVEC +$(objpfx)tst-gnu2-tls2-x86-noxsave: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2-x86-noxsavec: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2-x86-noxsavexsavec: $(shared-thread-library) +$(objpfx)tst-gnu2-tls2-x86-noxsave.out \ +$(objpfx)tst-gnu2-tls2-x86-noxsavec.out \ +$(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ + $(objpfx)tst-gnu2-tls2mod0.so \ + $(objpfx)tst-gnu2-tls2mod1.so \ + $(objpfx)tst-gnu2-tls2mod2.so + +CFLAGS-tst-tls23.c += -msse2 +CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell + +LDFLAGS-tst-tls23 += -rdynamic +tst-tls23-mod.so-no-z-defs = yes + +$(objpfx)tst-tls23-mod.so: $(libsupport) + +tests-special += $(objpfx)check-gnu2-tls.out + +$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so + LC_ALL=C $(READELF) -V -W $< \ + | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ + | grep GLIBC_ABI_GNU2_TLS > $@; \ + $(evaluate-test) +generated += check-gnu2-tls.out endif ifeq ($(subdir),math) diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions index 4b10c4b5d7..e8dcfccbe4 100644 --- a/sysdeps/x86/Versions +++ b/sysdeps/x86/Versions @@ -7,4 +7,9 @@ libc { GLIBC_2.33 { __x86_get_cpuid_feature_leaf; } + GLIBC_ABI_GNU2_TLS { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. 
+ __placeholder_only_for_empty_version_map; + } } diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index c096dd390a..89041e6e05 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -24,6 +24,7 @@ #include #include #include +#include extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden; @@ -83,6 +84,8 @@ extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *) # include #endif +unsigned long int _dl_x86_features_tlsdesc_state_size; + static void update_active (struct cpu_features *cpu_features) { @@ -317,17 +320,13 @@ update_active (struct cpu_features *cpu_features) = xsave_state_full_size; cpu_features->xsave_state_full_size = xsave_state_full_size; + _dl_x86_features_tlsdesc_state_size = xsave_state_full_size; /* Check if XSAVEC is available. */ if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC)) { - unsigned int xstate_comp_offsets[32]; - unsigned int xstate_comp_sizes[32]; -#ifdef __x86_64__ - unsigned int xstate_amx_comp_offsets[32]; - unsigned int xstate_amx_comp_sizes[32]; - unsigned int amx_ecx; -#endif + unsigned int xstate_comp_offsets[X86_XSTATE_MAX_ID + 1]; + unsigned int xstate_comp_sizes[X86_XSTATE_MAX_ID + 1]; unsigned int i; xstate_comp_offsets[0] = 0; @@ -335,39 +334,16 @@ update_active (struct cpu_features *cpu_features) xstate_comp_offsets[2] = 576; xstate_comp_sizes[0] = 160; xstate_comp_sizes[1] = 256; -#ifdef __x86_64__ - xstate_amx_comp_offsets[0] = 0; - xstate_amx_comp_offsets[1] = 160; - xstate_amx_comp_offsets[2] = 576; - xstate_amx_comp_sizes[0] = 160; - xstate_amx_comp_sizes[1] = 256; -#endif - for (i = 2; i < 32; i++) + for (i = 2; i <= X86_XSTATE_MAX_ID; i++) { if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0) { __cpuid_count (0xd, i, eax, ebx, ecx, edx); -#ifdef __x86_64__ - /* Include this in xsave_state_full_size. */ - amx_ecx = ecx; - xstate_amx_comp_sizes[i] = eax; - if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0) - { - /* Exclude this from xsave_state_size. 
*/ - ecx = 0; - xstate_comp_sizes[i] = 0; - } - else -#endif - xstate_comp_sizes[i] = eax; + xstate_comp_sizes[i] = eax; } else { -#ifdef __x86_64__ - amx_ecx = 0; - xstate_amx_comp_sizes[i] = 0; -#endif ecx = 0; xstate_comp_sizes[i] = 0; } @@ -376,44 +352,32 @@ update_active (struct cpu_features *cpu_features) { xstate_comp_offsets[i] = (xstate_comp_offsets[i - 1] - + xstate_comp_sizes[i -1]); + + xstate_comp_sizes[i - 1]); if ((ecx & (1 << 1)) != 0) xstate_comp_offsets[i] = ALIGN_UP (xstate_comp_offsets[i], 64); -#ifdef __x86_64__ - xstate_amx_comp_offsets[i] - = (xstate_amx_comp_offsets[i - 1] - + xstate_amx_comp_sizes[i - 1]); - if ((amx_ecx & (1 << 1)) != 0) - xstate_amx_comp_offsets[i] - = ALIGN_UP (xstate_amx_comp_offsets[i], - 64); -#endif } } /* Use XSAVEC. */ unsigned int size - = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + = (xstate_comp_offsets[X86_XSTATE_MAX_ID] + + xstate_comp_sizes[X86_XSTATE_MAX_ID]); if (size) { + size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, + 64); #ifdef __x86_64__ - unsigned int amx_size - = (xstate_amx_comp_offsets[31] - + xstate_amx_comp_sizes[31]); - amx_size - = ALIGN_UP ((amx_size - + TLSDESC_CALL_REGISTER_SAVE_AREA), - 64); - /* Set xsave_state_full_size to the compact AMX - state size for XSAVEC. NB: xsave_state_full_size - is only used in _dl_tlsdesc_dynamic_xsave and - _dl_tlsdesc_dynamic_xsavec. */ - cpu_features->xsave_state_full_size = amx_size; + _dl_x86_features_tlsdesc_state_size = size; + /* Exclude the AMX space from the start of TILECFG + space to the end of TILEDATA space. If CPU + doesn't support AMX, TILECFG offset is the same + as TILEDATA + 1 offset. Otherwise, they are + multiples of 64. 
*/ + size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] + - xstate_comp_offsets[X86_XSTATE_TILECFG_ID]); #endif - cpu_features->xsave_state_size - = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, - 64); + cpu_features->xsave_state_size = size; CPU_FEATURE_SET (cpu_features, XSAVEC); } } @@ -538,8 +502,8 @@ _Static_assert (((index_arch_Fast_Unaligned_Load "Incorrect index_arch_Fast_Unaligned_Load"); -/* Intel Family-6 microarch list. */ -enum +/* Intel microarch list. */ +enum intel_microarch { /* Atom processors. */ INTEL_ATOM_BONNELL, @@ -548,6 +512,7 @@ enum INTEL_ATOM_GOLDMONT, INTEL_ATOM_GOLDMONT_PLUS, INTEL_ATOM_SIERRAFOREST, + INTEL_ATOM_CLEARWATERFOREST, INTEL_ATOM_GRANDRIDGE, INTEL_ATOM_TREMONT, @@ -575,7 +540,11 @@ enum INTEL_BIGCORE_METEORLAKE, INTEL_BIGCORE_LUNARLAKE, INTEL_BIGCORE_ARROWLAKE, + INTEL_BIGCORE_PANTHERLAKE, INTEL_BIGCORE_GRANITERAPIDS, + INTEL_BIGCORE_DIAMONDRAPIDS, + INTEL_BIGCORE_WILDCATLAKE, + INTEL_BIGCORE_NOVALAKE, /* Mixed (bigcore + atom SOC). */ INTEL_MIXED_LAKEFIELD, @@ -589,7 +558,7 @@ enum INTEL_UNKNOWN, }; -static unsigned int +static enum intel_microarch intel_get_fam6_microarch (unsigned int model, __attribute__ ((unused)) unsigned int stepping) { @@ -620,6 +589,8 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_ATOM_GOLDMONT_PLUS; case 0xAF: return INTEL_ATOM_SIERRAFOREST; + case 0xDD: + return INTEL_ATOM_CLEARWATERFOREST; case 0xB6: return INTEL_ATOM_GRANDRIDGE; case 0x86: @@ -727,8 +698,14 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_BIGCORE_METEORLAKE; case 0xbd: return INTEL_BIGCORE_LUNARLAKE; + case 0xb5: + case 0xc5: case 0xc6: return INTEL_BIGCORE_ARROWLAKE; + case 0xCC: + return INTEL_BIGCORE_PANTHERLAKE; + case 0xD5: + return INTEL_BIGCORE_WILDCATLAKE; case 0xAD: case 0xAE: return INTEL_BIGCORE_GRANITERAPIDS; @@ -756,6 +733,12 @@ init_cpu_features (struct cpu_features *cpu_features) unsigned int stepping = 0; enum cpu_features_kind kind; + /* Default is avoid non-temporal memset 
for non Intel/AMD hardware. This is because, + as of writing this, we only have benchmarks indicating its profitability + on Intel/AMD. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + |= bit_arch_Avoid_Non_Temporal_Memset; + cpu_features->cachesize_non_temporal_divisor = 4; #if !HAS_CPUID if (__get_cpuid_max (0, 0) == 0) @@ -781,132 +764,25 @@ init_cpu_features (struct cpu_features *cpu_features) update_active (cpu_features); + /* Benchmarks indicate non-temporal memset can be profitable on Intel + hardware. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; + + enum intel_microarch microarch = INTEL_UNKNOWN; if (family == 0x06) { model += extended_model; - unsigned int microarch - = intel_get_fam6_microarch (model, stepping); + microarch = intel_get_fam6_microarch (model, stepping); + /* Disable TSX on some processors to avoid TSX on kernels that + weren't updated with the latest microcode package (which + disables broken feature by default). */ switch (microarch) { - /* Atom / KNL tuning. */ - case INTEL_ATOM_BONNELL: - /* BSF is slow on Bonnell. */ - cpu_features->preferred[index_arch_Slow_BSF] - |= bit_arch_Slow_BSF; - break; - - /* Unaligned load versions are faster than SSSE3 - on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ - case INTEL_ATOM_AIRMONT: - case INTEL_ATOM_SILVERMONT: - case INTEL_ATOM_GOLDMONT: - case INTEL_ATOM_GOLDMONT_PLUS: - - /* Knights Landing. Enable Silvermont optimizations. */ - case INTEL_KNIGHTS_LANDING: - - cpu_features->preferred[index_arch_Fast_Unaligned_Load] - |= (bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); - break; - - case INTEL_ATOM_TREMONT: - /* Enable rep string instructions, unaligned load, unaligned - copy, pminub and avoid SSE 4.2 on Tremont.
*/ - cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); - break; - - /* - Default tuned Knights microarch. - case INTEL_KNIGHTS_MILL: - */ - - /* - Default tuned atom microarch. - case INTEL_ATOM_SIERRAFOREST: - case INTEL_ATOM_GRANDRIDGE: - */ - - /* Bigcore/Default Tuning. */ default: - default_tuning: - /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ - if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) - break; - - enable_modern_features: - /* Rep string instructions, unaligned load, unaligned copy, - and pminub are fast on Intel Core i3, i5 and i7. */ - cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop); break; - case INTEL_BIGCORE_NEHALEM: - case INTEL_BIGCORE_WESTMERE: - /* Older CPUs prefer non-temporal stores at lower threshold. */ - cpu_features->cachesize_non_temporal_divisor = 8; - goto enable_modern_features; - - /* Older Bigcore microarch (smaller non-temporal store - threshold). */ - case INTEL_BIGCORE_SANDYBRIDGE: - case INTEL_BIGCORE_IVYBRIDGE: - case INTEL_BIGCORE_HASWELL: - case INTEL_BIGCORE_BROADWELL: - cpu_features->cachesize_non_temporal_divisor = 8; - goto default_tuning; - - /* Newer Bigcore microarch (larger non-temporal store - threshold). */ - case INTEL_BIGCORE_SKYLAKE_AVX512: - case INTEL_BIGCORE_CANNONLAKE: - /* Benchmarks indicate non-temporal memset is not - necessarily profitable on SKX (and in some cases much - worse). This is likely unique to SKX due its it unique - mesh interconnect (not present on ICX or BWD). Disable - non-temporal on all Skylake servers. 
*/ - cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] - |= bit_arch_Avoid_Non_Temporal_Memset; - case INTEL_BIGCORE_COMETLAKE: - case INTEL_BIGCORE_SKYLAKE: - case INTEL_BIGCORE_KABYLAKE: - case INTEL_BIGCORE_ICELAKE: - case INTEL_BIGCORE_TIGERLAKE: - case INTEL_BIGCORE_ROCKETLAKE: - case INTEL_BIGCORE_RAPTORLAKE: - case INTEL_BIGCORE_METEORLAKE: - case INTEL_BIGCORE_LUNARLAKE: - case INTEL_BIGCORE_ARROWLAKE: - case INTEL_BIGCORE_SAPPHIRERAPIDS: - case INTEL_BIGCORE_EMERALDRAPIDS: - case INTEL_BIGCORE_GRANITERAPIDS: - cpu_features->cachesize_non_temporal_divisor = 2; - goto default_tuning; - - /* Default tuned Mixed (bigcore + atom SOC). */ - case INTEL_MIXED_LAKEFIELD: - case INTEL_MIXED_ALDERLAKE: - cpu_features->cachesize_non_temporal_divisor = 2; - goto default_tuning; - } - - /* Disable TSX on some processors to avoid TSX on kernels that - weren't updated with the latest microcode package (which - disables broken feature by default). */ - switch (microarch) - { case INTEL_BIGCORE_SKYLAKE_AVX512: /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */ if (stepping <= 5) @@ -915,38 +791,176 @@ init_cpu_features (struct cpu_features *cpu_features) case INTEL_BIGCORE_KABYLAKE: /* NB: Although the errata documents that for model == 0x8e - (kabylake skylake client), only 0xb stepping or lower are - impacted, the intention of the errata was to disable TSX on - all client processors on all steppings. Include 0xc - stepping which is an Intel Core i7-8665U, a client mobile - processor. */ + (kabylake skylake client), only 0xb stepping or lower are + impacted, the intention of the errata was to disable TSX on + all client processors on all steppings. Include 0xc + stepping which is an Intel Core i7-8665U, a client mobile + processor. */ if (stepping > 0xc) break; /* Fall through. 
*/ case INTEL_BIGCORE_SKYLAKE: - /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for - processors listed in: - -https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html - */ - disable_tsx: - CPU_FEATURE_UNSET (cpu_features, HLE); - CPU_FEATURE_UNSET (cpu_features, RTM); - CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); - break; + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for + processors listed in: + + https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html + */ +disable_tsx: + CPU_FEATURE_UNSET (cpu_features, HLE); + CPU_FEATURE_UNSET (cpu_features, RTM); + CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); + break; case INTEL_BIGCORE_HASWELL: - /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working - TSX. Haswell also include other model numbers that have - working TSX. */ - if (model == 0x3f && stepping >= 4) + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working + TSX. Haswell also includes other model numbers that have + working TSX. */ + if (model == 0x3f && stepping >= 4) break; - CPU_FEATURE_UNSET (cpu_features, RTM); - break; + CPU_FEATURE_UNSET (cpu_features, RTM); + break; } } + else if (family == 18) + switch (model) + { + case 0x01: + case 0x03: + microarch = INTEL_BIGCORE_NOVALAKE; + break; + default: + break; + } + else if (family == 19) + switch (model) + { + case 0x01: + microarch = INTEL_BIGCORE_DIAMONDRAPIDS; + break; + + default: + break; + } + + switch (microarch) + { + /* Atom / KNL tuning. */ + case INTEL_ATOM_BONNELL: + /* BSF is slow on Bonnell. */ + cpu_features->preferred[index_arch_Slow_BSF] + |= bit_arch_Slow_BSF; + break; + + /* Unaligned load versions are faster than SSSE3 + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ + case INTEL_ATOM_AIRMONT: + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: + + /* Knights Landing. Enable Silvermont optimizations. 
*/ + case INTEL_KNIGHTS_LANDING: + + cpu_features->preferred[index_arch_Fast_Unaligned_Load] + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); + break; + + case INTEL_ATOM_TREMONT: + /* Enable rep string instructions, unaligned load, unaligned + copy, pminub and avoid SSE 4.2 on Tremont. */ + cpu_features->preferred[index_arch_Fast_Rep_String] + |= (bit_arch_Fast_Rep_String + | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); + break; + + /* + Default tuned Knights microarch. + case INTEL_KNIGHTS_MILL: + */ + + /* + Default tuned atom microarch. + case INTEL_ATOM_SIERRAFOREST: + case INTEL_ATOM_GRANDRIDGE: + case INTEL_ATOM_CLEARWATERFOREST: + */ + + /* Bigcore/Default Tuning. */ + default: + default_tuning: + /* Unknown Intel processors. Assuming this is one of Core + i3/i5/i7 processors if AVX is available. */ + if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) + break; + + enable_modern_features: + /* Rep string instructions, unaligned load, unaligned copy, + and pminub are fast on Intel Core i3, i5 and i7. */ + cpu_features->preferred[index_arch_Fast_Rep_String] + |= (bit_arch_Fast_Rep_String + | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop); + break; + + case INTEL_BIGCORE_NEHALEM: + case INTEL_BIGCORE_WESTMERE: + /* Older CPUs prefer non-temporal stores at lower threshold. */ + cpu_features->cachesize_non_temporal_divisor = 8; + goto enable_modern_features; + + /* Older Bigcore microarch (smaller non-temporal store + threshold). */ + case INTEL_BIGCORE_SANDYBRIDGE: + case INTEL_BIGCORE_IVYBRIDGE: + case INTEL_BIGCORE_HASWELL: + case INTEL_BIGCORE_BROADWELL: + cpu_features->cachesize_non_temporal_divisor = 8; + goto default_tuning; + + /* Newer Bigcore microarch (larger non-temporal store + threshold). 
*/ + case INTEL_BIGCORE_SKYLAKE_AVX512: + case INTEL_BIGCORE_CANNONLAKE: + /* Benchmarks indicate non-temporal memset is not + necessarily profitable on SKX (and in some cases much + worse). This is likely unique to SKX due to its unique + mesh interconnect (not present on ICX or BWD). Disable + non-temporal on all Skylake servers. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + |= bit_arch_Avoid_Non_Temporal_Memset; + /* fallthrough */ + case INTEL_BIGCORE_COMETLAKE: + case INTEL_BIGCORE_SKYLAKE: + case INTEL_BIGCORE_KABYLAKE: + case INTEL_BIGCORE_ICELAKE: + case INTEL_BIGCORE_TIGERLAKE: + case INTEL_BIGCORE_ROCKETLAKE: + case INTEL_BIGCORE_RAPTORLAKE: + case INTEL_BIGCORE_METEORLAKE: + case INTEL_BIGCORE_LUNARLAKE: + case INTEL_BIGCORE_ARROWLAKE: + case INTEL_BIGCORE_PANTHERLAKE: + case INTEL_BIGCORE_WILDCATLAKE: + case INTEL_BIGCORE_NOVALAKE: + case INTEL_BIGCORE_SAPPHIRERAPIDS: + case INTEL_BIGCORE_EMERALDRAPIDS: + case INTEL_BIGCORE_GRANITERAPIDS: + case INTEL_BIGCORE_DIAMONDRAPIDS: + /* Default tuned Mixed (bigcore + atom SOC). */ + case INTEL_MIXED_LAKEFIELD: + case INTEL_MIXED_ALDERLAKE: + cpu_features->cachesize_non_temporal_divisor = 2; + goto default_tuning; + } /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER if AVX512ER is available. Don't use AVX512 to avoid lower CPU @@ -991,6 +1005,11 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx; + /* Benchmarks indicate non-temporal memset can be profitable on AMD + hardware. 
*/ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; + if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) { /* Since the FMA4 bit is in CPUID_INDEX_80000001 and @@ -1119,6 +1138,9 @@ no_cpuid: TUNABLE_CALLBACK (set_prefer_map_32bit_exec)); #endif + /* Do not add the logic to disable XSAVE/XSAVEC if this glibc build + requires AVX and therefore XSAVE or XSAVEC support. */ +#ifndef GCCMACRO__AVX__ bool disable_xsave_features = false; if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE)) @@ -1172,6 +1194,7 @@ no_cpuid: CPU_FEATURE_UNSET (cpu_features, FMA4); } +#endif #ifdef __x86_64__ GLRO(dl_hwcap) = HWCAP_X86_64; diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c index ae9dcd6180..a0b31d80f6 100644 --- a/sysdeps/x86/cpu-tunables.c +++ b/sysdeps/x86/cpu-tunables.c @@ -164,6 +164,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) /* Update xsave_state_size to XSAVE state size. */ cpu_features->xsave_state_size = cpu_features->xsave_state_full_size; + _dl_x86_features_tlsdesc_state_size + = cpu_features->xsave_state_full_size; CPU_FEATURE_UNSET (cpu_features, XSAVEC); } } @@ -248,6 +250,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, Avoid_Non_Temporal_Memset, 25); } + break; case 26: { CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index a1c03b8903..7b1b61c096 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -961,11 +961,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) non_temporal_threshold = maximum_non_temporal_threshold; /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */ - unsigned int minimum_rep_movsb_threshold; + unsigned long int minimum_rep_movsb_threshold; /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for VEC_SIZE == 64 or 32. 
For VEC_SIZE == 16, the default REP MOVSB threshold is 2048 * (VEC_SIZE / 16). */ - unsigned int rep_movsb_threshold; + unsigned long int rep_movsb_threshold; if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512)) { @@ -988,14 +988,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) rep_movsb_threshold = 2112; - /* Non-temporal stores are more performant on Intel and AMD hardware above - non_temporal_threshold. Enable this for both Intel and AMD hardware. */ - unsigned long int memset_non_temporal_threshold = SIZE_MAX; - if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset) - && (cpu_features->basic.kind == arch_kind_intel - || cpu_features->basic.kind == arch_kind_amd)) - memset_non_temporal_threshold = non_temporal_threshold; - /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of cases slower than the vectorized path (and for some alignments, it is really slow, check BZ #30994). */ @@ -1017,6 +1009,13 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) if (tunable_size != 0) shared = tunable_size; + /* Non-temporal stores are more performant on some hardware above + non_temporal_threshold. Currently Prefer_Non_Temporal is set for both + Intel and AMD hardware. 
*/ + unsigned long int memset_non_temporal_threshold = SIZE_MAX; + if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset)) + memset_non_temporal_threshold = non_temporal_threshold; + tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL); if (tunable_size > minimum_non_temporal_threshold && tunable_size <= maximum_non_temporal_threshold) diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c index 49eeb5f70a..41100a908a 100644 --- a/sysdeps/x86/dl-diagnostics-cpu.c +++ b/sysdeps/x86/dl-diagnostics-cpu.c @@ -89,6 +89,8 @@ _dl_diagnostics_cpu (void) cpu_features->xsave_state_size); print_cpu_features_value ("xsave_state_full_size", cpu_features->xsave_state_full_size); + print_cpu_features_value ("tlsdesc_state_full_size", + _dl_x86_features_tlsdesc_state_size); print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size); print_cpu_features_value ("shared_cache_size", cpu_features->shared_cache_size); diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index aaae44f0e1..03c71387dd 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -934,8 +934,6 @@ struct cpu_features /* The full state size for XSAVE when XSAVEC is disabled by GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC - - and the AMX state size when XSAVEC is available. */ unsigned int xsave_state_full_size; /* Data cache size for use in memory and string routines, typically @@ -989,6 +987,13 @@ extern const struct cpu_features *_dl_x86_get_cpu_features (void) #define __get_cpu_features() _dl_x86_get_cpu_features() +#if IS_IN (rtld) || IS_IN (libc) +/* XSAVE/XSAVEC state size used by TLS descriptors. Compared to + xsave_state_size from struct cpu_features, this includes additional + registers. */ +extern unsigned long int _dl_x86_features_tlsdesc_state_size attribute_hidden; +#endif + #if defined (_LIBC) && !IS_IN (nonlib) /* Unused for x86. 
*/ # define INIT_ARCH() diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index 7359149e17..d5f5ec0ecc 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -102,6 +102,9 @@ | (1 << X86_XSTATE_ZMM_ID) \ | (1 << X86_XSTATE_APX_F_ID)) +/* The maximum supported xstate ID. */ +# define X86_XSTATE_MAX_ID X86_XSTATE_APX_F_ID + /* AMX state mask. */ # define AMX_STATE_SAVE_MASK \ ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID)) @@ -123,6 +126,9 @@ | (1 << X86_XSTATE_K_ID) \ | (1 << X86_XSTATE_ZMM_H_ID)) +/* The maximum supported xstate ID. */ +# define X86_XSTATE_MAX_ID X86_XSTATE_ZMM_H_ID + /* States to be included in xsave_state_size. */ # define FULL_STATE_SAVE_MASK STATE_SAVE_MASK #endif @@ -177,6 +183,29 @@ #define atom_text_section .section ".text.atom", "ax" +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 8-byte/4-byte stack alignment. + Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't + assume that stack will be always aligned at 16 bytes. */ +# ifdef __x86_64__ +# define DL_STACK_ALIGNMENT 8 +# define MINIMUM_ALIGNMENT 16 +# else +# define DL_STACK_ALIGNMENT 4 +# endif +#endif + +/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for + STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling + _dl_fixup/__tls_get_addr. 
*/ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) + #endif /* __ASSEMBLER__ */ #endif /* _X86_SYSDEP_H */ diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c new file mode 100644 index 0000000000..f0024c143d --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c new file mode 100644 index 0000000000..f0024c143d --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c new file mode 100644 index 0000000000..f0024c143d --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c new file mode 100644 index 0000000000..6130d91cf8 --- /dev/null +++ b/sysdeps/x86/tst-tls23.c @@ -0,0 +1,22 @@ +#ifndef __x86_64__ +#include + +#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) +#endif + +/* Set XMM0...XMM7 to all 1s. */ +#define PREPARE_MALLOC() \ +{ \ + asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ + asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ + asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ + asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ + asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ + asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ + asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ + asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ +} + +#include + +v2di v1, v2, v3; diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h new file mode 100644 index 0000000000..21cee4ca07 --- /dev/null +++ b/sysdeps/x86/tst-tls23.h @@ -0,0 +1,35 @@ +/* Test that __tls_get_addr preserves XMM registers. 
+ Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +typedef long long v2di __attribute__((vector_size(16))); +extern v2di v1, v2, v3; + +#define BEFORE_TLS_CALL() \ + v1 = __extension__(v2di){0, 0}; \ + v2 = __extension__(v2di){0, 0}; + +#define AFTER_TLS_CALL() \ + v3 = __extension__(v2di){0, 0}; \ + asm volatile ("" : "+x" (v3)); \ + union { v2di x; long long a[2]; } u; \ + u.x = v3; \ + TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); + +#include diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index ce949dba27..27fd7ddea2 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -41,9 +41,6 @@ ifeq ($(subdir),elf) CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ -mno-mmx) -sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr - -tests += ifuncmain8 modules-names += ifuncmod8 $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so @@ -209,6 +206,15 @@ LDFLAGS-tst-plt-rewrite2 = -Wl,-z,now LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so + +tests-special += $(objpfx)check-dt-x86-64-plt.out + +$(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so + LC_ALL=C $(READELF) -V -W $< \ + | sed -ne 
'/.gnu.version_d/, /.gnu.version_r/ p' \ + | grep GLIBC_ABI_DT_X86_64_PLT > $@; \ + $(evaluate-test) +generated += check-dt-x86-64-plt.out endif test-internal-extras += tst-gnu2-tls2mod1 diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions index e94758b236..6a989ad3b3 100644 --- a/sysdeps/x86_64/Versions +++ b/sysdeps/x86_64/Versions @@ -5,6 +5,11 @@ libc { GLIBC_2.13 { __fentry__; } + GLIBC_ABI_DT_X86_64_PLT { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. + __placeholder_only_for_empty_version_map; + } } libm { GLIBC_2.1 { diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index a6de3793e4..4f12955875 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -139,37 +139,37 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], .globl _start\n\ .globl _dl_start_user\n\ _start:\n\ - movq %rsp, %rdi\n\ + mov %" RSP_LP ", %" RDI_LP "\n\ call _dl_start\n\ _dl_start_user:\n\ # Save the user entry point address in %r12.\n\ - movq %rax, %r12\n\ + mov %" RAX_LP ", %" R12_LP "\n\ # Save %rsp value in %r13.\n\ - movq %rsp, %r13\n\ + mov %" RSP_LP ", % " R13_LP "\n\ "\ RTLD_START_ENABLE_X86_FEATURES \ "\ # Read the original argument count.\n\ - movq (%rsp), %rdx\n\ + mov (%rsp), %" RDX_LP "\n\ # Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\ # argc -> rsi\n\ - movq %rdx, %rsi\n\ + mov %" RDX_LP ", %" RSI_LP "\n\ # And align stack for the _dl_init call. 
\n\ - andq $-16, %rsp\n\ + and $-16, %" RSP_LP "\n\ # _dl_loaded -> rdi\n\ - movq _rtld_local(%rip), %rdi\n\ + mov _rtld_local(%rip), %" RDI_LP "\n\ # env -> rcx\n\ - leaq 16(%r13,%rdx,8), %rcx\n\ + lea 2*" LP_SIZE "(%r13,%rdx," LP_SIZE "), %" RCX_LP "\n\ # argv -> rdx\n\ - leaq 8(%r13), %rdx\n\ + lea " LP_SIZE "(%r13), %" RDX_LP "\n\ # Clear %rbp to mark outermost frame obviously even for constructors.\n\ xorl %ebp, %ebp\n\ # Call the function to run the initializers.\n\ call _dl_init\n\ # Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\ - leaq _dl_fini(%rip), %rdx\n\ + lea _dl_fini(%rip), %" RDX_LP "\n\ # And make sure %rsp points to argc stored on the stack.\n\ - movq %r13, %rsp\n\ + mov %" R13_LP ", %" RSP_LP "\n\ # Jump to the user's entry point.\n\ jmp *%r12\n\ .previous\n\ @@ -234,8 +234,13 @@ elf_machine_plt_value (struct link_map *map, const ElfW(Rela) *reloc, /* Names of the architecture-specific auditing callback functions. */ +#ifdef __LP64__ #define ARCH_LA_PLTENTER x86_64_gnu_pltenter #define ARCH_LA_PLTEXIT x86_64_gnu_pltexit +#else +#define ARCH_LA_PLTENTER x32_gnu_pltenter +#define ARCH_LA_PLTEXIT x32_gnu_pltexit +#endif #endif /* !dl_machine_h */ diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h index 9f02cfc3eb..44d948696f 100644 --- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h +++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h @@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic: # endif #else /* Allocate stack space of the required size to save the state. */ - sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP + sub _dl_x86_features_tlsdesc_state_size(%rip), %RSP_LP #endif /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9, r10 and r11. 
*/ diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S index 057a10862a..586079291a 100644 --- a/sysdeps/x86_64/dl-tlsdesc.S +++ b/sysdeps/x86_64/dl-tlsdesc.S @@ -22,7 +22,6 @@ #include #include #include "tlsdesc.h" -#include "dl-trampoline-save.h" /* Area on stack to save and restore registers used for parameter passing when calling _dl_tlsdesc_dynamic. */ diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 87c5137837..4c11fcf032 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -22,7 +22,6 @@ #include #include #include -#include "dl-trampoline-save.h" /* Area on stack to save and restore registers used for parameter passing when calling _dl_fixup. */ diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index cbe09d49f4..bc479b42d2 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -1,15 +1,18 @@ ifeq ($(subdir),math) CFLAGS-e_asin-fma.c = -mfma -mavx2 CFLAGS-e_atan2-fma.c = -mfma -mavx2 +CFLAGS-e_atanh-fma.c = -mfma -mavx2 CFLAGS-e_exp-fma.c = -mfma -mavx2 CFLAGS-e_log-fma.c = -mfma -mavx2 CFLAGS-e_log2-fma.c = -mfma -mavx2 CFLAGS-e_pow-fma.c = -mfma -mavx2 +CFLAGS-e_sinh-fma.c = -mfma -mavx2 CFLAGS-s_atan-fma.c = -mfma -mavx2 CFLAGS-s_expm1-fma.c = -mfma -mavx2 CFLAGS-s_log1p-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 +CFLAGS-s_tanh-fma.c = -mfma -mavx2 CFLAGS-s_sincos-fma.c = -mfma -mavx2 CFLAGS-e_exp2f-fma.c = -mfma -mavx2 @@ -55,6 +58,7 @@ libm-sysdep_routines += \ e_asin-fma \ e_atan2-avx \ e_atan2-fma \ + e_atanh-fma \ e_exp-avx \ e_exp-fma \ e_exp2f-fma \ @@ -66,6 +70,7 @@ libm-sysdep_routines += \ e_logf-fma \ e_pow-fma \ e_powf-fma \ + e_sinh-fma \ s_atan-avx \ s_atan-fma \ s_ceil-sse4_1 \ @@ -92,6 +97,7 @@ libm-sysdep_routines += \ s_sinf-sse2 \ s_tan-avx \ s_tan-fma \ + s_tanh-fma \ s_trunc-sse4_1 \ s_truncf-sse4_1 \ # libm-sysdep_routines 
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atanh-fma.c b/sysdeps/x86_64/fpu/multiarch/e_atanh-fma.c new file mode 100644 index 0000000000..c3f2f9e550 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atanh-fma.c @@ -0,0 +1,6 @@ +#define __ieee754_atanh __ieee754_atanh_fma +#define __log1p __log1p_fma + +#define SECTION __attribute__ ((section (".text.fma"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/e_atanh.c b/sysdeps/x86_64/fpu/multiarch/e_atanh.c new file mode 100644 index 0000000000..d2b785dfc0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atanh.c @@ -0,0 +1,34 @@ +/* Multiple versions of atanh. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include + +extern double __redirect_ieee754_atanh (double); + +# define SYMBOL_NAME ieee754_atanh +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_ieee754_atanh, __ieee754_atanh, IFUNC_SELECTOR ()); + +libm_alias_finite (__ieee754_atanh, __atanh) + +# define __ieee754_atanh __ieee754_atanh_sse2 +#endif +#include diff --git a/sysdeps/x86_64/fpu/multiarch/e_sinh-fma.c b/sysdeps/x86_64/fpu/multiarch/e_sinh-fma.c new file mode 100644 index 0000000000..e0e1e39a7a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_sinh-fma.c @@ -0,0 +1,12 @@ +#define __ieee754_sinh __ieee754_sinh_fma +#define __ieee754_exp __ieee754_exp_fma +#define __expm1 __expm1_fma + +/* NB: __expm1 may be expanded to __expm1_fma in the following + prototypes. */ +extern long double __expm1l (long double); +extern long double __expm1f128 (long double); + +#define SECTION __attribute__ ((section (".text.fma"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/e_sinh.c b/sysdeps/x86_64/fpu/multiarch/e_sinh.c new file mode 100644 index 0000000000..3d3c18ccdf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_sinh.c @@ -0,0 +1,35 @@ +/* Multiple versions of sinh. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include + +extern double __redirect_ieee754_sinh (double); + +# define SYMBOL_NAME ieee754_sinh +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_ieee754_sinh, __ieee754_sinh, + IFUNC_SELECTOR ()); + +libm_alias_finite (__ieee754_sinh, __sinh) + +# define __ieee754_sinh __ieee754_sinh_sse2 +#endif +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c new file mode 100644 index 0000000000..1b808b1227 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c @@ -0,0 +1,11 @@ +#define __tanh __tanh_fma +#define __expm1 __expm1_fma + +/* NB: __expm1 may be expanded to __expm1_fma in the following + prototypes. */ +extern long double __expm1l (long double); +extern long double __expm1f128 (long double); + +#define SECTION __attribute__ ((section (".text.fma"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh.c b/sysdeps/x86_64/fpu/multiarch/s_tanh.c new file mode 100644 index 0000000000..5539b6c61c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh.c @@ -0,0 +1,31 @@ +/* Multiple versions of tanh. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL + +extern double __redirect_tanh (double); + +# define SYMBOL_NAME tanh +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_tanh, __tanh, IFUNC_SELECTOR ()); + +# define __tanh __tanh_sse2 +#endif +#include diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 0bbb71bbbf..3db45db39b 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -922,7 +922,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (CPU_FEATURE_USABLE (AVX2) && CPU_FEATURE_USABLE (BMI2)), __wcsncpy_avx2) - X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy, + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy, 1, __wcsncpy_generic)) @@ -952,7 +952,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (CPU_FEATURE_USABLE (AVX2) && CPU_FEATURE_USABLE (BMI2)), __wcpncpy_avx2) - X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy, + X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy, 1, __wcpncpy_generic)) diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h index 6e20d9dd5a..727b69f95b 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h +++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h @@ -35,7 +35,7 @@ IFUNC_SELECTOR (void) if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2) && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, - AVX_Fast_Unaligned_Load, !)) + AVX_Fast_Unaligned_Load,)) { if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) { diff --git a/sysdeps/x86_64/multiarch/strchr-evex-base.S b/sysdeps/x86_64/multiarch/strchr-evex-base.S index 04e2c0e79e..3a0b7c9d64 100644 --- a/sysdeps/x86_64/multiarch/strchr-evex-base.S +++ b/sysdeps/x86_64/multiarch/strchr-evex-base.S @@ -124,13 +124,13 @@ L(page_cross): VPCMPNE %VMM(1), %VMM(0), %k1 VPTEST %VMM(1), %VMM(1), %k0{%k1} KMOV %k0, %VRAX -# ifdef USE_AS_WCSCHR + sar %cl, %VRAX +#ifdef USE_AS_WCSCHR sub $VEC_MATCH_MASK, 
%VRAX -# else +#else inc %VRAX -# endif +#endif /* Ignore number of character for alignment adjustment. */ - shr %cl, %VRAX jz L(align_more) bsf %VRAX, %VRAX diff --git a/sysdeps/x86_64/x32/dl-machine.h b/sysdeps/x86_64/x32/dl-machine.h deleted file mode 100644 index c35cee9261..0000000000 --- a/sysdeps/x86_64/x32/dl-machine.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Machine-dependent ELF dynamic relocation inline functions. x32 version. - Copyright (C) 2012-2024 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Must allow to be included more than once. - See #ifdef RESOLVE_MAP in sysdeps/x86_64/dl-machine.h. */ -#include - -#ifndef _X32_DL_MACHINE_H -#define _X32_DL_MACHINE_H - -#undef ARCH_LA_PLTENTER -#undef ARCH_LA_PLTEXIT -#undef RTLD_START - -/* Names of the architecture-specific auditing callback functions. */ -#define ARCH_LA_PLTENTER x32_gnu_pltenter -#define ARCH_LA_PLTEXIT x32_gnu_pltexit - -/* Initial entry point code for the dynamic linker. - The C function `_dl_start' is the real entry point; - its return value is the user program's entry point. 
*/ -#define RTLD_START asm ("\n\ -.text\n\ - .p2align 4\n\ -.globl _start\n\ -.globl _dl_start_user\n\ -_start:\n\ - movl %esp, %edi\n\ - call _dl_start\n\ -_dl_start_user:\n\ - # Save the user entry point address in %r12.\n\ - movl %eax, %r12d\n\ - # Read the original argument count.\n\ - movl (%rsp), %edx\n\ - # Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\ - # argc -> rsi\n\ - movl %edx, %esi\n\ - # Save %rsp value in %r13.\n\ - movl %esp, %r13d\n\ - # And align stack for the _dl_init call.\n\ - and $-16, %esp\n\ - # _dl_loaded -> rdi\n\ - movl _rtld_local(%rip), %edi\n\ - # env -> rcx\n\ - lea 8(%r13,%rdx,4), %ecx\n\ - # argv -> rdx\n\ - lea 4(%r13), %edx\n\ - # Clear %rbp to mark outermost frame obviously even for constructors.\n\ - xorl %ebp, %ebp\n\ - # Call the function to run the initializers.\n\ - call _dl_init\n\ - # Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\ - lea _dl_fini(%rip), %edx\n\ - # And make sure %rsp points to argc stored on the stack.\n\ - movl %r13d, %esp\n\ - # Jump to the user's entry point.\n\ - jmp *%r12\n\ -.previous\n\ -"); - -#endif /* !_X32_DL_MACHINE_H */ diff --git a/test-skeleton.c b/test-skeleton.c index ae185a4f28..690f26e7cf 100644 --- a/test-skeleton.c +++ b/test-skeleton.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/time/strftime_l.c b/time/strftime_l.c index 77adec9050..066c839c2f 100644 --- a/time/strftime_l.c +++ b/time/strftime_l.c @@ -40,6 +40,7 @@ #endif #include +#include #include /* Some systems define `time_t' here. */ #ifdef TIME_WITH_SYS_TIME diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile index 1cddd8cc6d..63adf0e8ef 100644 --- a/wcsmbs/Makefile +++ b/wcsmbs/Makefile @@ -160,6 +160,7 @@ tests := \ test-wcsncmp \ test-wcsncpy \ test-wcsnlen \ + test-wcsnlen-nonarray \ test-wcspbrk \ test-wcsrchr \ test-wcsspn \ @@ -205,6 +206,10 @@ tests := \ wcsmbs-tst1 \ # tests +# This test runs for a long time. 
+xtests += test-wcsncmp-nonarray + + include ../Rules ifeq ($(run-built-tests),yes) diff --git a/wcsmbs/bits/wchar2.h b/wcsmbs/bits/wchar2.h index 9fdff47ee2..43c6b63027 100644 --- a/wcsmbs/bits/wchar2.h +++ b/wcsmbs/bits/wchar2.h @@ -74,9 +74,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcscpy (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return __wcscpy_chk (__dest, __src, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcscpy_chk (__dest, __src, __sz / sizeof (wchar_t)); return __wcscpy_alias (__dest, __src); } @@ -84,9 +84,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcpcpy (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return __wcpcpy_chk (__dest, __src, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcpcpy_chk (__dest, __src, __sz / sizeof (wchar_t)); return __wcpcpy_alias (__dest, __src); } @@ -118,9 +118,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcscat (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return __wcscat_chk (__dest, __src, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcscat_chk (__dest, __src, __sz / sizeof (wchar_t)); return __wcscat_alias (__dest, __src); } @@ -128,9 +128,9 @@ __fortify_function __attribute_overloadable__ wchar_t * __NTH (wcsncat (__fortify_clang_overload_arg (wchar_t *, __restrict, __dest), const wchar_t *__restrict __src, size_t __n)) { - size_t sz = __glibc_objsize (__dest); - if (sz != (size_t) -1) - return __wcsncat_chk (__dest, 
__src, __n, sz / sizeof (wchar_t)); + size_t __sz = __glibc_objsize (__dest); + if (__sz != (size_t) -1) + return __wcsncat_chk (__dest, __src, __n, __sz / sizeof (wchar_t)); return __wcsncat_alias (__dest, __src, __n); } @@ -170,10 +170,10 @@ __fortify_function int __NTH (swprintf (wchar_t *__restrict __s, size_t __n, const wchar_t *__restrict __fmt, ...)) { - size_t sz = __glibc_objsize (__s); - if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) + size_t __sz = __glibc_objsize (__s); + if (__sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) return __swprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, - sz / sizeof (wchar_t), __fmt, __va_arg_pack ()); + __sz / sizeof (wchar_t), __fmt, __va_arg_pack ()); return __swprintf_alias (__s, __n, __fmt, __va_arg_pack ()); } #elif __fortify_use_clang @@ -206,10 +206,10 @@ __fortify_function int __NTH (vswprintf (wchar_t *__restrict __s, size_t __n, const wchar_t *__restrict __fmt, __gnuc_va_list __ap)) { - size_t sz = __glibc_objsize (__s); - if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) + size_t __sz = __glibc_objsize (__s); + if (__sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) return __vswprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, - sz / sizeof (wchar_t), __fmt, __ap); + __sz / sizeof (wchar_t), __fmt, __ap); return __vswprintf_alias (__s, __n, __fmt, __ap); } @@ -257,14 +257,14 @@ fgetws (__fortify_clang_overload_arg (wchar_t *, __restrict, __s), int __n, "fgetws called with length bigger " "than size of destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz)) + size_t __sz = __glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), __sz)) return __fgetws_alias (__s, __n, __stream); #if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz)) - return __fgetws_chk_warn (__s, sz / sizeof (wchar_t), __n, __stream); + if (__glibc_unsafe_len (__n, sizeof (wchar_t), __sz)) + return __fgetws_chk_warn (__s, __sz / sizeof 
(wchar_t), __n, __stream); #endif - return __fgetws_chk (__s, sz / sizeof (wchar_t), __n, __stream); + return __fgetws_chk (__s, __sz / sizeof (wchar_t), __n, __stream); } #ifdef __USE_GNU @@ -275,15 +275,15 @@ fgetws_unlocked (__fortify_clang_overload_arg (wchar_t *, __restrict, __s), "fgetws_unlocked called with length bigger " "than size of destination buffer") { - size_t sz = __glibc_objsize (__s); - if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz)) + size_t __sz = __glibc_objsize (__s); + if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), __sz)) return __fgetws_unlocked_alias (__s, __n, __stream); # if !__fortify_use_clang - if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz)) - return __fgetws_unlocked_chk_warn (__s, sz / sizeof (wchar_t), __n, + if (__glibc_unsafe_len (__n, sizeof (wchar_t), __sz)) + return __fgetws_unlocked_chk_warn (__s, __sz / sizeof (wchar_t), __n, __stream); # endif - return __fgetws_unlocked_chk (__s, sz / sizeof (wchar_t), __n, __stream); + return __fgetws_unlocked_chk (__s, __sz / sizeof (wchar_t), __n, __stream); } #endif diff --git a/wcsmbs/test-wcsncmp-nonarray.c b/wcsmbs/test-wcsncmp-nonarray.c new file mode 100644 index 0000000000..1ad9ebd8fd --- /dev/null +++ b/wcsmbs/test-wcsncmp-nonarray.c @@ -0,0 +1,5 @@ +#include +#define TEST_IDENTIFIER wcsncmp +#define TEST_NAME "wcsncmp" +typedef wchar_t CHAR; +#include "../string/test-Xncmp-nonarray.c" diff --git a/wcsmbs/test-wcsnlen-nonarray.c b/wcsmbs/test-wcsnlen-nonarray.c new file mode 100644 index 0000000000..a4b21fecd3 --- /dev/null +++ b/wcsmbs/test-wcsnlen-nonarray.c @@ -0,0 +1,5 @@ +#include +#define TEST_IDENTIFIER wcsnlen +#define TEST_NAME "wcsnlen" +typedef wchar_t CHAR; +#include "../string/test-Xnlen-nonarray.c"